From 3aa9e13fb9c457f0331f52464fc41cff42c226fd Mon Sep 17 00:00:00 2001
From: miha-q <>
Date: Thu, 7 Mar 2024 18:07:24 -0500
Subject: [PATCH] Thu Mar 7 06:07:24 PM EST 2024

---
 src/.kernel.tmp.1 |  63 ++++++++++++++
 src/.kernel.tmp.2 | Bin 6476 -> 8930 bytes
 src/QAnsel.c      |  21 ++++-
 src/complex.c     | 124 +++++++++++++++++++++++++++
 src/kernel.cl     |  75 ++++++++++++++++-
 src/kernel_cpu.cl |  74 +++++++++++++++-
 src/kernel_gpu.cl | 209 +++++++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 560 insertions(+), 6 deletions(-)

diff --git a/src/.kernel.tmp.1 b/src/.kernel.tmp.1
index 42aa607..253271d 100644
--- a/src/.kernel.tmp.1
+++ b/src/.kernel.tmp.1
@@ -215,3 +215,66 @@ __kernel void kernel_knk_2x2_R
 		}
 	}
 }
+
+
+/*
+ * Kronecker product R = A (x) G of a complex matrix A with a 2x2 complex
+ * gate G: R[2*rowA + gr][2*colA + gc] = A[rowA][colA] * G[gr][gc].
+ *
+ * ptrR          output, (rowsA * 2) x (colsA * 2) complex entries
+ * ptrA          input, rowsA x colsA complex entries
+ * rowsA, colsA  dimensions of A, counted in complex entries
+ * gate0..gate7  G as (real, imaginary) pairs, ordered
+ *               G[0][0], G[0][1], G[1][0], G[1][1]
+ *
+ * Both matrices are row-major and store each complex entry as two
+ * consecutive floats (real, imaginary). Each work-item fills up to 8 rows
+ * of R, starting at row get_global_id(0) * 8.
+ */
+__kernel void kernel_knk_2x2_Rx4
+(
+	__global float* ptrR,
+	__global float* ptrA,
+	const int rowsA,
+	const int colsA,
+	const float gate0,
+	const float gate1,
+	const float gate2,
+	const float gate3,
+	const float gate4,
+	const float gate5,
+	const float gate6,
+	const float gate7
+)
+{
+	const int rowsR = rowsA * 2;
+	const int colsR = colsA * 2;
+	const int block = get_global_id(0) * 8; //{gpu_only}
+
+	//Gate entries in row-major order, split into real and imaginary parts.
+	const float gateRe[4] = { gate0, gate2, gate4, gate6 };
+	const float gateIm[4] = { gate1, gate3, gate5, gate7 };
+
+	//Stop at rowsR so a block reaching past the last row never writes outside R.
+	for (int rowR = block; rowR < block + 8 && rowR < rowsR; rowR += 2)
+	{
+		for (int colR = 0; colR < colsR; colR += 2)
+		{
+			const int posA = (rowR / 2) * (colsA * 2) + ((colR / 2) * 2);
+			const float rA = ptrA[posA];
+			const float iA = ptrA[posA + 1];
+
+			for (int i = 0; i < 4; i++)
+			{
+				//i / 2 and i % 2 are the row and column of the gate entry.
+				const float rB = gateRe[i];
+				const float iB = gateIm[i];
+				const int posR = (rowR + i / 2) * (colsR * 2) + ((colR + i % 2) * 2);
+
+				//(rA + iA*j)(rB + iB*j) = (rA*rB - iA*iB) + (rA*iB + iA*rB)*j
+				ptrR[posR] = rA * rB - iA * iB;
+				ptrR[posR + 1] = rA * iB + iA * rB;
+			}
+		}
+	}
+}
diff --git a/src/.kernel.tmp.2 b/src/.kernel.tmp.2
index 4b056f1de9f727c4ee15f595cbed426e525e8575..7745e0bf1596a4b6c868e163e11503f01d49229c 100644
GIT binary patch
delta 466
zcmX?O^vHFCw;t%8vzmjVz}gUQJW
zLMD@!aSKnDWaF8!m^Vyg*7HG6cwA?$SpQ`
zqp&=fy+Mp+a)1cy8OZ(w*$UuQhpsIP2sKqQ;YDB-Fu%@8uVUTRc|`
zq%InWO(y@vkY}IVATA1&6P$cYLI%k!0VtmxtC{fG1-JvrGor
T5A>KwEjTPe&WEsSxoQ~zznzRq

delta 9
QcmaFldd6sjw