From: miha-q <>
Date: Mon, 4 Mar 2024 05:33:25 +0000 (-0500)
Subject: Mon Mar 4 12:33:25 AM EST 2024
X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=fedf9b311a8bc580f72e8bbdae716a98448f63b2;p=QAnsel.git

Mon Mar 4 12:33:25 AM EST 2024
---

diff --git a/src/gpu/gpu_knk.cl b/src/gpu/gpu_knk.cl
index 5d17ac1..79f5795 100644
--- a/src/gpu/gpu_knk.cl
+++ b/src/gpu/gpu_knk.cl
@@ -11,19 +11,29 @@ __kernel void gpu_knk
     const int colsB
 )
 {
-    const int rowR = get_global_id(0);
-    const int colR = get_global_id(1);
+    int rowR = get_global_id(0);
+    int colR = get_global_id(1);
 
-    const int rowA = rowR / rowsB;
-    const int colA = colR / colsB;
-    const int rowB = rowR % rowsB;
-    const int colB = colR % colsB;
+    int rowA = rowR / rowsB;
+    int colA = colR / colsB;
+    int rowB = rowR % rowsB;
+    int colB = colR % colsB;
 
-    const float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-    const float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-    const float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-    const float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
 
-    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = (r1 * r2) + (-(i1 * i2));
+    float first = r1 * r2; //real
+    float outer = r1 * i2; //imaginary
+    float inner = i1 * r2; //imaginary
+    float last = -(i1 * i2); //real
+    r1 = first + last;
+    i1 = outer + inner;
+
+    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
+    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
+    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
+    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
 }
 
diff --git a/src/gpu/gpu_mmul.cl b/src/gpu/gpu_mmul.cl
index a38fd93..da23464 100644
--- a/src/gpu/gpu_mmul.cl
+++ b/src/gpu/gpu_mmul.cl
@@ -26,4 +26,44 @@ __kernel void gpu_mmul
         sum += ptrA[posA] * ptrB[posB];
     }
     ptrR[rowR * colsR + colR] = sum;
-}
\ No newline at end of file
+}
+
+__kernel void gpu_knk
+(
+    __global float* ptrR,
+    const int rowsR,
+    const int colsR,
+    __global float* ptrA,
+    const int rowsA,
+    const int colsA,
+    __global float* ptrB,
+    const int rowsB,
+    const int colsB
+)
+{
+    int rowR = get_global_id(0);
+    int colR = get_global_id(1);
+
+    int rowA = rowR / rowsB;
+    int colA = colR / colsB;
+    int rowB = rowR % rowsB;
+    int colB = colR % colsB;
+
+    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+
+    float first = r1 * r2; //real
+    float outer = r1 * i2; //imaginary
+    float inner = i1 * r2; //imaginary
+    float last = -(i1 * i2); //real
+    r1 = first + last;
+    i1 = outer + inner;
+
+    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
+    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
+    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
+    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
+}
+