From daa057f650d3bfadc45a602150e4684b826de10a Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Mon, 4 Mar 2024 00:49:25 -0500 Subject: [PATCH] Mon Mar 4 12:49:25 AM EST 2024 --- src/gpu/gpu.c | 4 ++-- src/gpu/kernel.cl | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c index 1e01b0d..cf8a318 100644 --- a/src/gpu/gpu.c +++ b/src/gpu/gpu.c @@ -209,8 +209,8 @@ void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int cols err = clSetKernelArg(kernel, 8, sizeof(int), &colsB); gpuerr(clSetKernelArg); //Run the program - size_t work_size[] = {rowsR, colsR, 4}; - err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 3, NULL, work_size, NULL, 0, NULL, NULL); + size_t work_size[] = {rowsR, colsR}; + err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); gpuerr(clEnqueueNDRangeKernel); //Wait for completion diff --git a/src/gpu/kernel.cl b/src/gpu/kernel.cl index 3b465d1..da23464 100644 --- a/src/gpu/kernel.cl +++ b/src/gpu/kernel.cl @@ -43,7 +43,6 @@ __kernel void gpu_knk { int rowR = get_global_id(0); int colR = get_global_id(1); - int oper = get_global_id(2); int rowA = rowR / rowsB; int colA = colR / colsB; @@ -62,12 +61,9 @@ __kernel void gpu_knk r1 = first + last; i1 = outer + inner; - switch (oper) - { - case 0: ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1; break; - case 1: ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1; break; - case 2: ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1; break; - case 3: ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1; break; - } + ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1; + ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1; + ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1; + ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1; } -- 2.39.5