From ee436f051e11d5a18c28c8a44fc27f378e52bb68 Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Mon, 4 Mar 2024 12:01:36 -0500 Subject: [PATCH] Mon Mar 4 12:01:36 PM EST 2024 --- src/complex.c | 13 +++++-------- src/kernel.cl | 5 +++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/complex.c b/src/complex.c index b8e4197..3a111e7 100644 --- a/src/complex.c +++ b/src/complex.c @@ -151,11 +151,10 @@ void cpx_mtx_dot(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t col void cpx_mtx_knk(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB) { int rowsR = rowsA * rowsB; - int colsR = colsA * colsB; for (int i = 0; i < rowsR; i++) { GPU_GLOBAL_ID_0 = i; - kernel_knk(ptrR, ptrA, ptrB, rowsR, colsR, rowsA, colsA, rowsB, colsB); + kernel_knk(ptrR, ptrA, ptrB, rowsA, colsA, rowsB, colsB); } } @@ -375,12 +374,10 @@ void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 3, sizeof(int), &rowsR); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 4, sizeof(int), &colsR); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 5, sizeof(int), &rowsA); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 6, sizeof(int), &colsA); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 7, sizeof(int), &rowsB); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 8, sizeof(int), &colsB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg); //Run the program err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR}, NULL, 0, NULL, NULL); diff --git a/src/kernel.cl b/src/kernel.cl index df3af2e..542ec74 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -48,14 +48,15 @@ __kernel void kernel_knk __global float* ptrR, __global float* ptrA, __global float* ptrB, - const int rowsR, - const int colsR, const int rowsA, const int colsA, const int rowsB, const int colsB ) { + + const int rowsR = rowsA * rowsB; + const int colsR = colsA * colsB; int rowR = get_global_id(0); for (int colR = 0; colR < colsR; colR++) { -- 2.39.5