From d06fd9e2dc741fb7270941a5b85644e1b59e920d Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Mon, 4 Mar 2024 12:12:35 -0500 Subject: [PATCH] Mon Mar 4 12:12:35 PM EST 2024 --- src/complex.c | 9 ++++----- src/kernel.cl | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/complex.c b/src/complex.c index f616e8d..d6e5610 100644 --- a/src/complex.c +++ b/src/complex.c @@ -236,10 +236,8 @@ void cpx_mtx_clean() free(cpx_mtx_cache); } -void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared) +void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB) { - int colsA = shared; - int rowsB = shared; int rowsR = rowsA; int colsR = colsB; @@ -297,8 +295,9 @@ void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg); err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); gpuerr(clSetKernelArg); - err = clSetKernelArg(kernel, 5, sizeof(int), &shared); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg); //Run the program err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL); diff --git a/src/kernel.cl b/src/kernel.cl index f72d87e..0d9c69a 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -4,12 +4,11 @@ __kernel void kernel_dot __global float* ptrA, __global float* ptrB, const int rowsA, - const int colsB, - const int shared + const int colsA, + const int rowsB, + const int colsB ) { - const int colsA = shared; - const int rowsB = shared; const int rowsR = rowsA; const int colsR = colsB; const int rowR = get_global_id(0); @@ -20,7 +19,7 @@ __kernel void kernel_dot const int posR = rowR * (colsR * 2) + (colR * 2); - for (int i = 0; i < shared; i++) + for (int i = 0; i < colsA; i++) { int posA = rowR * (colsA * 2) + (i * 2); int posB = i * (colsB * 2) + (colR * 2); -- 2.39.5