From 017705c82dbdb526a82c58e9bbce4f9f1f285207 Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Mon, 4 Mar 2024 00:43:57 -0500 Subject: [PATCH] Mon Mar 4 12:43:57 AM EST 2024 --- src/gpu/gpu.c | 9 +++---- src/gpu/gpu_knk.cl | 39 ------------------------------ src/gpu/{gpu_mmul.cl => kernel.cl} | 0 3 files changed, 3 insertions(+), 45 deletions(-) delete mode 100644 src/gpu/gpu_knk.cl rename src/gpu/{gpu_mmul.cl => kernel.cl} (100%) diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c index c04ae9c..cf8a318 100644 --- a/src/gpu/gpu.c +++ b/src/gpu/gpu.c @@ -1,6 +1,5 @@ #define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); } -#include "gpu_mmul.cl.c" -#include "gpu_knk.cl.c" +#include "kernel.cl.c" cl_platform_id GPU_platform_id; cl_device_id GPU_device_id; cl_context GPU_context; @@ -71,7 +70,6 @@ void GPU_clean() void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared) { - printf("a\n"); //Create buffers size_t sizeA = rowsA * shared; size_t sizeB = shared * colsB; @@ -94,7 +92,7 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s cl_program program; if (GPU_cache == NULL) { - program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err); + program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err); gpuerr(clCreateProgramWithSource); err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL); if (err != CL_SUCCESS) @@ -152,7 +150,6 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int colsA, float* ptrB, int rowsB, int colsB) { - printf("b\n"); //Create buffers size_t sizeA = (rowsA * 2) * (colsA * 2); size_t sizeB = (rowsB * 2) * (colsB * 2); @@ -175,7 +172,7 @@ void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int cols cl_program program; if (GPU_cache == NULL) { - program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err); + program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err); gpuerr(clCreateProgramWithSource); err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL); if (err != CL_SUCCESS) diff --git a/src/gpu/gpu_knk.cl b/src/gpu/gpu_knk.cl deleted file mode 100644 index 79f5795..0000000 --- a/src/gpu/gpu_knk.cl +++ /dev/null @@ -1,39 +0,0 @@ -__kernel void gpu_knk -( - __global float* ptrR, - const int rowsR, - const int colsR, - __global float* ptrA, - const int rowsA, - const int colsA, - __global float* ptrB, - const int rowsB, - const int colsB -) -{ - int rowR = get_global_id(0); - int colR = get_global_id(1); - - int rowA = rowR / rowsB; - int colA = colR / colsB; - int rowB = rowR % rowsB; - int colB = colR % colsB; - - float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)]; - float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)]; - float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)]; - float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)]; - - float first = r1 * r2; //real - float outer = r1 * i2; //imaginary - float inner = i1 * r2; //imaginary - float last = -(i1 * i2); //real - r1 = first + last; - i1 = outer + inner; - - ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1; - ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1; - ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1; - ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1; -} - diff --git a/src/gpu/gpu_mmul.cl b/src/gpu/kernel.cl similarity index 100% rename from src/gpu/gpu_mmul.cl rename to src/gpu/kernel.cl -- 2.39.5