From: miha-q <>
Date: Mon, 4 Mar 2024 05:43:57 +0000 (-0500)
Subject: Mon Mar 4 12:43:57 AM EST 2024
X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=017705c82dbdb526a82c58e9bbce4f9f1f285207;p=QAnsel.git

Mon Mar 4 12:43:57 AM EST 2024
---

diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c
index c04ae9c..cf8a318 100644
--- a/src/gpu/gpu.c
+++ b/src/gpu/gpu.c
@@ -1,6 +1,5 @@
 #define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); }
-#include "gpu_mmul.cl.c"
-#include "gpu_knk.cl.c"
+#include "kernel.cl.c"
 cl_platform_id GPU_platform_id;
 cl_device_id GPU_device_id;
 cl_context GPU_context;
@@ -71,7 +70,6 @@ void GPU_clean()
 
 void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared)
 {
-    printf("a\n");
     //Create buffers
     size_t sizeA = rowsA * shared;
     size_t sizeB = shared * colsB;
@@ -94,7 +92,7 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s
     cl_program program;
     if (GPU_cache == NULL)
     {
-        program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err);
+        program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err);
         gpuerr(clCreateProgramWithSource);
         err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
         if (err != CL_SUCCESS)
@@ -152,7 +150,6 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s
 
 void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int colsA, float* ptrB, int rowsB, int colsB)
 {
-    printf("b\n");
     //Create buffers
     size_t sizeA = (rowsA * 2) * (colsA * 2);
     size_t sizeB = (rowsB * 2) * (colsB * 2);
@@ -175,7 +172,7 @@ void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int cols
     cl_program program;
     if (GPU_cache == NULL)
     {
-        program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err);
+        program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err);
         gpuerr(clCreateProgramWithSource);
         err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
         if (err != CL_SUCCESS)
diff --git a/src/gpu/gpu_knk.cl b/src/gpu/gpu_knk.cl
deleted file mode 100644
index 79f5795..0000000
--- a/src/gpu/gpu_knk.cl
+++ /dev/null
@@ -1,39 +0,0 @@
-__kernel void gpu_knk
-(
-    __global float* ptrR,
-    const int rowsR,
-    const int colsR,
-    __global float* ptrA,
-    const int rowsA,
-    const int colsA,
-    __global float* ptrB,
-    const int rowsB,
-    const int colsB
-)
-{
-    int rowR = get_global_id(0);
-    int colR = get_global_id(1);
-
-    int rowA = rowR / rowsB;
-    int colA = colR / colsB;
-    int rowB = rowR % rowsB;
-    int colB = colR % colsB;
-
-    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
-
-    float first = r1 * r2; //real
-    float outer = r1 * i2; //imaginary
-    float inner = i1 * r2; //imaginary
-    float last = -(i1 * i2); //real
-    r1 = first + last;
-    i1 = outer + inner;
-
-    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
-    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
-    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
-    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-}
-
diff --git a/src/gpu/gpu_mmul.cl b/src/gpu/gpu_mmul.cl
deleted file mode 100644
index da23464..0000000
--- a/src/gpu/gpu_mmul.cl
+++ /dev/null
@@ -1,69 +0,0 @@
-__kernel void gpu_mmul
-(
-    __global float* ptrR,
-    __global float* ptrA,
-    __global float* ptrB,
-    const int rowsA,
-    const int colsB,
-    const int shared
-)
-{
-    const int colsA = shared;
-    const int rowsB = shared;
-    const int rowsR = rowsA;
-    const int colsR = colsB;
-    const int rowR = get_global_id(0);
-    const int colR = get_global_id(1);
-    int posA, posB;
-    float sum = 0;
-
-    const int posR = colR + rowR * colsR;
-
-    for (int i = 0; i < shared; i++)
-    {
-        int posA = i + rowR * colsA;
-        int posB = colR + i * colsB;
-        sum += ptrA[posA] * ptrB[posB];
-    }
-    ptrR[rowR * colsR + colR] = sum;
-}
-
-__kernel void gpu_knk
-(
-    __global float* ptrR,
-    const int rowsR,
-    const int colsR,
-    __global float* ptrA,
-    const int rowsA,
-    const int colsA,
-    __global float* ptrB,
-    const int rowsB,
-    const int colsB
-)
-{
-    int rowR = get_global_id(0);
-    int colR = get_global_id(1);
-
-    int rowA = rowR / rowsB;
-    int colA = colR / colsB;
-    int rowB = rowR % rowsB;
-    int colB = colR % colsB;
-
-    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
-
-    float first = r1 * r2; //real
-    float outer = r1 * i2; //imaginary
-    float inner = i1 * r2; //imaginary
-    float last = -(i1 * i2); //real
-    r1 = first + last;
-    i1 = outer + inner;
-
-    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
-    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
-    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
-    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-}
-
diff --git a/src/gpu/kernel.cl b/src/gpu/kernel.cl
new file mode 100644
index 0000000..da23464
--- /dev/null
+++ b/src/gpu/kernel.cl
@@ -0,0 +1,69 @@
+__kernel void gpu_mmul
+(
+    __global float* ptrR,
+    __global float* ptrA,
+    __global float* ptrB,
+    const int rowsA,
+    const int colsB,
+    const int shared
+)
+{
+    const int colsA = shared;
+    const int rowsB = shared;
+    const int rowsR = rowsA;
+    const int colsR = colsB;
+    const int rowR = get_global_id(0);
+    const int colR = get_global_id(1);
+    int posA, posB;
+    float sum = 0;
+
+    const int posR = colR + rowR * colsR;
+
+    for (int i = 0; i < shared; i++)
+    {
+        int posA = i + rowR * colsA;
+        int posB = colR + i * colsB;
+        sum += ptrA[posA] * ptrB[posB];
+    }
+    ptrR[rowR * colsR + colR] = sum;
+}
+
+__kernel void gpu_knk
+(
+    __global float* ptrR,
+    const int rowsR,
+    const int colsR,
+    __global float* ptrA,
+    const int rowsA,
+    const int colsA,
+    __global float* ptrB,
+    const int rowsB,
+    const int colsB
+)
+{
+    int rowR = get_global_id(0);
+    int colR = get_global_id(1);
+
+    int rowA = rowR / rowsB;
+    int colA = colR / colsB;
+    int rowB = rowR % rowsB;
+    int colB = colR % colsB;
+
+    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+
+    float first = r1 * r2; //real
+    float outer = r1 * i2; //imaginary
+    float inner = i1 * r2; //imaginary
+    float last = -(i1 * i2); //real
+    r1 = first + last;
+    i1 = outer + inner;
+
+    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
+    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
+    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
+    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
+}
+