Mon Mar 4 12:43:57 AM EST 2024

author miha-q <>

Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)

committer miha-q <>

Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)
author miha-q <>
Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)
committer miha-q <>
Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)
diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c

index c04ae9c8b97e32fac35a1c1b53612ad2af1a2364..cf8a318f1951828b6a6c2b34b0a109b201a0f969 100644 (file)
--- a/src/gpu/gpu.c
+++ b/src/gpu/gpu.c
@@ -1,6 +1,5 @@
  #define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); }
-#include "gpu_mmul.cl.c"
-#include "gpu_knk.cl.c"
+#include "kernel.cl.c"
  cl_platform_id GPU_platform_id;
  cl_device_id GPU_device_id;
  cl_context GPU_context;
@@ -71,7 +70,6 @@ void GPU_clean()
  
  void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared)
  {
-       printf("a\n");
         //Create buffers
         size_t sizeA = rowsA * shared;
         size_t sizeB = shared * colsB;
@@ -94,7 +92,7 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s
         cl_program program;
         if (GPU_cache == NULL)
         {
-               program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err);
+               program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err);
                 gpuerr(clCreateProgramWithSource);
                 err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
                 if (err != CL_SUCCESS)
@@ -152,7 +150,6 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int s
  
  void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int colsA, float* ptrB, int rowsB, int colsB)
  {
-       printf("b\n");
         //Create buffers
         size_t sizeA = (rowsA * 2) * (colsA * 2);
         size_t sizeB = (rowsB * 2) * (colsB * 2);
@@ -175,7 +172,7 @@ void GPU_knk(float* ptrR, int rowsR, int colsR, float* ptrA, int rowsA, int cols
         cl_program program;
         if (GPU_cache == NULL)
         {
-               program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_mmul_cl}, NULL, &err);
+               program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_kernel_cl}, NULL, &err);
                 gpuerr(clCreateProgramWithSource);
                 err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
                 if (err != CL_SUCCESS)
diff --git a/src/gpu/gpu_knk.cl b/src/gpu/gpu_knk.cl

deleted file mode 100644 (file)

index 79f5795..0000000
--- a/src/gpu/gpu_knk.cl
+++ /dev/null
@@ -1,39 +0,0 @@
-__kernel void gpu_knk
-(
-    __global float* ptrR,
-    const int rowsR,
-    const int colsR,
-    __global float* ptrA,
-    const int rowsA,
-    const int colsA,
-    __global float* ptrB,
-    const int rowsB,
-    const int colsB
-)
-{
-    int rowR = get_global_id(0);
-    int colR = get_global_id(1);
-
-    int rowA = rowR / rowsB;
-    int colA = colR / colsB;
-    int rowB = rowR % rowsB;
-    int colB = colR % colsB;
-
-    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
-
-    float first = r1 * r2; //real
-    float outer = r1 * i2; //imaginary
-    float inner = i1 * r2; //imaginary
-    float last  = -(i1 * i2); //real
-    r1 = first + last;
-    i1 = outer + inner;
-
-    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
-    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
-    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
-    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-}
-
diff --git a/src/gpu/gpu_mmul.cl b/src/gpu/gpu_mmul.cl

deleted file mode 100644 (file)

index da23464..0000000
--- a/src/gpu/gpu_mmul.cl
+++ /dev/null
@@ -1,69 +0,0 @@
-__kernel void gpu_mmul
-(
-    __global float* ptrR,
-    __global float* ptrA,
-    __global float* ptrB,
-    const int rowsA,
-    const int colsB,
-    const int shared
-)
-{
-    const int colsA = shared;
-    const int rowsB = shared;
-    const int rowsR = rowsA;
-    const int colsR = colsB;
-    const int rowR = get_global_id(0);
-    const int colR = get_global_id(1);
-    int posA, posB;
-    float sum = 0;
-
-    const int posR = colR + rowR * colsR;
-
-    for (int i = 0; i < shared; i++)
-    {
-        int posA = i + rowR * colsA;
-        int posB = colR + i * colsB;
-        sum += ptrA[posA] * ptrB[posB];
-    }
-    ptrR[rowR * colsR + colR] = sum;
-}
-
-__kernel void gpu_knk
-(
-    __global float* ptrR,
-    const int rowsR,
-    const int colsR,
-    __global float* ptrA,
-    const int rowsA,
-    const int colsA,
-    __global float* ptrB,
-    const int rowsB,
-    const int colsB
-)
-{
-    int rowR = get_global_id(0);
-    int colR = get_global_id(1);
-
-    int rowA = rowR / rowsB;
-    int colA = colR / colsB;
-    int rowB = rowR % rowsB;
-    int colB = colR % colsB;
-
-    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
-
-    float first = r1 * r2; //real
-    float outer = r1 * i2; //imaginary
-    float inner = i1 * r2; //imaginary
-    float last  = -(i1 * i2); //real
-    r1 = first + last;
-    i1 = outer + inner;
-
-    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
-    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
-    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
-    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-}
-
diff --git a/src/gpu/kernel.cl b/src/gpu/kernel.cl

new file mode 100644 (file)

index 0000000..da23464
--- /dev/null
+++ b/src/gpu/kernel.cl
@@ -0,0 +1,69 @@
+__kernel void gpu_mmul // Matrix product: each work-item computes one element of R = A * B.
+(
+    __global float* ptrR, // output buffer; element (rowR, colR) is stored at rowR * colsB + colR
+    __global float* ptrA, // left operand, indexed row-major with `shared` floats per row
+    __global float* ptrB, // right operand, indexed row-major with `colsB` floats per row
+    const int rowsA, // row count of A; only copied into rowsR below, never used for indexing
+    const int colsB, // column count of B; also the row stride of B and of R
+    const int shared // inner dimension: columns of A, rows of B, and the loop trip count
+)
+{
+    const int colsA = shared; // row stride of A
+    const int rowsB = shared; // NOTE(review): never read in this kernel
+    const int rowsR = rowsA; // NOTE(review): never read in this kernel
+    const int colsR = colsB; // row stride of R
+    const int rowR = get_global_id(0); // global id in dimension 0 selects the output row
+    const int colR = get_global_id(1); // global id in dimension 1 selects the output column
+    int posA, posB; // NOTE(review): unused; shadowed by the loop-local posA/posB declared below
+    float sum = 0; // running dot product of row rowR of A with column colR of B
+
+    const int posR = colR + rowR * colsR; // NOTE(review): computed but unused; the final store recomputes the same index
+
+    for (int i = 0; i < shared; i++) // walk along the shared dimension
+    {
+        int posA = i + rowR * colsA; // flat index of A[rowR][i]
+        int posB = colR + i * colsB; // flat index of B[i][colR]
+        sum += ptrA[posA] * ptrB[posB];
+    }
+    ptrR[rowR * colsR + colR] = sum; // no bounds check on rowR/colR: assumes the launch range matches the shape of R -- TODO confirm against the host enqueue call
+}
+
+__kernel void gpu_knk // Each work-item multiplies one complex element of A by one of B and stores it as one element of R.
+(
+    __global float* ptrR, // output; each element occupies a 2x2 block of floats, row stride colsR * 2
+    const int rowsR, // not referenced in the body
+    const int colsR, // element columns of R; sets the row stride of ptrR
+    __global float* ptrA, // first operand in the same 2x2-block layout, row stride colsA * 2
+    const int rowsA, // not referenced in the body
+    const int colsA, // element columns of A; sets the row stride of ptrA
+    __global float* ptrB, // second operand in the same 2x2-block layout, row stride colsB * 2
+    const int rowsB, // element rows of B; divisor for splitting the output row index
+    const int colsB // element columns of B; divisor for splitting the output column index
+)
+{
+    int rowR = get_global_id(0); // output element row (global id, dimension 0)
+    int colR = get_global_id(1); // output element column (global id, dimension 1)
+
+    int rowA = rowR / rowsB; // quotient selects the element of A (Kronecker-product style index split)
+    int colA = colR / colsB;
+    int rowB = rowR % rowsB; // remainder selects the element of B
+    int colB = colR % colsB;
+
+    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)]; // bottom-right float of A's 2x2 block, used as the real part
+    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)]; // bottom-left float of A's 2x2 block, used as the imaginary part
+    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)]; // same two reads for B's element
+    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+
+    float first = r1 * r2; //real
+    float outer = r1 * i2; //imaginary
+    float inner = i1 * r2; //imaginary
+    float last  = -(i1 * i2); //real (negated because i * i = -1)
+    r1 = first + last; // real part of the complex product; r1 is reused as the result
+    i1 = outer + inner; // imaginary part of the complex product; i1 is reused as the result
+
+    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1; // top-left of the output block: real
+    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1; // top-right: negated imaginary
+    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1; // bottom-left: imaginary
+    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1; // bottom-right: real
+}
+
author	miha-q <>
	Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)
committer	miha-q <>
	Mon, 4 Mar 2024 05:43:57 +0000 (00:43 -0500)
src/gpu/gpu.c		patch \| blob \| history
src/gpu/gpu_knk.cl	[deleted file]	patch \| blob \| history
src/gpu/gpu_mmul.cl	[deleted file]	patch \| blob \| history
src/gpu/kernel.cl	[new file with mode: 0644]	patch \| blob