]> foleosoft.com Git - QAnsel.git/commitdiff
Sun Mar 3 10:57:48 PM EST 2024
authormiha-q <>
Mon, 4 Mar 2024 03:57:48 +0000 (22:57 -0500)
committermiha-q <>
Mon, 4 Mar 2024 03:57:48 +0000 (22:57 -0500)
Makefile
src/QAnsel.c
src/complex.c
src/gpu/gpu.c
src/gpu/gpu_knk.cl
src/gpu/gpu_test.c

index 66dac57aaf99ee5f1a9f7c730653fe21347be361..65d62263f941b8266253a2ea41f352b94058a4d9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,17 @@
 
 all:
+
+       #gpu mmul
        mv src/gpu/gpu_mmul.cl src/gpu/.gpu_mmul.cl
        bash -c 'echo -ne "$$(cat src/gpu/.gpu_mmul.cl)\x00" > src/gpu/gpu_mmul.cl'
        xxd -i src/gpu/gpu_mmul.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/gpu_mmul.cl.c
        mv src/gpu/.gpu_mmul.cl src/gpu/gpu_mmul.cl
+
+       #gpu knk
+       mv src/gpu/gpu_knk.cl src/gpu/.gpu_knk.cl
+       bash -c 'echo -ne "$$(cat src/gpu/.gpu_knk.cl)\x00" > src/gpu/gpu_knk.cl'
+       xxd -i src/gpu/gpu_knk.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/gpu_knk.cl.c
+       mv src/gpu/.gpu_knk.cl src/gpu/gpu_knk.cl
+
        gcc src/QAnsel.c -g -o bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread
        rm -f src/gpu/*.cl.c
index dc3ab57713e472d73cc2056cfc5c67242f32bce1..cb5f0c30854bb62f4248ba4b0fdcb11ea73e3f5b 100644 (file)
@@ -218,12 +218,35 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
                tmp.rows = filter.rows * gate.rows;
                tmp.cols = filter.cols * gate.cols;
                tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(float));
+
+               #ifdef GPU_ENABLED
+               if (USE_GPU)
+               {
+                       GPU_knk
+                       (
+                               tmp.ptr, tmp.rows, tmp.cols,
+                               filter.ptr, filter.rows, filter.cols,
+                               gate.ptr, gate.rows, gate.cols
+                       );
+               }
+               else
+               {
+                       cpx_ncpx_knk_mt
+                       (
+                               tmp.ptr, tmp.rows, tmp.cols,
+                               filter.ptr, filter.rows, filter.cols,
+                               gate.ptr, gate.rows, gate.cols
+                       );
+               }
+               #else
                cpx_ncpx_knk_mt
                (
                        tmp.ptr, tmp.rows, tmp.cols,
                        filter.ptr, filter.rows, filter.cols,
                        gate.ptr, gate.rows, gate.cols
                );
+               #endif
+
 
                free(filter.ptr);
                filter.ptr = tmp.ptr;
@@ -1373,7 +1396,6 @@ void main(int argc, char** argv)
        #ifdef GPU_ENABLED
        USE_GPU = GPU_init();
        #endif
-       USE_GPU = 0;
        RANDOM_FILE = fopen("/dev/TrueRNG0", "r");
        if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r");
        process(argc, argv);
index 54548e307e77e1995c93afc5f1f41a25e6e838b0..453af0b9ea8595df199c2c1710e1d17fcc43543a 100644 (file)
@@ -377,7 +377,6 @@ void* cpx_ncpx_knk_mtc(void *context)
             ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
             ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
             ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-            
         }
     }
 }
index 29305968441ba7574a7070318bd5a5bb5bf6785f..adcefe39854f6c0b832abbf66f808f47ed274379 100644 (file)
@@ -1,10 +1,13 @@
 #include "gpu_mmul.cl.c"
+#include "gpu_knk.cl.c"
 cl_platform_id GPU_platform_id;
 cl_device_id GPU_device_id;
 cl_context GPU_context;
 cl_command_queue GPU_command_queue;
 unsigned char* GPU_mmul_cache = NULL;
 size_t GPU_mmul_cache_len = 0;
+unsigned char* GPU_knk_cache = NULL;
+size_t GPU_knk_cache_len = 0;
 
 uint8_t GPU_init()
 {
@@ -254,4 +257,212 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB,
                fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
                exit(1);
        }
+}
+
+void GPU_knk(float* ptrR, size_t rowsR, size_t colsR, float* ptrA, size_t rowsA, size_t colsA, float* ptrB, size_t rowsB, size_t colsB)
+{
+       //Create buffers
+       size_t sizeA = rowsA * colsA;
+       size_t sizeB = rowsB * colsB;
+       size_t sizeR = rowsR * colsR;
+       cl_int err;
+       cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+               exit(1);
+       }
+       cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+               exit(1);
+       }
+       cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+               exit(1);
+       }
+       //Populate buffers
+       err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+               exit(1);
+       }
+       err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+               exit(1);
+       }
+
+       cl_program program;
+       if (GPU_knk_cache == NULL)
+       {
+               //Load and compile program
+               program = clCreateProgramWithSource(GPU_context, 1, (const char*[]){src_gpu_knk_cl}, NULL, &err);
+               if (err != CL_SUCCESS)
+               {
+                       fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
+                       exit(1);
+               }
+               err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
+               if (err != CL_SUCCESS)
+               {
+                       fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+                       size_t log_size;
+                       clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+                       char* log = malloc(log_size);
+                       clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+                       printf("%s", log);
+                       free(log);
+                       exit(1);
+               }
+               err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &GPU_knk_cache_len, NULL);
+               if (err != CL_SUCCESS)
+               {
+                       fprintf(stderr, "GPU fatal error: clGetProgramInfo() failed.\n");
+                       exit(1);
+               }
+               GPU_knk_cache = malloc(GPU_knk_cache_len);
+               clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &GPU_knk_cache, NULL);
+               if (err != CL_SUCCESS)
+               {
+                       fprintf(stderr, "GPU fatal error: clGetProgramInfo() failed.\n");
+                       exit(1);
+               }
+       }
+       else
+       {
+               program = clCreateProgramWithBinary(GPU_context, 1, &GPU_device_id, &GPU_knk_cache_len, (const unsigned char**)&GPU_knk_cache, NULL, &err);
+               if (err != CL_SUCCESS)
+               {
+                       fprintf(stderr, "GPU fatal error: clCreateProgramWithBinary() failed.\n");
+                       exit(1);
+               }
+       }
+
+       //Setup kernel
+       cl_kernel kernel = clCreateKernel(program, "gpu_knk", &err);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 1, sizeof(int), &rowsR);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 2, sizeof(int), &colsR);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&memA);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 4, sizeof(int), &rowsB);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 5, sizeof(int), &colsB);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&memA);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 7, sizeof(int), &rowsA);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+       err = clSetKernelArg(kernel, 8, sizeof(int), &colsA);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+               exit(1);
+       }
+
+       //Run the program
+       size_t work_size[] = {rowsR, colsR};
+       err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n");
+               exit(1);
+       }
+       //Wait for completion
+       err = clFlush(GPU_command_queue);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clFlush() failed.\n");
+               exit(1);
+       }
+       err = clFinish(GPU_command_queue);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clFinish() failed.\n");
+               exit(1);
+       }
+       //Read results
+       err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+               exit(1);
+       }
+       //Clean up
+       err = clReleaseKernel(kernel);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n");
+               exit(1);
+       }
+       err = clReleaseProgram(program);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n");
+               exit(1);
+       }
+       err = clReleaseMemObject(memA);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+               exit(1);
+       }
+       err = clReleaseMemObject(memB);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+               exit(1);
+       }
+       err = clReleaseMemObject(memR);
+       if (err != CL_SUCCESS)
+       {
+               fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+               exit(1);
+       }
 }
\ No newline at end of file
index 3be09b8bba635c30e9a8f243dc5e102f27910b0d..79f57951b2c7e33b8fad549ff72961d47f1b811f 100644 (file)
@@ -11,36 +11,29 @@ __kernel void gpu_knk
     const int colsB
 )
 {
+    int rowR = get_global_id(0);
+    int colR = get_global_id(1);
 
-    for (size_t rowR = 0; rowR < rowsR; rowR++)
-    {
-        size_t a = data->ID * data->BlockSize;
-        size_t b = (data->ID + 1) * data->BlockSize;
-        if (data->ID == data->Last) b += data->Continue;
-        for (size_t colR = a; colR < b; colR++)
-        {
-            size_t rowA = rowR / rowsB;
-            size_t colA = colR / colsB;
-            size_t rowB = rowR % rowsB;
-            size_t colB = colR % colsB;
+    int rowA = rowR / rowsB;
+    int colA = colR / colsB;
+    int rowB = rowR % rowsB;
+    int colB = colR % colsB;
 
-            float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-            float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-            float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-            float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+    float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+    float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+    float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+    float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
 
-            float first = r1 * r2; //real
-            float outer = r1 * i2; //imaginary
-            float inner = i1 * r2; //imaginary
-            float last  = -(i1 * i2); //real
-            r1 = first + last;
-            i1 = outer + inner;
+    float first = r1 * r2; //real
+    float outer = r1 * i2; //imaginary
+    float inner = i1 * r2; //imaginary
+    float last  = -(i1 * i2); //real
+    r1 = first + last;
+    i1 = outer + inner;
+
+    ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
+    ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
+    ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
+    ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
+}
 
-            ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
-            ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
-            ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
-            ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
-            
-        }
-    }
-}
\ No newline at end of file
index 6c75205a984a2b037c55a8d81d0978f0f4c8636a..68d8baaf978a1bede5fa257fa3c8a6ed5256c896 100644 (file)
@@ -12,6 +12,7 @@ int get_global_id(int id)
     }
 }
 #include "gpu_mmul.cl"
+#include "gpu_knk.cl"
 
 void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
 {
@@ -26,6 +27,18 @@ void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB,
     }
 }
 
+void GPU_knk(float* ptrR, size_t rowsR, size_t colsR, float* ptrA, size_t rowsA, size_t colsA, float* ptrB, size_t rowsB, size_t colsB)
+{
+    for (int i = 0; i < rowsR; i++)
+    {
+        for (int j = 0; j < colsR; j++)
+        {
+            GPU_GLOBAL_ID_0 = i;
+            GPU_GLOBAL_ID_1 = j;
+            gpu_knk(ptrR, rowsR, colsR, ptrA, rowsA, colsA, ptrB, rowsB, colsB);
+        }
+    }
+}
 
 uint8_t GPU_init()
 {