From e2d033e37149cd04b001d9c8118155e089717854 Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Sat, 2 Mar 2024 20:46:34 -0500 Subject: [PATCH] Sat Mar 2 08:46:34 PM EST 2024 --- Makefile | 5 +- src/QAnsel.c | 64 ++++++-------- src/g_mmul.cl | 11 --- src/gpu.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++ src/gpu_mmul.cl | 11 +++ 5 files changed, 260 insertions(+), 52 deletions(-) delete mode 100644 src/g_mmul.cl create mode 100644 src/gpu.c create mode 100644 src/gpu_mmul.cl diff --git a/Makefile b/Makefile index 0b035a8..145e07c 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,5 @@ all: - cd src && gcc QAnsel.c -g -o ../bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread \ No newline at end of file + cd src && \ + xxd -i gpu_mmul.cl > gpu_mmul.cl.c && \ + gcc QAnsel.c -g -o ../bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread + rm -f src/*.cl.c \ No newline at end of file diff --git a/src/QAnsel.c b/src/QAnsel.c index b4f3a9d..1d084c9 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -16,9 +16,7 @@ uint8_t USE_GPU = 0; #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_TARGET_OPENCL_VERSION 120 #include -cl_platform_id GPU_platform_id; -cl_device_id GPU_device_id; -cl_context GPU_context; +#include "gpu.c" #endif typedef struct @@ -233,11 +231,30 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst } cpx_mtx_init(&tmp, stateVector->rows, stateVector->cols); + #ifdef GPU_ENABLED + if (USE_GPU) + { + GPU_mmul + ( + tmp.ptr, stateVector->ptr, filter.ptr, + stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2 + ); + } + else + { + cpx_ncpx_mmul_mt + ( + tmp.ptr, stateVector->ptr, filter.ptr, + stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2 + ); + } + #else cpx_ncpx_mmul_mt ( tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2 ); + #endif free(stateVector->ptr); stateVector->ptr = tmp.ptr; free(filter.ptr); @@ -1350,43 +1367,6 @@ void process(int argc, char** argv) free(lineIDs); } -#ifdef GPU_ENABLED -uint8_t GPU_init() -{ - cl_uint count; - cl_int err; - - err = clGetPlatformIDs(1, &GPU_platform_id, &count); - if (err != CL_SUCCESS || count == 0) - { - if (err == 0) - fprintf(stderr, "GPU disabled: No supported platforms found.\n"); - else - fprintf(stderr, "GPU disabled: clGetPlatformIDs() failed.\n"); - return 0; - } - - err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count); - if (err != CL_SUCCESS || count == 0) - { - if (count == 0) - fprintf(stderr, "GPU disabled: No supported GPUs found.\n"); - else - fprintf(stderr, "GPU disabled: clGetDeviceIDs() failed.\n"); - return 0; - } - - GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU disabled: clCreateContext() failed.\n"); - return 0; - } - - return 1; -} -#endif - void main(int argc, char** argv) { #ifdef GPU_ENABLED @@ -1397,4 +1377,8 @@ void main(int argc, char** argv) if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r"); process(argc, argv); fclose(RANDOM_FILE); + + #ifdef GPU_ENABLED + if (USE_GPU) GPU_clean(); + #endif } \ No newline at end of file diff --git a/src/g_mmul.cl b/src/g_mmul.cl deleted file mode 100644 index f8283ea..0000000 --- a/src/g_mmul.cl +++ /dev/null @@ -1,11 +0,0 @@ -__kernel g_mmul(__global double* A, __global double* B, __global double* C, const unsigned int N, const unsigned int W) -{ - int row = get_global_id(0); - int col = get_global_id(1); - double sum = 0; - for (int i = 0; i < N; i++) - { - sum += A[row * W + i] * B[i * W + col]; - } - C[row * W + col] = sum; -} \ No newline at end of file diff --git a/src/gpu.c b/src/gpu.c new file mode 100644 index 0000000..991db40 --- /dev/null +++ b/src/gpu.c @@ -0,0 +1,221 @@ +#include "gpu_mmul.cl.c" +cl_platform_id GPU_platform_id; +cl_device_id GPU_device_id; +cl_context GPU_context; +cl_command_queue GPU_command_queue; + +uint8_t GPU_init() +{ + cl_uint count; + cl_int err; + + err = clGetPlatformIDs(1, &GPU_platform_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (err == 0) + fprintf(stderr, "GPU error: No supported platforms found.\n"); + else + fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n"); + return 0; + } + + err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (count == 0) + fprintf(stderr, "GPU error: No supported GPUs found.\n"); + else + fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n"); + return 0; + } + + GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateContext() failed.\n"); + return 0; + } + + GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n"); + err = clReleaseContext(GPU_context); + if (err != CL_SUCCESS) + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + return 0; + } + + return 1; +} + +void GPU_clean() +{ + cl_int err; + err = clReleaseCommandQueue(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n"); + } + err = clReleaseContext(GPU_context); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + } +} + +void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared) +{ + //Create buffers + size_t sizeA = rowsA * shared; + size_t sizeB = shared * colsB; + size_t sizeR = rowsA * colsB; + cl_int err; + cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeA, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeB, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(double) * sizeR, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + //Populate buffers + err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(double) * sizeA, ptrA, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(double) * sizeB, ptrB, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + //Load and compile program + cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)(&gpu_mmul_cl), (const size_t*)(&gpu_mmul_cl_len), &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n"); + exit(1); + } + err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n"); + exit(1); + } + //Setup kernel + cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + //Run the program + size_t work_size[] = {rowsA, colsB}; + err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n"); + exit(1); + } + //Wait for completion + err = clFlush(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clFlush() failed.\n"); + exit(1); + } + err = clFinish(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clFinish() failed.\n"); + exit(1); + } + //Read results + err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(double) * sizeR, ptrR, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + + //Clean up + err = clReleaseKernel(kernel); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n"); + exit(1); + } + err = clReleaseProgram(program); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } +} \ No newline at end of file diff --git a/src/gpu_mmul.cl b/src/gpu_mmul.cl new file mode 100644 index 0000000..f58257f --- /dev/null +++ b/src/gpu_mmul.cl @@ -0,0 +1,11 @@ +__kernel gpu_mmul(__global double* ptrR, __global double* ptrA, __global double* ptrB, const int N, const int W) +{ + int row = get_global_id(0); + int col = get_global_id(1); + double sum = 0; + for (int i = 0; i < N; i++) + { + sum += ptrA[row * W + i] * ptrB[i * W + col]; + } + ptrR[row * W + col] = sum; +} \ No newline at end of file -- 2.39.5