From: miha-q <> Date: Sun, 3 Mar 2024 05:29:57 +0000 (-0500) Subject: Sun Mar 3 12:29:57 AM EST 2024 X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=3496d8cd4bd819efc5b1f6b6abc0ef64ad4c6e0a;p=QAnsel.git Sun Mar 3 12:29:57 AM EST 2024 --- diff --git a/Makefile b/Makefile index 29a4948..2b53049 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ all: - mv src/gpu_mmul.cl src/.gpu_mmul.cl - bash -c 'echo -ne "$$(cat src/.gpu_mmul.cl)\x00" > src/gpu_mmul.cl' - xxd -i src/gpu_mmul.cl > src/gpu_mmul.cl.c - mv src/.gpu_mmul.cl src/gpu_mmul.cl + mv src/gpu/gpu_mmul.cl src/gpu/.gpu_mmul.cl + bash -c 'echo -ne "$$(cat src/gpu/.gpu_mmul.cl)\x00" > src/gpu/gpu_mmul.cl' + xxd -i src/gpu/gpu_mmul.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/gpu_mmul.cl.c + mv src/gpu/.gpu_mmul.cl src/gpu/gpu_mmul.cl gcc src/QAnsel.c -g -o bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread - rm -f src/*.cl.c \ No newline at end of file + rm -f src/*.cl.c diff --git a/src/.gpu_mmul.cl.tmp b/src/.gpu_mmul.cl.tmp deleted file mode 100644 index 369c9dd..0000000 Binary files a/src/.gpu_mmul.cl.tmp and /dev/null differ diff --git a/src/QAnsel.c b/src/QAnsel.c index d0748da..8801aeb 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -16,7 +16,8 @@ uint8_t USE_GPU = 0; #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_TARGET_OPENCL_VERSION 120 #include -#include "gpu.c" +#include "gpu/gpu.c" + #endif typedef struct diff --git a/src/gpu.c b/src/gpu.c deleted file mode 100644 index b5e2ef0..0000000 --- a/src/gpu.c +++ /dev/null @@ -1,238 +0,0 @@ -#include "gpu_mmul.cl.c" -cl_platform_id GPU_platform_id; -cl_device_id GPU_device_id; -cl_context GPU_context; -cl_command_queue GPU_command_queue; - -uint8_t GPU_init() -{ - cl_uint count; - cl_int err; - - err = clGetPlatformIDs(1, &GPU_platform_id, &count); - if (err != CL_SUCCESS || count == 0) - { - if (err == 0) - fprintf(stderr, "GPU error: No supported platforms found.\n"); - else - fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n"); - return 0; - } - - err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count); - if (err != CL_SUCCESS || count == 0) - { - if (count == 0) - fprintf(stderr, "GPU error: No supported GPUs found.\n"); - else - fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n"); - return 0; - } - - GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU error: clCreateContext() failed.\n"); - return 0; - } - - GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n"); - err = clReleaseContext(GPU_context); - if (err != CL_SUCCESS) - fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); - return 0; - } - - return 1; -} - -void GPU_clean() -{ - cl_int err; - err = clReleaseCommandQueue(GPU_command_queue); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n"); - } - err = clReleaseContext(GPU_context); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); - } -} - -void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared) -{ - //Create buffers - size_t sizeA = rowsA * shared; - size_t sizeB = shared * colsB; - size_t sizeR = rowsA * colsB; - cl_int err; - cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); - exit(1); - } - cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); - exit(1); - } - cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); - exit(1); - } - //Populate buffers - err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); - exit(1); - } - err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); - exit(1); - } - //Load and compile program - printf("------------------------------\n%s\n------------------------------\n", src_gpu_mmul_cl); - char* tmp = malloc(src_gpu_mmul_cl_len); - memcpy(tmp, src_gpu_mmul_cl, src_gpu_mmul_cl_len); - const char* ptr = (const char*)src_gpu_mmul_cl; - cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err); - if (err != CL_SUCCESS) - { - free(tmp); - fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n"); - exit(1); - } - err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL); - free(tmp); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n"); - size_t log_size; - clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); - char* log = malloc(log_size); - clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); - printf("%s", log); - free(log); - exit(1); - } - //Setup kernel - cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - err = clSetKernelArg(kernel, 5, sizeof(int), &shared); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); - exit(1); - } - //Run the program - size_t work_size[] = {rowsA, colsB}; - err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n"); - exit(1); - } - //Wait for completion - err = clFlush(GPU_command_queue); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clFlush() failed.\n"); - exit(1); - } - err = clFinish(GPU_command_queue); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clFinish() failed.\n"); - exit(1); - } - //Read results - err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); - exit(1); - } - //Clean up - err = clReleaseKernel(kernel); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n"); - exit(1); - } - err = clReleaseProgram(program); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n"); - exit(1); - } - err = clReleaseMemObject(memA); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); - exit(1); - } - err = clReleaseMemObject(memB); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); - exit(1); - } - err = clReleaseMemObject(memR); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); - exit(1); - } - err = clReleaseMemObject(memR); - if (err != CL_SUCCESS) - { - fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); - exit(1); - } -} \ No newline at end of file diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c new file mode 100644 index 0000000..b5e2ef0 --- /dev/null +++ b/src/gpu/gpu.c @@ -0,0 +1,238 @@ +#include "gpu_mmul.cl.c" +cl_platform_id GPU_platform_id; +cl_device_id GPU_device_id; +cl_context GPU_context; +cl_command_queue GPU_command_queue; + +uint8_t GPU_init() +{ + cl_uint count; + cl_int err; + + err = clGetPlatformIDs(1, &GPU_platform_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (err == 0) + fprintf(stderr, "GPU error: No supported platforms found.\n"); + else + fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n"); + return 0; + } + + err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (count == 0) + fprintf(stderr, "GPU error: No supported GPUs found.\n"); + else + fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n"); + return 0; + } + + GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateContext() failed.\n"); + return 0; + } + + GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n"); + err = clReleaseContext(GPU_context); + if (err != CL_SUCCESS) + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + return 0; + } + + return 1; +} + +void GPU_clean() +{ + cl_int err; + err = clReleaseCommandQueue(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n"); + } + err = clReleaseContext(GPU_context); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + } +} + +void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared) +{ + //Create buffers + size_t sizeA = rowsA * shared; + size_t sizeB = shared * colsB; + size_t sizeR = rowsA * colsB; + cl_int err; + cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n"); + exit(1); + } + //Populate buffers + err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + //Load and compile program + printf("------------------------------\n%s\n------------------------------\n", src_gpu_mmul_cl); + char* tmp = malloc(src_gpu_mmul_cl_len); + memcpy(tmp, src_gpu_mmul_cl, src_gpu_mmul_cl_len); + const char* ptr = (const char*)src_gpu_mmul_cl; + cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err); + if (err != CL_SUCCESS) + { + free(tmp); + fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n"); + exit(1); + } + err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL); + free(tmp); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n"); + size_t log_size; + clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char* log = malloc(log_size); + clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + printf("%s", log); + free(log); + exit(1); + } + //Setup kernel + cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + err = clSetKernelArg(kernel, 5, sizeof(int), &shared); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n"); + exit(1); + } + //Run the program + size_t work_size[] = {rowsA, colsB}; + err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n"); + exit(1); + } + //Wait for completion + err = clFlush(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clFlush() failed.\n"); + exit(1); + } + err = clFinish(GPU_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clFinish() failed.\n"); + exit(1); + } + //Read results + err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n"); + exit(1); + } + //Clean up + err = clReleaseKernel(kernel); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n"); + exit(1); + } + err = clReleaseProgram(program); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memA); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memB); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } + err = clReleaseMemObject(memR); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n"); + exit(1); + } +} \ No newline at end of file diff --git a/src/gpu/gpu_mmul.cl b/src/gpu/gpu_mmul.cl new file mode 100644 index 0000000..a38fd93 --- /dev/null +++ b/src/gpu/gpu_mmul.cl @@ -0,0 +1,29 @@ +__kernel void gpu_mmul +( + __global float* ptrR, + __global float* ptrA, + __global float* ptrB, + const int rowsA, + const int colsB, + const int shared +) +{ + const int colsA = shared; + const int rowsB = shared; + const int rowsR = rowsA; + const int colsR = colsB; + const int rowR = get_global_id(0); + const int colR = get_global_id(1); + int posA, posB; + float sum = 0; + + const int posR = colR + rowR * colsR; + + for (int i = 0; i < shared; i++) + { + int posA = i + rowR * colsA; + int posB = colR + i * colsB; + sum += ptrA[posA] * ptrB[posB]; + } + ptrR[rowR * colsR + colR] = sum; +} \ No newline at end of file diff --git a/src/gpu/gpu_mmul.cl.c b/src/gpu/gpu_mmul.cl.c new file mode 100644 index 0000000..0cc1d45 --- /dev/null +++ b/src/gpu/gpu_mmul.cl.c @@ -0,0 +1,58 @@ +unsigned char src_gpu_mmul_cl[] = { + 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, + 0x64, 0x20, 0x67, 0x70, 0x75, 0x5f, 0x6d, 0x6d, 0x75, 0x6c, 0x0a, 0x28, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, + 0x52, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, + 0x74, 0x72, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, + 0x20, 0x70, 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, + 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x2c, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, + 0x6e, 0x74, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x0a, 0x29, 0x0a, + 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x3d, 0x20, + 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, + 0x77, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52, 0x20, 0x3d, 0x20, + 0x72, 0x6f, 0x77, 0x73, 0x41, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, + 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x3b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, + 0x74, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, + 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, + 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x3d, 0x20, + 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, + 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, + 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x2c, 0x20, 0x70, 0x6f, 0x73, 0x42, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, + 0x73, 0x75, 0x6d, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x70, 0x6f, 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, + 0x2b, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, + 0x73, 0x52, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, + 0x20, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, + 0x20, 0x69, 0x20, 0x3c, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, + 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x70, 0x6f, 0x73, 0x41, 0x20, 0x3d, 0x20, 0x69, 0x20, 0x2b, 0x20, 0x72, + 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, + 0x20, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, + 0x20, 0x2b, 0x20, 0x69, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x75, + 0x6d, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, + 0x73, 0x41, 0x5d, 0x20, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70, + 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77, + 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2b, 0x20, + 0x63, 0x6f, 0x6c, 0x52, 0x5d, 0x20, 0x3d, 0x20, 0x73, 0x75, 0x6d, 0x3b, + 0x0a, 0x7d, 0x00 +}; +unsigned int src_gpu_mmul_cl_len = 651; diff --git a/src/gpu/gpu_test.c b/src/gpu/gpu_test.c new file mode 100644 index 0000000..6b14609 --- /dev/null +++ b/src/gpu/gpu_test.c @@ -0,0 +1,27 @@ +//This is for testing GPU functions on the CPU +#define __kernel +#define __global +#include "gpu/gpu_mmul.cl" +int GPU_GLOBAL_ID_0, GPU_GLOBAL_ID_1, GPU_GLOBAL_ID_2; +int get_global_int(int id) +{ + switch (id) + { + case 0: return GPU_GLOBAL_ID_0; + case 1: return GPU_GLOBAL_ID_1; + case 2: return GPU_GLOBAL_ID_2; + } +} + +void GPU_mmul_test(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared) +{ + for (int i = 0; i < rowsA; i++) + { + for (int j = 0; j < colsB; j++) + { + GPU_GLOBAL_ID_0 = i; + GPU_GLOBAL_ID_1 = i; + gpu_mmul(ptrR, ptrA, ptrB, rowsA, colsB, shared); + } + } +} \ No newline at end of file diff --git a/src/gpu_mmul.cl b/src/gpu_mmul.cl deleted file mode 100644 index a38fd93..0000000 --- a/src/gpu_mmul.cl +++ /dev/null @@ -1,29 +0,0 @@ -__kernel void gpu_mmul -( - __global float* ptrR, - __global float* ptrA, - __global float* ptrB, - const int rowsA, - const int colsB, - const int shared -) -{ - const int colsA = shared; - const int rowsB = shared; - const int rowsR = rowsA; - const int colsR = colsB; - const int rowR = get_global_id(0); - const int colR = get_global_id(1); - int posA, posB; - float sum = 0; - - const int posR = colR + rowR * colsR; - - for (int i = 0; i < shared; i++) - { - int posA = i + rowR * colsA; - int posB = colR + i * colsB; - sum += ptrA[posA] * ptrB[posB]; - } - ptrR[rowR * colsR + colR] = sum; -} \ No newline at end of file