#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
-cl_platform_id GPU_platform_id;
-cl_device_id GPU_device_id;
-cl_context GPU_context;
+#include "gpu.c"
#endif
typedef struct
}
cpx_mtx_init(&tmp, stateVector->rows, stateVector->cols);
+ #ifdef GPU_ENABLED
+ if (USE_GPU)
+ {
+ GPU_mmul
+ (
+ tmp.ptr, stateVector->ptr, filter.ptr,
+ stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2
+ );
+ }
+ else
+ {
+ cpx_ncpx_mmul_mt
+ (
+ tmp.ptr, stateVector->ptr, filter.ptr,
+ stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2
+ );
+ }
+ #else
cpx_ncpx_mmul_mt
(
tmp.ptr, stateVector->ptr, filter.ptr,
stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2
);
+ #endif
free(stateVector->ptr);
stateVector->ptr = tmp.ptr;
free(filter.ptr);
free(lineIDs);
}
-#ifdef GPU_ENABLED
-uint8_t GPU_init()
-{
- cl_uint count;
- cl_int err;
-
- err = clGetPlatformIDs(1, &GPU_platform_id, &count);
- if (err != CL_SUCCESS || count == 0)
- {
- if (err == 0)
- fprintf(stderr, "GPU disabled: No supported platforms found.\n");
- else
- fprintf(stderr, "GPU disabled: clGetPlatformIDs() failed.\n");
- return 0;
- }
-
- err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count);
- if (err != CL_SUCCESS || count == 0)
- {
- if (count == 0)
- fprintf(stderr, "GPU disabled: No supported GPUs found.\n");
- else
- fprintf(stderr, "GPU disabled: clGetDeviceIDs() failed.\n");
- return 0;
- }
-
- GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU disabled: clCreateContext() failed.\n");
- return 0;
- }
-
- return 1;
-}
-#endif
-
void main(int argc, char** argv)
{
#ifdef GPU_ENABLED
if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r");
process(argc, argv);
fclose(RANDOM_FILE);
+
+ #ifdef GPU_ENABLED
+ if (USE_GPU) GPU_clean();
+ #endif
}
\ No newline at end of file
--- /dev/null
+#include "gpu_mmul.cl.c"
+cl_platform_id GPU_platform_id;
+cl_device_id GPU_device_id;
+cl_context GPU_context;
+cl_command_queue GPU_command_queue;
+
+// Sets up the global OpenCL state used by the other GPU_* functions:
+// takes the first platform reported, the first GPU device on it, then
+// creates GPU_context and GPU_command_queue.
+// Returns 1 on success. On failure prints a "GPU error: ..." line to
+// stderr and returns 0; a context created before a later failure is released.
+uint8_t GPU_init()
+{
+    cl_uint count = 0;
+    cl_int err;
+
+    err = clGetPlatformIDs(1, &GPU_platform_id, &count);
+    if (err != CL_SUCCESS || count == 0)
+    {
+        //Success with zero platforms means there is nothing to run on.
+        if (err == CL_SUCCESS)
+            fprintf(stderr, "GPU error: No supported platforms found.\n");
+        else
+            fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n");
+        return 0;
+    }
+
+    //Reset so a stale platform count cannot be mistaken for a device count.
+    count = 0;
+    err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count);
+    if (err != CL_SUCCESS || count == 0)
+    {
+        //CL_DEVICE_NOT_FOUND is how OpenCL reports "no device of this type";
+        //CL_SUCCESS can only reach here together with count == 0.
+        if (err == CL_DEVICE_NOT_FOUND || err == CL_SUCCESS)
+            fprintf(stderr, "GPU error: No supported GPUs found.\n");
+        else
+            fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n");
+        return 0;
+    }
+
+    GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err);
+    if (err != CL_SUCCESS)
+    {
+        fprintf(stderr, "GPU error: clCreateContext() failed.\n");
+        return 0;
+    }
+
+    GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err);
+    if (err != CL_SUCCESS)
+    {
+        fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n");
+        //Do not leave a half-initialised context behind.
+        err = clReleaseContext(GPU_context);
+        if (err != CL_SUCCESS)
+            fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+// Releases the global command queue and then the global context.
+// A failed release is reported on stderr; the second release is still attempted.
+void GPU_clean()
+{
+    if (clReleaseCommandQueue(GPU_command_queue) != CL_SUCCESS)
+        fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n");
+
+    if (clReleaseContext(GPU_context) != CL_SUCCESS)
+        fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+}
+
+//Prints the standard fatal message for the named OpenCL call and exits
+//with status 1 when err is not CL_SUCCESS; otherwise does nothing.
+static void GPU_require(cl_int err, const char* call)
+{
+    if (err != CL_SUCCESS)
+    {
+        fprintf(stderr, "GPU fatal error: %s() failed.\n", call);
+        exit(1);
+    }
+}
+
+//Runs the "gpu_mmul" kernel on the global GPU context/queue.
+//  ptrR   - receives rowsA * colsB doubles read back from the device
+//  ptrA   - rowsA * shared doubles uploaded as kernel argument 1
+//  ptrB   - shared * colsB doubles uploaded as kernel argument 2
+//  rowsA, colsB - global work size, also passed as kernel arguments 3 and 4
+//  shared - only used to size the A and B buffers
+//Presumably computes R = A x B; the kernel source is not visible here.
+//Any OpenCL failure is fatal: a message is printed and the process exits.
+void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+{
+    size_t bytesA = sizeof(double) * rowsA * shared;
+    size_t bytesB = sizeof(double) * shared * colsB;
+    size_t bytesR = sizeof(double) * rowsA * colsB;
+    cl_int err;
+
+    //The kernel receives the dimensions as 32-bit ints (the original passed
+    //sizeof(int) bytes), so refuse values that would be silently truncated.
+    if (rowsA > (size_t)CL_INT_MAX || colsB > (size_t)CL_INT_MAX)
+    {
+        fprintf(stderr, "GPU fatal error: matrix dimensions exceed kernel argument range.\n");
+        exit(1);
+    }
+    cl_int argRows = (cl_int)rowsA;
+    cl_int argCols = (cl_int)colsB;
+
+    //Create buffers
+    cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, bytesA, NULL, &err);
+    GPU_require(err, "clCreateBuffer");
+    cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, bytesB, NULL, &err);
+    GPU_require(err, "clCreateBuffer");
+    cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, bytesR, NULL, &err);
+    GPU_require(err, "clCreateBuffer");
+
+    //Populate buffers (blocking writes)
+    err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, bytesA, ptrA, 0, NULL, NULL);
+    GPU_require(err, "clEnqueueWriteBuffer");
+    err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, bytesB, ptrB, 0, NULL, NULL);
+    GPU_require(err, "clEnqueueWriteBuffer");
+
+    //Load and compile program
+    //gpu_mmul_cl / gpu_mmul_cl_len come from gpu_mmul.cl.c, which is not
+    //visible here (names look like `xxd -i` output - TODO confirm). Typed
+    //locals are correct whether the symbol is an array or a pointer and
+    //whatever integer type the length has; casting the symbols' addresses is not.
+    const char* source = (const char*)gpu_mmul_cl;
+    size_t sourceLen = (size_t)gpu_mmul_cl_len;
+    cl_program program = clCreateProgramWithSource(GPU_context, 1, &source, &sourceLen, &err);
+    GPU_require(err, "clCreateProgramWithSource");
+    err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
+    GPU_require(err, "clBuildProgram");
+
+    //Setup kernel
+    cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err);
+    GPU_require(err, "clCreateKernel");
+    GPU_require(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR), "clSetKernelArg");
+    GPU_require(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA), "clSetKernelArg");
+    GPU_require(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB), "clSetKernelArg");
+    //NOTE(review): `shared` is never handed to the kernel; presumably the
+    //kernel derives the inner dimension from these two - verify against gpu_mmul.cl.
+    GPU_require(clSetKernelArg(kernel, 3, sizeof(cl_int), &argRows), "clSetKernelArg");
+    GPU_require(clSetKernelArg(kernel, 4, sizeof(cl_int), &argCols), "clSetKernelArg");
+
+    //Run the program: one work-item per (row, column) pair
+    size_t work_size[] = {rowsA, colsB};
+    err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
+    GPU_require(err, "clEnqueueNDRangeKernel");
+
+    //Wait for completion
+    GPU_require(clFlush(GPU_command_queue), "clFlush");
+    GPU_require(clFinish(GPU_command_queue), "clFinish");
+
+    //Read results (blocking read)
+    err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, bytesR, ptrR, 0, NULL, NULL);
+    GPU_require(err, "clEnqueueReadBuffer");
+
+    //Clean up: every object is released exactly once
+    GPU_require(clReleaseKernel(kernel), "clReleaseKernel");
+    GPU_require(clReleaseProgram(program), "clReleaseProgram");
+    GPU_require(clReleaseMemObject(memA), "clReleaseMemObject");
+    GPU_require(clReleaseMemObject(memB), "clReleaseMemObject");
+    GPU_require(clReleaseMemObject(memR), "clReleaseMemObject");
+}
\ No newline at end of file