all:
- mv src/gpu_mmul.cl src/.gpu_mmul.cl
- bash -c 'echo -ne "$$(cat src/.gpu_mmul.cl)\x00" > src/gpu_mmul.cl'
- xxd -i src/gpu_mmul.cl > src/gpu_mmul.cl.c
- mv src/.gpu_mmul.cl src/gpu_mmul.cl
+ mv src/gpu/gpu_mmul.cl src/gpu/.gpu_mmul.cl
+ bash -c 'echo -ne "$$(cat src/gpu/.gpu_mmul.cl)\x00" > src/gpu/gpu_mmul.cl'
+ xxd -i src/gpu/gpu_mmul.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/gpu_mmul.cl.c
+ mv src/gpu/.gpu_mmul.cl src/gpu/gpu_mmul.cl
gcc src/QAnsel.c -g -o bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread
- rm -f src/*.cl.c
\ No newline at end of file
+# The xxd output is written to src/gpu/, so that is where it has to be removed.
+ rm -f src/gpu/*.cl.c
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
-#include "gpu.c"
+#include "gpu/gpu.c"
+
#endif
typedef struct
+++ /dev/null
-#include "gpu_mmul.cl.c"
-cl_platform_id GPU_platform_id;
-cl_device_id GPU_device_id;
-cl_context GPU_context;
-cl_command_queue GPU_command_queue;
-
-uint8_t GPU_init()
-{
- cl_uint count;
- cl_int err;
-
- err = clGetPlatformIDs(1, &GPU_platform_id, &count);
- if (err != CL_SUCCESS || count == 0)
- {
- if (err == 0)
- fprintf(stderr, "GPU error: No supported platforms found.\n");
- else
- fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n");
- return 0;
- }
-
- err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count);
- if (err != CL_SUCCESS || count == 0)
- {
- if (count == 0)
- fprintf(stderr, "GPU error: No supported GPUs found.\n");
- else
- fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n");
- return 0;
- }
-
- GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU error: clCreateContext() failed.\n");
- return 0;
- }
-
- GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n");
- err = clReleaseContext(GPU_context);
- if (err != CL_SUCCESS)
- fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
- return 0;
- }
-
- return 1;
-}
-
-void GPU_clean()
-{
- cl_int err;
- err = clReleaseCommandQueue(GPU_command_queue);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n");
- }
- err = clReleaseContext(GPU_context);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
- }
-}
-
-void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
-{
- //Create buffers
- size_t sizeA = rowsA * shared;
- size_t sizeB = shared * colsB;
- size_t sizeR = rowsA * colsB;
- cl_int err;
- cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
- exit(1);
- }
- cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
- exit(1);
- }
- cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
- exit(1);
- }
- //Populate buffers
- err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
- exit(1);
- }
- err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
- exit(1);
- }
- //Load and compile program
- printf("------------------------------\n%s\n------------------------------\n", src_gpu_mmul_cl);
- char* tmp = malloc(src_gpu_mmul_cl_len);
- memcpy(tmp, src_gpu_mmul_cl, src_gpu_mmul_cl_len);
- const char* ptr = (const char*)src_gpu_mmul_cl;
- cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err);
- if (err != CL_SUCCESS)
- {
- free(tmp);
- fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
- exit(1);
- }
- err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
- free(tmp);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
- size_t log_size;
- clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
- char* log = malloc(log_size);
- clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
- printf("%s", log);
- free(log);
- exit(1);
- }
- //Setup kernel
- cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 4, sizeof(int), &colsB);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- err = clSetKernelArg(kernel, 5, sizeof(int), &shared);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
- exit(1);
- }
- //Run the program
- size_t work_size[] = {rowsA, colsB};
- err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n");
- exit(1);
- }
- //Wait for completion
- err = clFlush(GPU_command_queue);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clFlush() failed.\n");
- exit(1);
- }
- err = clFinish(GPU_command_queue);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clFinish() failed.\n");
- exit(1);
- }
- //Read results
- err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
- exit(1);
- }
- //Clean up
- err = clReleaseKernel(kernel);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n");
- exit(1);
- }
- err = clReleaseProgram(program);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n");
- exit(1);
- }
- err = clReleaseMemObject(memA);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
- exit(1);
- }
- err = clReleaseMemObject(memB);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
- exit(1);
- }
- err = clReleaseMemObject(memR);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
- exit(1);
- }
- err = clReleaseMemObject(memR);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
- exit(1);
- }
-}
\ No newline at end of file
--- /dev/null
+#include "gpu_mmul.cl.c"
+cl_platform_id GPU_platform_id;
+cl_device_id GPU_device_id;
+cl_context GPU_context;
+cl_command_queue GPU_command_queue;
+
+//Pick the first OpenCL platform and its first GPU device, then create the
+//global context and command queue (GPU_context, GPU_command_queue).
+//Returns 1 on success. On failure a diagnostic is written to stderr, the
+//context is released again if it had been created, and 0 is returned.
+uint8_t GPU_init()
+{
+    cl_uint count = 0;
+    cl_int err;
+
+    err = clGetPlatformIDs(1, &GPU_platform_id, &count);
+    if (err != CL_SUCCESS || count == 0)
+    {
+        if (err == CL_SUCCESS)
+            fprintf(stderr, "GPU error: No supported platforms found.\n");
+        else
+            fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n");
+        return 0;
+    }
+
+    //count still holds the platform count here; clear it so a failing
+    //clGetDeviceIDs() that leaves it untouched cannot be misread below.
+    count = 0;
+    err = clGetDeviceIDs(GPU_platform_id, CL_DEVICE_TYPE_GPU, 1, &GPU_device_id, &count);
+    if (err != CL_SUCCESS || count == 0)
+    {
+        //"No GPU" is either an explicit CL_DEVICE_NOT_FOUND or a successful
+        //query that reported zero devices; anything else is a real failure.
+        if (err == CL_DEVICE_NOT_FOUND || err == CL_SUCCESS)
+            fprintf(stderr, "GPU error: No supported GPUs found.\n");
+        else
+            fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n");
+        return 0;
+    }
+
+    GPU_context = clCreateContext(NULL, 1, &GPU_device_id, NULL, NULL, &err);
+    if (err != CL_SUCCESS)
+    {
+        fprintf(stderr, "GPU error: clCreateContext() failed.\n");
+        return 0;
+    }
+
+    GPU_command_queue = clCreateCommandQueue(GPU_context, GPU_device_id, 0, &err);
+    if (err != CL_SUCCESS)
+    {
+        fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n");
+        //Do not leave a half-initialised state behind.
+        err = clReleaseContext(GPU_context);
+        if (err != CL_SUCCESS)
+            fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+//Release the global command queue first and the global context second.
+//A failed release is only reported on stderr; the other release still runs.
+void GPU_clean()
+{
+    if (clReleaseCommandQueue(GPU_command_queue) != CL_SUCCESS)
+        fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n");
+
+    if (clReleaseContext(GPU_context) != CL_SUCCESS)
+        fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+}
+
+void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
+{
+ //Create buffers
+ size_t sizeA = rowsA * shared;
+ size_t sizeB = shared * colsB;
+ size_t sizeR = rowsA * colsB;
+ cl_int err;
+ cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+ exit(1);
+ }
+ cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+ exit(1);
+ }
+ cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
+ exit(1);
+ }
+ //Populate buffers
+ err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+ exit(1);
+ }
+ err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+ exit(1);
+ }
+ //Load and compile program
+ printf("------------------------------\n%s\n------------------------------\n", src_gpu_mmul_cl);
+ char* tmp = malloc(src_gpu_mmul_cl_len);
+ memcpy(tmp, src_gpu_mmul_cl, src_gpu_mmul_cl_len);
+ const char* ptr = (const char*)src_gpu_mmul_cl;
+ cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err);
+ if (err != CL_SUCCESS)
+ {
+ free(tmp);
+ fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
+ exit(1);
+ }
+ err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
+ free(tmp);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+ size_t log_size;
+ clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+ char* log = malloc(log_size);
+ clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+ printf("%s", log);
+ free(log);
+ exit(1);
+ }
+ //Setup kernel
+ cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsB);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ err = clSetKernelArg(kernel, 5, sizeof(int), &shared);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
+ exit(1);
+ }
+ //Run the program
+ size_t work_size[] = {rowsA, colsB};
+ err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n");
+ exit(1);
+ }
+ //Wait for completion
+ err = clFlush(GPU_command_queue);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clFlush() failed.\n");
+ exit(1);
+ }
+ err = clFinish(GPU_command_queue);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clFinish() failed.\n");
+ exit(1);
+ }
+ //Read results
+ err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
+ exit(1);
+ }
+ //Clean up
+ err = clReleaseKernel(kernel);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n");
+ exit(1);
+ }
+ err = clReleaseProgram(program);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n");
+ exit(1);
+ }
+ err = clReleaseMemObject(memA);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+ exit(1);
+ }
+ err = clReleaseMemObject(memB);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+ exit(1);
+ }
+ err = clReleaseMemObject(memR);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+ exit(1);
+ }
+ err = clReleaseMemObject(memR);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
+ exit(1);
+ }
+}
\ No newline at end of file
--- /dev/null
+//Compute one element of R = A * B, all matrices stored row-major.
+//  ptrR - rowsA x colsB result
+//  ptrA - rowsA x shared left operand
+//  ptrB - shared x colsB right operand
+//The work-item's global id 0 selects the row of R and global id 1 the
+//column; each work-item writes exactly one element.
+__kernel void gpu_mmul
+(
+    __global float* ptrR,
+    __global float* ptrA,
+    __global float* ptrB,
+    const int rowsA,
+    const int colsB,
+    const int shared
+)
+{
+    const int rowR = get_global_id(0);
+    const int colR = get_global_id(1);
+
+    //Work-items outside the result matrix must not touch memory.
+    if (rowR >= rowsA || colR >= colsB)
+        return;
+
+    //Dot product of row rowR of A with column colR of B.
+    float sum = 0;
+    for (int i = 0; i < shared; i++)
+    {
+        sum += ptrA[rowR * shared + i] * ptrB[i * colsB + colR];
+    }
+    ptrR[rowR * colsB + colR] = sum;
+}
\ No newline at end of file
--- /dev/null
+unsigned char src_gpu_mmul_cl[] = {
+ 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69,
+ 0x64, 0x20, 0x67, 0x70, 0x75, 0x5f, 0x6d, 0x6d, 0x75, 0x6c, 0x0a, 0x28,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61,
+ 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72,
+ 0x52, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f,
+ 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70,
+ 0x74, 0x72, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67,
+ 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a,
+ 0x20, 0x70, 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63,
+ 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77,
+ 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+ 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x2c,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69,
+ 0x6e, 0x74, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x0a, 0x29, 0x0a,
+ 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x3d, 0x20,
+ 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f,
+ 0x77, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52, 0x20, 0x3d, 0x20,
+ 0x72, 0x6f, 0x77, 0x73, 0x41, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63,
+ 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c,
+ 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x3b, 0x0a,
+ 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e,
+ 0x74, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74,
+ 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30,
+ 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
+ 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x3d, 0x20,
+ 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69,
+ 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e,
+ 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x2c, 0x20, 0x70, 0x6f, 0x73, 0x42,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20,
+ 0x73, 0x75, 0x6d, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20,
+ 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20,
+ 0x70, 0x6f, 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20,
+ 0x2b, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c,
+ 0x73, 0x52, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72,
+ 0x20, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b,
+ 0x20, 0x69, 0x20, 0x3c, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b,
+ 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7b, 0x0a,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20,
+ 0x70, 0x6f, 0x73, 0x41, 0x20, 0x3d, 0x20, 0x69, 0x20, 0x2b, 0x20, 0x72,
+ 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x3b,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74,
+ 0x20, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52,
+ 0x20, 0x2b, 0x20, 0x69, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x75,
+ 0x6d, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f,
+ 0x73, 0x41, 0x5d, 0x20, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70,
+ 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+ 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77,
+ 0x52, 0x20, 0x2a, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2b, 0x20,
+ 0x63, 0x6f, 0x6c, 0x52, 0x5d, 0x20, 0x3d, 0x20, 0x73, 0x75, 0x6d, 0x3b,
+ 0x0a, 0x7d, 0x00
+};
+unsigned int src_gpu_mmul_cl_len = 651;
--- /dev/null
+//This is for testing GPU functions on the CPU
+//The OpenCL qualifiers are defined away so the kernel source compiles as plain C.
+#define __kernel
+#define __global
+
+//Work-item coordinates handed to the kernel; set them before each kernel call.
+int GPU_GLOBAL_ID_0, GPU_GLOBAL_ID_1, GPU_GLOBAL_ID_2;
+
+//CPU stand-in for the get_global_id() call made by the kernel: returns the
+//coordinate stored for dimension id (0..2), and 0 for any other dimension.
+int get_global_id(int id)
+{
+    switch (id)
+    {
+        case 0: return GPU_GLOBAL_ID_0;
+        case 1: return GPU_GLOBAL_ID_1;
+        case 2: return GPU_GLOBAL_ID_2;
+        default: return 0;
+    }
+}
+
+//Earlier name of the helper above, kept so existing callers still build.
+int get_global_int(int id)
+{
+    return get_global_id(id);
+}
+
+//Pulled in only now, because the kernel body calls get_global_id().
+#include "gpu/gpu_mmul.cl"
+
+//Run the gpu_mmul kernel on the CPU: call it once for every (row, column)
+//of the rowsA x colsB result, publishing the coordinates through
+//GPU_GLOBAL_ID_0 (row) and GPU_GLOBAL_ID_1 (column) before each call.
+//Parameters have the same meaning as for GPU_mmul().
+void GPU_mmul_test(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
+{
+    for (size_t row = 0; row < rowsA; row++)
+    {
+        for (size_t col = 0; col < colsB; col++)
+        {
+            GPU_GLOBAL_ID_0 = (int)row;
+            //The column id must follow the inner loop, otherwise only the
+            //diagonal of the result is ever computed.
+            GPU_GLOBAL_ID_1 = (int)col;
+            gpu_mmul(ptrR, ptrA, ptrB, (int)rowsA, (int)colsB, (int)shared);
+        }
+    }
+}
\ No newline at end of file
+++ /dev/null
-__kernel void gpu_mmul
-(
- __global float* ptrR,
- __global float* ptrA,
- __global float* ptrB,
- const int rowsA,
- const int colsB,
- const int shared
-)
-{
- const int colsA = shared;
- const int rowsB = shared;
- const int rowsR = rowsA;
- const int colsR = colsB;
- const int rowR = get_global_id(0);
- const int colR = get_global_id(1);
- int posA, posB;
- float sum = 0;
-
- const int posR = colR + rowR * colsR;
-
- for (int i = 0; i < shared; i++)
- {
- int posA = i + rowR * colsA;
- int posB = colR + i * colsB;
- sum += ptrA[posA] * ptrB[posB];
- }
- ptrR[rowR * colsR + colR] = sum;
-}
\ No newline at end of file