/*-----------------------------------------------------------------------------------*/
/*METAL*/
/*-----------------------------------------------------------------------------------*/
+
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
-#define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); }
+#define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU Error on line %i: %s.\n", __LINE__, clGetErrorString(x)); exit(1); }
cl_platform_id cpx_mtx_platform_id;
cl_device_id cpx_mtx_device_id;
cl_context cpx_mtx_context;
unsigned char* cpx_mtx_cache = NULL;
size_t cpx_mtx_cache_len = 0;
+#include <CL/cl.h>
+#include <stdio.h>
+
+const char* clGetErrorString(cl_int err)
+{
+ switch (err)
+ {
+ case CL_SUCCESS: return "Success";
+ case CL_DEVICE_NOT_FOUND: return "Device not found";
+ case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
+ case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
+ case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure";
+ case CL_OUT_OF_RESOURCES: return "Out of resources";
+ case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
+ case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available";
+ case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
+ case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
+ case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
+ case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
+ case CL_MAP_FAILURE: return "Map failure";
+ case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "Misaligned sub buffer offset";
+ case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "Execution status error for events in wait list";
+ case CL_COMPILE_PROGRAM_FAILURE: return "Compile program failure";
+ case CL_LINKER_NOT_AVAILABLE: return "Linker not available";
+ case CL_LINK_PROGRAM_FAILURE: return "Link program failure";
+ case CL_DEVICE_PARTITION_FAILED: return "Device partition failed";
+ case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "Kernel argument information not available";
+ case CL_INVALID_VALUE: return "Invalid value";
+ case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
+ case CL_INVALID_PLATFORM: return "Invalid platform";
+ case CL_INVALID_DEVICE: return "Invalid device";
+ case CL_INVALID_CONTEXT: return "Invalid context";
+ case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
+ case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
+ case CL_INVALID_HOST_PTR: return "Invalid host pointer";
+ case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
+ case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor";
+ case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
+ case CL_INVALID_SAMPLER: return "Invalid sampler";
+ case CL_INVALID_BINARY: return "Invalid binary";
+ case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
+ case CL_INVALID_PROGRAM: return "Invalid program";
+ case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
+ case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
+ case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
+ case CL_INVALID_KERNEL: return "Invalid kernel";
+ case CL_INVALID_ARG_INDEX: return "Invalid argument index";
+ case CL_INVALID_ARG_VALUE: return "Invalid argument value";
+ case CL_INVALID_ARG_SIZE: return "Invalid argument size";
+ case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
+ case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
+ case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
+ case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
+ case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
+ case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
+ case CL_INVALID_EVENT: return "Invalid event";
+ case CL_INVALID_OPERATION: return "Invalid operation";
+ case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
+ case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
+ case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
+ case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid global work size";
+ case CL_INVALID_PROPERTY: return "Invalid property";
+ case CL_INVALID_IMAGE_DESCRIPTOR: return "Invalid image descriptor";
+ case CL_INVALID_COMPILER_OPTIONS: return "Invalid compiler options";
+ case CL_INVALID_LINKER_OPTIONS: return "Invalid linker options";
+ case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid device partition count";
+ case CL_INVALID_PIPE_SIZE: return "Invalid pipe size";
+ case CL_INVALID_DEVICE_QUEUE: return "Invalid device queue";
+ case CL_INVALID_SPEC_ID: return "Invalid spec ID";
+ case CL_MAX_SIZE_RESTRICTION_EXCEEDED: return "Max size restriction exceeded";
+ default: return "Unknown error code";
+ }
+}
+
+
uint8_t cpx_mtx_begin()
{
cl_uint count;
size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float);
size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float);
cl_int err;
- cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err);
- gpuerr(clCreateBuffer);
- cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err);
- gpuerr(clCreateBuffer);
- cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err);
- gpuerr(clCreateBuffer);
+ cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
+ cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(err);
+ cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(err);
//Populate buffers
- err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
- gpuerr(clEnqueueWriteBuffer);
- err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL);
- gpuerr(clEnqueueWriteBuffer);
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL); gpuerr(err);
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL); gpuerr(err);
//Load and compile program
cl_program program;
if (cpx_mtx_cache == NULL)
{
- program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){kernel_gpu}, NULL, &err);
- gpuerr(clCreateProgramWithSource);
+ program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){kernel_gpu}, NULL, &err); gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
free(log);
exit(1);
}
- err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL);
- gpuerr(clGetProgramInfo);
+ err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL); gpuerr(err);
cpx_mtx_cache = malloc(cpx_mtx_cache_len);
- clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL);
- gpuerr(clGetProgramInfo);
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL); gpuerr(err);
}
else
{
program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err);
- gpuerr(clCreateProgramWithBinary);
+ gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
}
}
- //Setup kernel
- cl_kernel kernel = clCreateKernel(program, "kernel_dot", &err);
- //printf("%i\n", err);
- //printf("\t%s: %i\n", "CL_SUCCESS", CL_SUCCESS);
- //printf("\t%s: %i\n", "CL_INVALID_PROGRAM", CL_INVALID_PROGRAM);
- //printf("\t%s: %i\n", "CL_INVALID_PROGRAM_EXECUTABLE", CL_INVALID_PROGRAM_EXECUTABLE);
- //printf("\t%s: %i\n", "CL_INVALID_KERNEL_NAME", CL_INVALID_KERNEL_NAME);
- //printf("\t%s: %i\n", "CL_INVALID_KERNEL_DEFINITION", CL_INVALID_KERNEL_DEFINITION);
- //printf("\t%s: %i\n", "CL_INVALID_VALUE", CL_INVALID_VALUE);
- //printf("\t%s: %i\n", "CL_OUT_OF_RESOURCES", CL_OUT_OF_RESOURCES);
- //printf("\t%s: %i\n", "CL_OUT_OF_HOST_MEMORY", CL_OUT_OF_HOST_MEMORY);
- gpuerr(clCreateKernel);
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg);
+ cl_kernel kernel = clCreateKernel(program, "kernel_dot", &err); gpuerr(err);
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(err);
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(err);
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(err);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(err);
+ err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(err);
//Run the program
err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL);
- gpuerr(clEnqueueNDRangeKernel);
+ gpuerr(err);
//Wait for completion
- err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush);
- err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish);
+ err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+ err = clFinish(cpx_mtx_command_queue); gpuerr(err);
//Read results
err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
- gpuerr(clEnqueueReadBuffer);
+ gpuerr(err);
//Clean up
- err = clReleaseKernel(kernel); gpuerr(clReleaseKernel);
- err = clReleaseProgram(program); gpuerr(clReleaseProgram);
- err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject);
- err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject);
- err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject);
+ err = clReleaseKernel(kernel); gpuerr(err);
+ err = clReleaseProgram(program); gpuerr(err);
+ err = clReleaseMemObject(memA); gpuerr(err);
+ err = clReleaseMemObject(memB); gpuerr(err);
+ err = clReleaseMemObject(memR); gpuerr(err);
}
void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float);
size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float);
cl_int err;
- cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(clCreateBuffer);
- cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(clCreateBuffer);
- cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(clCreateBuffer);
+ cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
+ cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(err);
+ cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(err);
//Populate buffers
err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
- gpuerr(clEnqueueWriteBuffer);
+ gpuerr(err);
err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL);
- gpuerr(clEnqueueWriteBuffer);
+ gpuerr(err);
//Load and compile program
cl_program program;
if (cpx_mtx_cache == NULL)
{
program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){kernel_gpu}, NULL, &err);
- gpuerr(clCreateProgramWithSource);
+ gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
exit(1);
}
err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL);
- gpuerr(clGetProgramInfo);
+ gpuerr(err);
cpx_mtx_cache = malloc(cpx_mtx_cache_len);
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL);
- gpuerr(clGetProgramInfo);
+ gpuerr(err);
}
else
{
program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err);
- gpuerr(clCreateProgramWithBinary);
+ gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
}
//Setup kernel
- cl_kernel kernel = clCreateKernel(program, "kernel_knk", &err); gpuerr(clCreateKernel);
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg);
+ cl_kernel kernel = clCreateKernel(program, "kernel_knk", &err); gpuerr(err);
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(err);
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(err);
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(err);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(err);
+ err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(err);
//Run the program
err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR}, NULL, 0, NULL, NULL);
- gpuerr(clEnqueueNDRangeKernel);
+ gpuerr(err);
//Wait for completion
- err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush);
- err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish);
+ err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+ err = clFinish(cpx_mtx_command_queue); gpuerr(err);
//Read results
err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
- gpuerr(clEnqueueReadBuffer);
+ gpuerr(err);
//Clean up
- err = clReleaseKernel(kernel); gpuerr(clReleaseKernel);
- err = clReleaseProgram(program); gpuerr(clReleaseProgram);
- err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject);
- err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject);
- err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject);
+ err = clReleaseKernel(kernel); gpuerr(err);
+ err = clReleaseProgram(program); gpuerr(err);
+ err = clReleaseMemObject(memA); gpuerr(err);
+ err = clReleaseMemObject(memB); gpuerr(err);
+ err = clReleaseMemObject(memR); gpuerr(err);
}
//This only works if ptrA is NxM where both N and X are divisible by two,
size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float);
size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float);
cl_int err;
- cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(clCreateBuffer);
- cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeR, NULL, &err); gpuerr(clCreateBuffer);
+ cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
+ cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeR, NULL, &err); gpuerr(err);
//Populate buffers
err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
- gpuerr(clEnqueueWriteBuffer);
+ gpuerr(err);
//Load and compile program
cl_program program;
if (cpx_mtx_cache == NULL)
{
program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){kernel_gpu}, NULL, &err);
- gpuerr(clCreateProgramWithSource);
+ gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
exit(1);
}
err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL);
- gpuerr(clGetProgramInfo);
+ gpuerr(err);
cpx_mtx_cache = malloc(cpx_mtx_cache_len);
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL);
- gpuerr(clGetProgramInfo);
+ gpuerr(err);
}
else
{
program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err);
- gpuerr(clCreateProgramWithBinary);
+ gpuerr(err);
err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
float gate5 = ptrB[5];
float gate6 = ptrB[6];
float gate7 = ptrB[7];
- cl_kernel kernel = clCreateKernel(program, "kernel_knk_2x2", &err); gpuerr(clCreateKernel);
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 2, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 3, sizeof(int), &colsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 4, sizeof(float), &gate0); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 5, sizeof(float), &gate1); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 6, sizeof(float), &gate2); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 7, sizeof(float), &gate3); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 8, sizeof(float), &gate4); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 9, sizeof(float), &gate5); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel,10, sizeof(float), &gate6); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel,11, sizeof(float), &gate7); gpuerr(clSetKernelArg);
+ cl_kernel kernel = clCreateKernel(program, "kernel_knk_2x2", &err); gpuerr(err);
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(err);
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(err);
+ err = clSetKernelArg(kernel, 2, sizeof(int), &rowsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &colsA); gpuerr(err);
+ err = clSetKernelArg(kernel, 4, sizeof(float), &gate0); gpuerr(err);
+ err = clSetKernelArg(kernel, 5, sizeof(float), &gate1); gpuerr(err);
+ err = clSetKernelArg(kernel, 6, sizeof(float), &gate2); gpuerr(err);
+ err = clSetKernelArg(kernel, 7, sizeof(float), &gate3); gpuerr(err);
+ err = clSetKernelArg(kernel, 8, sizeof(float), &gate4); gpuerr(err);
+ err = clSetKernelArg(kernel, 9, sizeof(float), &gate5); gpuerr(err);
+ err = clSetKernelArg(kernel,10, sizeof(float), &gate6); gpuerr(err);
+ err = clSetKernelArg(kernel,11, sizeof(float), &gate7); gpuerr(err);
size_t q = 2;
- size_t max_work_group_size;
- err = clGetDeviceInfo(cpx_mtx_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
- if (err != CL_SUCCESS) {
- printf("Error getting device info\n");
- exit(1);
- }
- printf(">%lu<\n", max_work_group_size);
- exit(1);
-
//Run the program
err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR / 2}, &q, 0, NULL, NULL);
-
- gpuerr(clEnqueueNDRangeKernel);
+ gpuerr(err);
//Wait for completion
- err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush);
- err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish);
+ err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+ err = clFinish(cpx_mtx_command_queue); gpuerr(err);
//Read results
err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
- gpuerr(clEnqueueReadBuffer);
+ gpuerr(err);
//Clean up
- err = clReleaseKernel(kernel); gpuerr(clReleaseKernel);
- err = clReleaseProgram(program); gpuerr(clReleaseProgram);
- err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject);
- err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject);
+ err = clReleaseKernel(kernel); gpuerr(err);
+ err = clReleaseProgram(program); gpuerr(err);
+ err = clReleaseMemObject(memA); gpuerr(err);
+ err = clReleaseMemObject(memR); gpuerr(err);
}