cl_device_id GPU_device_id;
cl_context GPU_context;
cl_command_queue GPU_command_queue;
+unsigned char* GPU_mmul_cache = NULL; // program binary fetched with clGetProgramInfo(CL_PROGRAM_BINARIES) after a source build; NULL until one has been captured
+size_t GPU_mmul_cache_len = 0; // size in bytes of GPU_mmul_cache, as reported by CL_PROGRAM_BINARY_SIZES
uint8_t GPU_init()
{
fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
exit(1);
}
- //Load and compile program
- //printf("------------------------------\n%s\n------------------------------\n", src_gpu_mmul_cl);
- char* tmp = malloc(src_gpu_mmul_cl_len);
- memcpy(tmp, src_gpu_mmul_cl, src_gpu_mmul_cl_len);
- const char* ptr = (const char*)src_gpu_mmul_cl;
- cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err);
- if (err != CL_SUCCESS)
- {
+
+ // Obtain a built gpu_mmul program. The first time through it is compiled
+ // from the embedded source and its device binary is cached in
+ // GPU_mmul_cache; afterwards it is recreated from that cached binary.
+ // Every failure that leaves no usable program is fatal (exit(1)); a failure
+ // to capture the binary only disables caching.
+ cl_program program = NULL;
+ if (GPU_mmul_cache == NULL)
+ {
+     //Load and compile program
+     // lengths == NULL below makes the runtime treat the string as
+     // NUL-terminated, so copy by length and terminate explicitly.
+     size_t src_len = src_gpu_mmul_cl_len;
+     char* tmp = malloc(src_len + 1);
+     if (tmp == NULL)
+     {
+         fprintf(stderr, "GPU fatal error: out of memory.\n");
+         exit(1);
+     }
+     memcpy(tmp, src_gpu_mmul_cl, src_len);
+     tmp[src_len] = '\0';
+     // Assign the outer 'program'; redeclaring it here would leave the one
+     // used by clCreateKernel() uninitialized.
+     program = clCreateProgramWithSource(GPU_context, 1, (const char**)&tmp, NULL, &err);
+     if (err != CL_SUCCESS)
+     {
+         free(tmp);
+         fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
+         exit(1);
+     }
+     err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
free(tmp);
- fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
- exit(1);
- }
- err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
- free(tmp);
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
- size_t log_size;
- clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
- char* log = malloc(log_size);
- clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
- printf("%s", log);
- free(log);
- exit(1);
+     if (err != CL_SUCCESS)
+     {
+         fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+         size_t log_size = 0;
+         clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+         char* log = malloc(log_size + 1);
+         if (log != NULL)
+         {
+             log[0] = '\0';
+             clGetProgramBuildInfo(program, GPU_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+             log[log_size] = '\0';
+             // Diagnostics go to stderr like every other fatal message here.
+             fprintf(stderr, "%s", log);
+             free(log);
+         }
+         exit(1);
+     }
+     // Capture the device binary for later calls. The globals are only
+     // updated once the binary has actually been retrieved, so a failed or
+     // empty query leaves the cache unset and the next call recompiles.
+     size_t bin_len = 0;
+     if (clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(bin_len), &bin_len, NULL) == CL_SUCCESS && bin_len > 0)
+     {
+         unsigned char* bin = malloc(bin_len);
+         if (bin != NULL)
+         {
+             if (clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(bin), &bin, NULL) == CL_SUCCESS)
+             {
+                 GPU_mmul_cache = bin;
+                 GPU_mmul_cache_len = bin_len;
+             }
+             else
+             {
+                 free(bin);
+             }
+         }
+     }
+ }
+ else
+ {
+     program = clCreateProgramWithBinary(GPU_context, 1, &GPU_device_id, &GPU_mmul_cache_len, (const unsigned char**)&GPU_mmul_cache, NULL, &err);
+     if (err != CL_SUCCESS)
+     {
+         fprintf(stderr, "GPU fatal error: clCreateProgramWithBinary() failed.\n");
+         exit(1);
+     }
+     // A program created from a binary still has to be built before
+     // kernels can be created from it.
+     err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
+     if (err != CL_SUCCESS)
+     {
+         fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+         exit(1);
+     }
}
+
//Setup kernel
cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err);
if (err != CL_SUCCESS)