void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
{
+ #define GPU_DEBUG
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Create buffers
size_t sizeA = rowsA * shared;
size_t sizeB = shared * colsB;
size_t sizeR = rowsA * colsB;
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
cl_int err;
cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeA, NULL, &err);
if (err != CL_SUCCESS)
fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeB, NULL, &err);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(double) * sizeR, NULL, &err);
if (err != CL_SUCCESS)
{
exit(1);
}
//Populate buffers
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(double) * sizeA, ptrA, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(double) * sizeB, ptrB, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
exit(1);
}
//Load and compile program
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
cl_program program = clCreateProgramWithSource(GPU_context, 1, (const char**)(&gpu_mmul_cl), (const size_t*)(&gpu_mmul_cl_len), &err);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clCreateProgramWithSource() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clBuildProgram(program, 1, &GPU_device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Setup kernel
cl_kernel kernel = clCreateKernel(program, "gpu_mmul", &err);
if (err != CL_SUCCESS)
fprintf(stderr, "GPU fatal error: clCreateKernel() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clSetKernelArg(kernel, 4, sizeof(int), &colsB);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clSetKernelArg() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Run the program
size_t work_size[] = {rowsA, colsB};
err = clEnqueueNDRangeKernel(GPU_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
fprintf(stderr, "GPU fatal error: clEnqueueNDRangeKernel() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Wait for completion
err = clFlush(GPU_command_queue);
if (err != CL_SUCCESS)
fprintf(stderr, "GPU fatal error: clFlush() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clFinish(GPU_command_queue);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clFinish() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Read results
err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(double) * sizeR, ptrR, 0, NULL, NULL);
if (err != CL_SUCCESS)
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
//Clean up
err = clReleaseKernel(kernel);
if (err != CL_SUCCESS)
fprintf(stderr, "GPU fatal error: clReleaseKernel() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clReleaseProgram(program);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clReleaseProgram() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clReleaseMemObject(memA);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clReleaseMemObject(memB);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clReleaseMemObject(memR);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
err = clReleaseMemObject(memR);
if (err != CL_SUCCESS)
{
fprintf(stderr, "GPU fatal error: clReleaseMemObject() failed.\n");
exit(1);
}
+ #ifdef GPU_DEBUG
+ printf("Line %d.\n", __LINE__);
+ #endif
}
\ No newline at end of file