#include <pthread.h>
#include <string.h>
#include "cores.c"
-
+#define GPU_ENABLED
typedef struct
{
float real, imaginary;
}
}
-uint8_t mtx_init()
+#ifdef GPU_ENABLED
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+#define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); }
+#include "kernel2.cl.c"
+cl_platform_id cpx_mtx_platform_id;
+cl_device_id cpx_mtx_device_id;
+cl_context cpx_mtx_context;
+cl_command_queue cpx_mtx_command_queue;
+unsigned char* cpx_mtx_cache = NULL;
+size_t cpx_mtx_cache_len = 0;
+#endif
+
+uint8_t cpx_mtx_begin()
+{
+ #ifdef GPU_ENABLED
+ cl_uint count;
+ cl_int err;
+
+ err = clGetPlatformIDs(1, &cpx_mtx_platform_id, &count);
+ if (err != CL_SUCCESS || count == 0)
+ {
+ if (err == 0)
+ fprintf(stderr, "GPU error: No supported platforms found.\n");
+ else
+ fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n");
+ return 0;
+ }
+
+ err = clGetDeviceIDs(cpx_mtx_platform_id, CL_DEVICE_TYPE_GPU, 1, &cpx_mtx_device_id, &count);
+ if (err != CL_SUCCESS || count == 0)
+ {
+ if (count == 0)
+ fprintf(stderr, "GPU error: No supported GPUs found.\n");
+ else
+ fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n");
+ return 0;
+ }
+
+ cpx_mtx_context = clCreateContext(NULL, 1, &cpx_mtx_device_id, NULL, NULL, &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU error: clCreateContext() failed.\n");
+ return 0;
+ }
+
+ cpx_mtx_command_queue = clCreateCommandQueue(cpx_mtx_context, cpx_mtx_device_id, 0, &err);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n");
+ err = clReleaseContext(cpx_mtx_context);
+ if (err != CL_SUCCESS)
+ fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+ return 0;
+ }
+ #endif
+ return 1;
+}
+
+void cpx_mtx_clean()
{
- return 1;
+ #ifdef GPU_ENABLED
+ cl_int err;
+ err = clReleaseCommandQueue(cpx_mtx_command_queue);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n");
+ }
+ err = clReleaseContext(cpx_mtx_context);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU error: clReleaseContext() failed.\n");
+ }
+ free(cpx_mtx_cache);
+ #endif
+}
+
+#ifdef GPU_ENABLED
+void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared)
+{
+ int colsA = shared;
+ int rowsB = shared;
+ int rowsR = rowsA;
+ int colsR = colsB;
+
+ //Create buffers
+ size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float);
+ size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float);
+ size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float);
+ cl_int err;
+ cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err);
+ gpuerr(clCreateBuffer);
+ cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err);
+ gpuerr(clCreateBuffer);
+ cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err);
+ gpuerr(clCreateBuffer);
+
+ //Populate buffers
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
+ gpuerr(clEnqueueWriteBuffer);
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL);
+ gpuerr(clEnqueueWriteBuffer);
+
+ //Load and compile program
+ cl_program program;
+ if (cpx_mtx_cache == NULL)
+ {
+ program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){src_kernel2_cl}, NULL, &err);
+ gpuerr(clCreateProgramWithSource);
+ err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+ size_t log_size;
+ clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+ char* log = malloc(log_size);
+ clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+ printf("%s", log);
+ free(log);
+ exit(1);
+ }
+ err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL);
+ gpuerr(clGetProgramInfo);
+ cpx_mtx_cache = malloc(cpx_mtx_cache_len);
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL);
+ gpuerr(clGetProgramInfo);
+ }
+ else
+ {
+ program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err);
+ gpuerr(clCreateProgramWithBinary);
+ }
+
+ //Setup kernel
+ cl_kernel kernel = clCreateKernel(program, "kernel_dot", &err); gpuerr(clCreateKernel);
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 5, sizeof(int), &shared); gpuerr(clSetKernelArg);
+
+ //Run the program
+ size_t work_size[] = {rowsA, colsB};
+ err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
+ gpuerr(clEnqueueNDRangeKernel);
+
+ //Wait for completion
+ err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush);
+ err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish);
+
+ //Read results
+ err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
+ gpuerr(clEnqueueReadBuffer);
+
+ //Clean up
+ err = clReleaseKernel(kernel); gpuerr(clReleaseKernel);
+ err = clReleaseProgram(program); gpuerr(clReleaseProgram);
+ err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject);
+ err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject);
+ err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject);
}
-void mtx_clean() {}
+void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsR, int colsR, int rowsA, int colsA, int rowsB, int colsB)
+{
+ //Create buffers
+ size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float);
+ size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float);
+ size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float);
+ cl_int err;
+ cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(clCreateBuffer);
+ cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(clCreateBuffer);
+ cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(clCreateBuffer);
+
+ //Populate buffers
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
+ gpuerr(clEnqueueWriteBuffer);
+ err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL);
+ gpuerr(clEnqueueWriteBuffer);
+
+ //Load and compile program
+ cl_program program;
+ if (cpx_mtx_cache == NULL)
+ {
+ program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){src_kernel2_cl}, NULL, &err);
+ gpuerr(clCreateProgramWithSource);
+ err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n");
+ size_t log_size;
+ clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+ char* log = malloc(log_size);
+ clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+ printf("%s", log);
+ free(log);
+ exit(1);
+ }
+ err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL);
+ gpuerr(clGetProgramInfo);
+ cpx_mtx_cache = malloc(cpx_mtx_cache_len);
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL);
+ gpuerr(clGetProgramInfo);
+ }
+ else
+ {
+ program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err);
+ gpuerr(clCreateProgramWithBinary);
+ }
+
+ //Setup kernel
+ cl_kernel kernel = clCreateKernel(program, "kernel_knk", &err); gpuerr(clCreateKernel);
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsR); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsR); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 5, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 6, sizeof(int), &colsA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 7, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 8, sizeof(int), &colsB); gpuerr(clSetKernelArg);
+
+ //Run the program
+ size_t work_size[] = {rowsR, colsR};
+ err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL);
+ gpuerr(clEnqueueNDRangeKernel);
+
+ //Wait for completion
+ err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush);
+ err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish);
+
+ //Read results
+ err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
+ gpuerr(clEnqueueReadBuffer);
+
+ //Clean up
+ err = clReleaseKernel(kernel); gpuerr(clReleaseKernel);
+ err = clReleaseProgram(program); gpuerr(clReleaseProgram);
+ err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject);
+ err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject);
+ err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject);
+}
+#endif
#endif
--- /dev/null
+unsigned char src_kernel2_cl[] = {
+ 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69,
+ 0x64, 0x20, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x5f, 0x64, 0x6f, 0x74,
+ 0x0a, 0x28, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f,
+ 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70,
+ 0x74, 0x72, 0x52, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67,
+ 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a,
+ 0x20, 0x70, 0x74, 0x72, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f,
+ 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+ 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20,
+ 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72,
+ 0x6f, 0x77, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73,
+ 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
+ 0x20, 0x69, 0x6e, 0x74, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x0a,
+ 0x29, 0x0a, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+ 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20,
+ 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20,
+ 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20,
+ 0x72, 0x6f, 0x77, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72,
+ 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+ 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52, 0x20,
+ 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x41, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+ 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63,
+ 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67,
+ 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64,
+ 0x28, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+ 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20,
+ 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c,
+ 0x5f, 0x69, 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x2c, 0x20, 0x70, 0x6f,
+ 0x73, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+ 0x74, 0x20, 0x72, 0x52, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20,
+ 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x52, 0x20, 0x3d,
+ 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+ 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x52, 0x20,
+ 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f,
+ 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28,
+ 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a,
+ 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x69, 0x6e, 0x74,
+ 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20,
+ 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x20,
+ 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f,
+ 0x6c, 0x73, 0x41, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28,
+ 0x69, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42,
+ 0x20, 0x3d, 0x20, 0x69, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73,
+ 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f,
+ 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20,
+ 0x72, 0x41, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f,
+ 0x73, 0x41, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d, 0x20,
+ 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x2b, 0x20,
+ 0x31, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x70,
+ 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
+ 0x20, 0x69, 0x42, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70,
+ 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x3b, 0x0a, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x28, 0x72, 0x41,
+ 0x20, 0x2b, 0x20, 0x69, 0x41, 0x29, 0x28, 0x72, 0x42, 0x20, 0x2b, 0x20,
+ 0x69, 0x42, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20,
+ 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
+ 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x72, 0x41, 0x20,
+ 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x6e, 0x65,
+ 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f,
+ 0x61, 0x74, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69,
+ 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x52, 0x20, 0x2b, 0x3d, 0x20, 0x66,
+ 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x52,
+ 0x20, 0x2b, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20,
+ 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f,
+ 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20,
+ 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52,
+ 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x72, 0x52, 0x3b,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f,
+ 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20,
+ 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52,
+ 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d,
+ 0x20, 0x69, 0x52, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x5f, 0x5f, 0x6b, 0x65,
+ 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x6b, 0x65,
+ 0x72, 0x6e, 0x65, 0x6c, 0x5f, 0x6b, 0x6e, 0x6b, 0x0a, 0x28, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20,
+ 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x52, 0x2c,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61,
+ 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72,
+ 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f,
+ 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70,
+ 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+ 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52,
+ 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x2c, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74,
+ 0x20, 0x72, 0x6f, 0x77, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f,
+ 0x6c, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+ 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42,
+ 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+ 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x0a, 0x29, 0x0a,
+ 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f,
+ 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f,
+ 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20,
+ 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c,
+ 0x5f, 0x69, 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+ 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x41, 0x20, 0x3d, 0x20,
+ 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2f, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f,
+ 0x6c, 0x41, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2f, 0x20,
+ 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69,
+ 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f,
+ 0x77, 0x52, 0x20, 0x25, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, 0x3b, 0x0a,
+ 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x42,
+ 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x25, 0x20, 0x63, 0x6f,
+ 0x6c, 0x73, 0x42, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e,
+ 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77,
+ 0x41, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x2a,
+ 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x41, 0x20,
+ 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e,
+ 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77,
+ 0x42, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x20, 0x2a,
+ 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x42, 0x20,
+ 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66,
+ 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x41, 0x20, 0x3d, 0x20, 0x70, 0x74,
+ 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x5d, 0x3b, 0x0a, 0x20, 0x20,
+ 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d,
+ 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x2b,
+ 0x20, 0x31, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f,
+ 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x42,
+ 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x42, 0x20, 0x3d, 0x20, 0x70,
+ 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31,
+ 0x5d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x28, 0x72,
+ 0x41, 0x20, 0x2b, 0x20, 0x69, 0x41, 0x29, 0x28, 0x72, 0x42, 0x20, 0x2b,
+ 0x20, 0x69, 0x42, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f,
+ 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x3d, 0x20, 0x72,
+ 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20,
+ 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x6e,
+ 0x65, 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42,
+ 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20,
+ 0x6c, 0x61, 0x73, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a,
+ 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72,
+ 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f,
+ 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28,
+ 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d,
+ 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73,
+ 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52,
+ 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c,
+ 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63,
+ 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31,
+ 0x5d, 0x20, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20,
+ 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x3b, 0x0a, 0x7d, 0x00
+};
+unsigned int src_kernel2_cl_len = 1965;