From: miha-q <> Date: Mon, 4 Mar 2024 15:16:48 +0000 (-0500) Subject: Mon Mar 4 10:16:48 AM EST 2024 X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=9324c46401ee8484159849b03461340c90c36e53;p=QAnsel.git Mon Mar 4 10:16:48 AM EST 2024 --- diff --git a/Makefile b/Makefile index ab6d379..c39e5ad 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,16 @@ all: #gpu mmul - mv src/gpu/kernel.cl src/gpu/.kernel.cl - bash -c 'echo -ne "$$(cat src/gpu/.kernel.cl)\x00" > src/gpu/kernel.cl' - xxd -i src/gpu/kernel.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/kernel.cl.c - mv src/gpu/.kernel.cl src/gpu/kernel.cl + mv src/kernel2.cl src/.kernel2.cl + bash -c 'echo -ne "$$(cat src/.kernel2.cl)\x00" > src/kernel2.cl' + xxd -i src/kernel2.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/kernel2.cl.c + mv src/.kernel2.cl src/kernel2.cl + + #gpu mmul + #mv src/gpu/kernel.cl src/gpu/.kernel.cl + #bash -c 'echo -ne "$$(cat src/gpu/.kernel.cl)\x00" > src/gpu/kernel.cl' + #xxd -i src/gpu/kernel.cl | sed -e 's/gpu_gpu_/gpu_/g' > src/gpu/kernel.cl.c + #mv src/gpu/.kernel.cl src/gpu/kernel.cl gcc src/QAnsel.c -g -o bin/QAnsel -lm -I/usr/include/SDL2 -D_REENTRANT -lSDL2 -lOpenCL -pthread rm -f src/gpu/*.cl.c diff --git a/src/QAnsel.c b/src/QAnsel.c index 79104d7..e8ca436 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -9,14 +9,14 @@ #define QUBITS_MAX 14 unsigned char HIDDEN_VARIABLE = 0; FILE* RANDOM_FILE; - +#define GPU_ENABLED //#define GPU_ENABLED unsigned char USE_GPU = 0; #ifdef GPU_ENABLED -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#define CL_TARGET_OPENCL_VERSION 120 -#include -#include "gpu/gpu.c" +//#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +//#define CL_TARGET_OPENCL_VERSION 120 +//#include +//#include "gpu/gpu.c" //#include "gpu/gpu_test.c" #endif @@ -225,11 +225,12 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr #ifdef GPU_ENABLED if (USE_GPU/* && (tmp.rows >= 512 || tmp.cols >= 512)*/) { - GPU_knk + cpx_mtx_knk_metal ( - tmp.ptr, tmp.rows, tmp.cols, - filter.ptr, filter.rows, filter.cols, - gate.ptr, gate.rows, gate.cols + tmp.ptr, filter.ptr, gate.ptr, + tmp.rows, tmp.cols, + filter.rows, filter.cols, + gate.rows, gate.cols ); //cpx_ncpx_knk_mt //( @@ -240,11 +241,12 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr } else { - cpx_ncpx_knk_mt + cpx_mtx_knk ( - tmp.ptr, tmp.rows, tmp.cols, - filter.ptr, filter.rows, filter.cols, - gate.ptr, gate.rows, gate.cols + tmp.ptr, filter.ptr, gate.ptr, + tmp.rows, tmp.cols, + filter.rows, filter.cols, + gate.rows, gate.cols ); } #else @@ -274,18 +276,18 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr #ifdef GPU_ENABLED if (USE_GPU && (filter.cols >= 512 || stateVector->cols >= 512)) { - GPU_mmul + cpx_mtx_dot_metal ( tmp.ptr, stateVector->ptr, filter.ptr, - stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2 + stateVector->rows, filter.cols, stateVector->cols ); } else { - cpx_ncpx_mmul_mt + cpx_mtx_dot ( tmp.ptr, stateVector->ptr, filter.ptr, - stateVector->rows * 2, filter.cols * 2, stateVector->cols * 2 + stateVector->rows, filter.cols, stateVector->cols ); } #else @@ -1414,15 +1416,11 @@ void process(int argc, char** argv) void main(int argc, char** argv) { - #ifdef GPU_ENABLED - USE_GPU = GPU_init(); - #endif + USE_GPU = cpx_mtx_begin(); RANDOM_FILE = fopen("/dev/TrueRNG0", "r"); if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r"); process(argc, argv); fclose(RANDOM_FILE); - #ifdef GPU_ENABLED - if (USE_GPU) GPU_clean(); - #endif + cpx_mtx_clean(); } \ No newline at end of file diff --git a/src/complex2.c b/src/complex2.c index 0f8fc2c..5309236 100644 --- a/src/complex2.c +++ b/src/complex2.c @@ -6,7 +6,7 @@ #include #include #include "cores.c" - +#define GPU_ENABLED typedef struct { float real, imaginary; @@ -163,11 +163,249 @@ void cpx_mtx_knk(float* ptrR, float* ptrA, float* ptrB, int rowsR, int colsR, in } } -uint8_t mtx_init() +#ifdef GPU_ENABLED +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_TARGET_OPENCL_VERSION 120 +#include +#define gpuerr(x) if (err != CL_SUCCESS) { fprintf(stderr, "GPU fatal error: " #x "() failed.\n"); exit(1); } +#include "kernel2.cl.c" +cl_platform_id cpx_mtx_platform_id; +cl_device_id cpx_mtx_device_id; +cl_context cpx_mtx_context; +cl_command_queue cpx_mtx_command_queue; +unsigned char* cpx_mtx_cache = NULL; +size_t cpx_mtx_cache_len = 0; +#endif + +uint8_t cpx_mtx_begin() +{ + #ifdef GPU_ENABLED + cl_uint count; + cl_int err; + + err = clGetPlatformIDs(1, &cpx_mtx_platform_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (err == 0) + fprintf(stderr, "GPU error: No supported platforms found.\n"); + else + fprintf(stderr, "GPU error: clGetPlatformIDs() failed.\n"); + return 0; + } + + err = clGetDeviceIDs(cpx_mtx_platform_id, CL_DEVICE_TYPE_GPU, 1, &cpx_mtx_device_id, &count); + if (err != CL_SUCCESS || count == 0) + { + if (count == 0) + fprintf(stderr, "GPU error: No supported GPUs found.\n"); + else + fprintf(stderr, "GPU error: clGetDeviceIDs() failed.\n"); + return 0; + } + + cpx_mtx_context = clCreateContext(NULL, 1, &cpx_mtx_device_id, NULL, NULL, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateContext() failed.\n"); + return 0; + } + + cpx_mtx_command_queue = clCreateCommandQueue(cpx_mtx_context, cpx_mtx_device_id, 0, &err); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clCreateCommandQueue() failed.\n"); + err = clReleaseContext(cpx_mtx_context); + if (err != CL_SUCCESS) + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + return 0; + } + #endif + return 1; +} + +void cpx_mtx_clean() { - return 1; + #ifdef GPU_ENABLED + cl_int err; + err = clReleaseCommandQueue(cpx_mtx_command_queue); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseCommandQueue() failed.\n"); + } + err = clReleaseContext(cpx_mtx_context); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU error: clReleaseContext() failed.\n"); + } + free(cpx_mtx_cache); + #endif +} + +#ifdef GPU_ENABLED +void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared) +{ + int colsA = shared; + int rowsB = shared; + int rowsR = rowsA; + int colsR = colsB; + + //Create buffers + size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float); + size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float); + size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float); + cl_int err; + cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); + gpuerr(clCreateBuffer); + cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); + gpuerr(clCreateBuffer); + cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); + gpuerr(clCreateBuffer); + + //Populate buffers + err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL); + gpuerr(clEnqueueWriteBuffer); + err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL); + gpuerr(clEnqueueWriteBuffer); + + //Load and compile program + cl_program program; + if (cpx_mtx_cache == NULL) + { + program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){src_kernel2_cl}, NULL, &err); + gpuerr(clCreateProgramWithSource); + err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n"); + size_t log_size; + clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char* log = malloc(log_size); + clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + printf("%s", log); + free(log); + exit(1); + } + err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL); + gpuerr(clGetProgramInfo); + cpx_mtx_cache = malloc(cpx_mtx_cache_len); + clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL); + gpuerr(clGetProgramInfo); + } + else + { + program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err); + gpuerr(clCreateProgramWithBinary); + } + + //Setup kernel + cl_kernel kernel = clCreateKernel(program, "kernel_dot", &err); gpuerr(clCreateKernel); + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 5, sizeof(int), &shared); gpuerr(clSetKernelArg); + + //Run the program + size_t work_size[] = {rowsA, colsB}; + err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); + gpuerr(clEnqueueNDRangeKernel); + + //Wait for completion + err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush); + err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish); + + //Read results + err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL); + gpuerr(clEnqueueReadBuffer); + + //Clean up + err = clReleaseKernel(kernel); gpuerr(clReleaseKernel); + err = clReleaseProgram(program); gpuerr(clReleaseProgram); + err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject); + err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject); + err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject); } -void mtx_clean() {} +void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsR, int colsR, int rowsA, int colsA, int rowsB, int colsB) +{ + //Create buffers + size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float); + size_t sizeA = ((colsA * 2) * rowsA) * sizeof(float); + size_t sizeB = ((colsB * 2) * rowsB) * sizeof(float); + cl_int err; + cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(clCreateBuffer); + cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(clCreateBuffer); + cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(clCreateBuffer); + + //Populate buffers + err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL); + gpuerr(clEnqueueWriteBuffer); + err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL); + gpuerr(clEnqueueWriteBuffer); + + //Load and compile program + cl_program program; + if (cpx_mtx_cache == NULL) + { + program = clCreateProgramWithSource(cpx_mtx_context, 1, (const char*[]){src_kernel2_cl}, NULL, &err); + gpuerr(clCreateProgramWithSource); + err = clBuildProgram(program, 1, &cpx_mtx_device_id, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + fprintf(stderr, "GPU fatal error: clBuildProgram() failed.\n"); + size_t log_size; + clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char* log = malloc(log_size); + clGetProgramBuildInfo(program, cpx_mtx_device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + printf("%s", log); + free(log); + exit(1); + } + err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &cpx_mtx_cache_len, NULL); + gpuerr(clGetProgramInfo); + cpx_mtx_cache = malloc(cpx_mtx_cache_len); + clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &cpx_mtx_cache, NULL); + gpuerr(clGetProgramInfo); + } + else + { + program = clCreateProgramWithBinary(cpx_mtx_context, 1, &cpx_mtx_device_id, &cpx_mtx_cache_len, (const unsigned char**)&cpx_mtx_cache, NULL, &err); + gpuerr(clCreateProgramWithBinary); + } + + //Setup kernel + cl_kernel kernel = clCreateKernel(program, "kernel_knk", &err); gpuerr(clCreateKernel); + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 3, sizeof(int), &rowsR); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 4, sizeof(int), &colsR); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 5, sizeof(int), &rowsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 6, sizeof(int), &colsA); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 7, sizeof(int), &rowsB); gpuerr(clSetKernelArg); + err = clSetKernelArg(kernel, 8, sizeof(int), &colsB); gpuerr(clSetKernelArg); + + //Run the program + size_t work_size[] = {rowsR, colsR}; + err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, NULL); + gpuerr(clEnqueueNDRangeKernel); + + //Wait for completion + err = clFlush(cpx_mtx_command_queue); gpuerr(clFlush); + err = clFinish(cpx_mtx_command_queue); gpuerr(clFinish); + + //Read results + err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL); + gpuerr(clEnqueueReadBuffer); + + //Clean up + err = clReleaseKernel(kernel); gpuerr(clReleaseKernel); + err = clReleaseProgram(program); gpuerr(clReleaseProgram); + err = clReleaseMemObject(memA); gpuerr(clReleaseMemObject); + err = clReleaseMemObject(memB); gpuerr(clReleaseMemObject); + err = clReleaseMemObject(memR); gpuerr(clReleaseMemObject); +} +#endif #endif diff --git a/src/kernel2.cl.c b/src/kernel2.cl.c new file mode 100644 index 0000000..239eaa3 --- /dev/null +++ b/src/kernel2.cl.c @@ -0,0 +1,167 @@ +unsigned char src_kernel2_cl[] = { + 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, + 0x64, 0x20, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x5f, 0x64, 0x6f, 0x74, + 0x0a, 0x28, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, + 0x74, 0x72, 0x52, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, + 0x20, 0x70, 0x74, 0x72, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, + 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, + 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, + 0x6f, 0x77, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, + 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x0a, + 0x29, 0x0a, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, + 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x72, 0x6f, 0x77, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x73, 0x68, 0x61, 0x72, + 0x65, 0x64, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52, 0x20, + 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x41, 0x3b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, + 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67, + 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, + 0x28, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, + 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x5f, 0x69, 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x2c, 0x20, 0x70, 0x6f, + 0x73, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, + 0x74, 0x20, 0x72, 0x52, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x52, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x52, 0x20, + 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, + 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x69, 0x6e, 0x74, + 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, + 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x20, + 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, + 0x6c, 0x73, 0x41, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, + 0x69, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42, + 0x20, 0x3d, 0x20, 0x69, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, + 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, + 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, + 0x72, 0x41, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, + 0x73, 0x41, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d, 0x20, + 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x2b, 0x20, + 0x31, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x70, + 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, + 0x20, 0x69, 0x42, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70, + 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x3b, 0x0a, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x28, 0x72, 0x41, + 0x20, 0x2b, 0x20, 0x69, 0x41, 0x29, 0x28, 0x72, 0x42, 0x20, 0x2b, 0x20, + 0x69, 0x42, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, + 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, + 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x72, 0x41, 0x20, + 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x6e, 0x65, + 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, + 0x61, 0x74, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69, + 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x52, 0x20, 0x2b, 0x3d, 0x20, 0x66, + 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x52, + 0x20, 0x2b, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20, + 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, + 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, + 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, + 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x72, 0x52, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, + 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, + 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, + 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d, + 0x20, 0x69, 0x52, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x5f, 0x5f, 0x6b, 0x65, + 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x6b, 0x65, + 0x72, 0x6e, 0x65, 0x6c, 0x5f, 0x6b, 0x6e, 0x6b, 0x0a, 0x28, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, 0x52, 0x2c, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, 0x74, 0x72, + 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x70, + 0x74, 0x72, 0x42, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x52, + 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x2c, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, + 0x20, 0x72, 0x6f, 0x77, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, + 0x6c, 0x73, 0x41, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, + 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x0a, 0x29, 0x0a, + 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, + 0x77, 0x52, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, + 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x5f, 0x69, 0x64, 0x28, 0x31, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x41, 0x20, 0x3d, 0x20, + 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2f, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, + 0x6c, 0x41, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2f, 0x20, + 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, + 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, + 0x77, 0x52, 0x20, 0x25, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, 0x3b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x42, + 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x25, 0x20, 0x63, 0x6f, + 0x6c, 0x73, 0x42, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, + 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, + 0x41, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x2a, + 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x41, 0x20, + 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, + 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, + 0x42, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x20, 0x2a, + 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x42, 0x20, + 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, + 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x41, 0x20, 0x3d, 0x20, 0x70, 0x74, + 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x5d, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d, + 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x2b, + 0x20, 0x31, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, + 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x70, 0x74, 0x72, 0x42, + 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x42, 0x20, 0x3d, 0x20, 0x70, + 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31, + 0x5d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x28, 0x72, + 0x41, 0x20, 0x2b, 0x20, 0x69, 0x41, 0x29, 0x28, 0x72, 0x42, 0x20, 0x2b, + 0x20, 0x69, 0x42, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, + 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x3d, 0x20, 0x72, + 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, + 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x6e, + 0x65, 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, + 0x6c, 0x61, 0x73, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, + 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, + 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, + 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, + 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, + 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, + 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, + 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x63, + 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, + 0x5d, 0x20, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20, + 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x3b, 0x0a, 0x7d, 0x00 +}; +unsigned int src_kernel2_cl_len = 1965;