#include "cores.c"
#include "kernel_cpu.cl"
#include "kernel_gpu.cl"
+#define OPTIMAL_WGS_KNK 2 /* Local work-group size handed to clEnqueueNDRangeKernel for the 1-D launch over rowsR / 2 work-items. NOTE(review): rowsR / 2 presumably has to be a multiple of this value on the target OpenCL version - confirm. */
typedef struct
{
float real, imaginary;
err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(err);
err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(err);
- //Run the program
- err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL);
+ size_t wgs[2] = {1, 1};
+ err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, wgs, 0, NULL, NULL);
gpuerr(err);
//Wait for completion
err = clSetKernelArg(kernel,10, sizeof(float), &gate6); gpuerr(err);
err = clSetKernelArg(kernel,11, sizeof(float), &gate7); gpuerr(err);
-
//Run the program
- size_t wgs = 2;
- err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR / 2}, &wgs, 0, NULL, NULL);
+ size_t wgs[2] = {OPTIMAL_WGS_KNK};
+ err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR / 2}, wgs, 0, NULL, NULL);
gpuerr(err);
//Wait for completion