#include "kernel.h" //{cpu_only}}
-__kernel void kernel_dot
+__kernel //{gpu_only}
+void kernel_dot
(
- __global float* ptrR,
- __global float* ptrA,
- __global float* ptrB,
+ __global //{gpu_only}
+ float* ptrR,
+ __global //{gpu_only}
+ float* ptrA,
+ __global //{gpu_only}
+ float* ptrB,
const int rowsA,
const int colsA,
const int rowsB,
const int colR = get_global_id(1); //{gpu_only}
const int rowR = get_global_id_0; //{cpu_only}
const int colR = get_global_id_1; //{cpu_only}
-
+
float rR = 0;
float iR = 0;
ptrR[(size_t)rowR * ((size_t)colsR * (size_t)2) + ((size_t)colR * (size_t)2) + (size_t)1] = iR;
}
-__kernel void kernel_knk
+__kernel //{gpu_only}
+void kernel_knk
(
- __global float* ptrR,
- __global float* ptrA,
- __global float* ptrB,
+ __global //{gpu_only}
+ float* ptrR,
+ __global //{gpu_only}
+ float* ptrA,
+ __global //{gpu_only}
+ float* ptrB,
const int rowsA,
const int colsA,
const int rowsB,
}
}
-__kernel void kernel_knk_2x2
+__kernel //{gpu_only}
+void kernel_knk_2x2
(
- __global float* ptrR,
- __global float* ptrA,
+ __global //{gpu_only}
+ float* ptrR,
+ __global //{gpu_only}
+ float* ptrA,
const int rowsA,
const int colsA,
const float gate0,