const int colsB
)
{
- const int rowR = get_global_id(0);
- const int colR = get_global_id(1);
+ int rowR = get_global_id(0);
+ int colR = get_global_id(1);
- const int rowA = rowR / rowsB;
- const int colA = colR / colsB;
- const int rowB = rowR % rowsB;
- const int colB = colR % colsB;
+ int rowA = rowR / rowsB;
+ int colA = colR / colsB;
+ int rowB = rowR % rowsB;
+ int colB = colR % colsB;
- const float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
- const float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
- const float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
- const float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+ float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+ float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+ float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+ float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
- ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = (r1 * r2) + (-(i1 * i2));
+ float first = r1 * r2; //real
+ float outer = r1 * i2; //imaginary
+ float inner = i1 * r2; //imaginary
+ float last = -(i1 * i2); //real
+ r1 = first + last;
+ i1 = outer + inner;
+
+ ptrR[(colR * 2) + (rowR * 2) * (colsR * 2)] = r1;
+ ptrR[((colR * 2) + 1) + (rowR * 2) * (colsR * 2)] = -i1;
+ ptrR[(colR * 2) + ((rowR * 2) + 1) * (colsR * 2)] = i1;
+ ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * (colsR * 2)] = r1;
}
sum += ptrA[posA] * ptrB[posB];
}
ptrR[rowR * colsR + colR] = sum;
-}
\ No newline at end of file
+}
+
+/*
+ * Kronecker-style product of two complex matrices: one work-item computes
+ * one complex element of the result,
+ *
+ *     R[rowR][colR] = A[rowR / rowsB][colR / colsB] * B[rowR % rowsB][colR % colsB]
+ *
+ * Storage: each complex value re + i*im is written to R as a 2x2 block of
+ * floats
+ *
+ *     [ re  -im ]
+ *     [ im   re ]
+ *
+ * so a buffer with `cols` complex columns has a row stride of cols * 2
+ * floats. A and B are read from the bottom row of each block (imaginary
+ * part on the left, real part on the right), which matches the layout
+ * written to R.
+ *
+ * ptrR         output buffer
+ * rowsR, colsR size of R in complex elements; used only for the bounds
+ *              guard (NOTE(review): presumably rowsA * rowsB and
+ *              colsA * colsB - confirm against the host code)
+ * ptrA, colsA  first operand and its number of complex columns
+ * rowsA        not used by this kernel
+ * ptrB         second operand
+ * rowsB, colsB size of B in complex elements
+ */
+__kernel void gpu_knk
+(
+ __global float* ptrR,
+ const int rowsR,
+ const int colsR,
+ __global float* ptrA,
+ const int rowsA,
+ const int colsA,
+ __global float* ptrB,
+ const int rowsB,
+ const int colsB
+)
+{
+ const int rowR = get_global_id(0);
+ const int colR = get_global_id(1);
+
+ // The host may round the global size up; surplus work-items must not
+ // touch memory. Without this guard they index past A and R, and a
+ // surplus column would wrap into the next row of R.
+ if (rowR >= rowsR || colR >= colsR)
+ {
+  return;
+ }
+
+ // Which element of A and which element of B meet at (rowR, colR).
+ const int rowA = rowR / rowsB;
+ const int colA = colR / colsB;
+ const int rowB = rowR % rowsB;
+ const int colB = colR % colsB;
+
+ // Row strides in floats (two floats per complex column).
+ const int strideA = colsA * 2;
+ const int strideB = colsB * 2;
+ const int strideR = colsR * 2;
+
+ // Bottom row of each 2x2 block: [ im, re ].
+ const float reA = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * strideA];
+ const float imA = ptrA[(colA * 2) + ((rowA * 2) + 1) * strideA];
+ const float reB = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * strideB];
+ const float imB = ptrB[(colB * 2) + ((rowB * 2) + 1) * strideB];
+
+ // (reA + i*imA) * (reB + i*imB)
+ const float re = (reA * reB) - (imA * imB);
+ const float im = (reA * imB) + (imA * reB);
+
+ // Store the product as a full 2x2 block.
+ ptrR[(colR * 2) + (rowR * 2) * strideR] = re;
+ ptrR[((colR * 2) + 1) + (rowR * 2) * strideR] = -im;
+ ptrR[(colR * 2) + ((rowR * 2) + 1) * strideR] = im;
+ ptrR[((colR * 2) + 1) + ((rowR * 2) + 1) * strideR] = re;
+}
+