-__kernel void gpu_mmul(__global float* ptrR, __global float* ptrA, __global float* ptrB, const int N, const int W)
+/*
+ * Matrix product R = A * B; each work-item computes one element of R.
+ *
+ * The buffers are indexed as row-major matrices:
+ *   A is rowsA x shared, B is shared x colsB, R is rowsA x colsB.
+ *
+ * ptrR   - output buffer, written at [row * colsB + col]
+ * ptrA   - left operand, read at [row * shared + i]
+ * ptrB   - right operand, read at [i * colsB + col]
+ * rowsA  - number of rows of A (and of R)
+ * colsB  - number of columns of B (and of R)
+ * shared - inner dimension: columns of A, rows of B
+ *
+ * Global id 0 selects the row and global id 1 selects the column.
+ */
+__kernel void gpu_mmul
+(
+ __global float* ptrR,
+ __global float* ptrA,
+ __global float* ptrB,
+ const int rowsA,
+ const int colsB,
+ const int shared
+)
{
+ // Row stride of A; the inner dimension is also the row count of B.
+ const int colsA = shared;
int row = get_global_id(0);
int col = get_global_id(1);
+ // Work-items outside the result matrix must not touch memory; this matters
+ // if the host pads the global work size beyond rowsA x colsB.
+ if (row >= rowsA || col >= colsB)
+ {
+ return;
+ }
float sum = 0;
- for (int i = 0; i < N; i++)
+ for (int i = 0; i < shared; i++)
{
- sum += ptrA[row * W + i] * ptrB[i * W + col];
+ sum += ptrA[row * colsA + i] * ptrB[i * colsB + col];
}
- ptrR[row * W + col] = sum;
+ ptrR[row * colsB + col] = sum;
}
\ No newline at end of file