us2 = get_time();
printf("\tMetal2x2_R: %lu\n", us2 - us1);
- if (filter.rows / 128 == 0)
+ if (filter.rows / 1024 == 0)
{
printf("\tMetal2x2_Rx4: Invalid\n");
}
{
int rowsR = rowsA * rowsB;
int colsR = colsA * colsB;
- for (int i = 0; i < rowsR / (2 * 128); i++)
+ for (int i = 0; i < rowsR / (2 * 1024); i++)
{
kernel_knk_2x2_Rx4(ptrR, ptrA, rowsA, colsA, ptrB[0], ptrB[1], ptrB[2], ptrB[3], ptrB[4], ptrB[5], ptrB[6], ptrB[7], i);
}
{
const int rowsR = rowsA * 2;
const int colsR = colsA * 2;
- const int block = get_global_id(0) * 2 * 128; //{gpu_only}
- const int block = get_global_id_0 * 2 * 128; //{cpu_only}
+ const int block = get_global_id(0) * 2 * 1024; //{gpu_only}
+ const int block = get_global_id_0 * 2 * 1024; //{cpu_only}
- for (int rowR = block; rowR < block + 2 * 128; rowR += 2)
+ for (int rowR = block; rowR < block + 2 * 1024; rowR += 2)
{
for (int colR = 0; colR < colsR; colR += 2)
{