From: miha-q <> Date: Thu, 7 Mar 2024 23:40:46 +0000 (-0500) Subject: Thu Mar 7 06:40:46 PM EST 2024 X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=06100a5aae09953c039866949f729281c7df3e95;p=QAnsel.git Thu Mar 7 06:40:46 PM EST 2024 --- diff --git a/src/.kernel.tmp.1 b/src/.kernel.tmp.1 index 1da9581..04c0834 100644 --- a/src/.kernel.tmp.1 +++ b/src/.kernel.tmp.1 @@ -135,12 +135,12 @@ __kernel void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/.kernel.tmp.2 b/src/.kernel.tmp.2 index 1e791cc..32bfc81 100644 Binary files a/src/.kernel.tmp.2 and b/src/.kernel.tmp.2 differ diff --git a/src/kernel.cl b/src/kernel.cl index d11e287..d980077 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -143,12 +143,12 @@ __kernel void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/kernel_cpu.cl b/src/kernel_cpu.cl index f8a12f2..7376958 100644 --- a/src/kernel_cpu.cl +++ b/src/kernel_cpu.cl @@ -139,12 +139,12 @@ void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/kernel_gpu.cl b/src/kernel_gpu.cl index e5e8787..6345e20 100644 --- a/src/kernel_gpu.cl +++ b/src/kernel_gpu.cl @@ -332,14 +332,14 @@ unsigned char kernel_gpu[] = { 0x20, 0x63, 0x61, 0x73, 0x65, 0x20, 0x32, 0x3a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, - 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, + 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, - 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, + 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, @@ -351,14 +351,14 @@ unsigned char kernel_gpu[] = { 0x73, 0x65, 0x20, 0x33, 0x3a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, - 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, + 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, - 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, + 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d,