From 06100a5aae09953c039866949f729281c7df3e95 Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Thu, 7 Mar 2024 18:40:46 -0500 Subject: [PATCH] Thu Mar 7 06:40:46 PM EST 2024 --- src/.kernel.tmp.1 | 8 ++++---- src/.kernel.tmp.2 | Bin 4428 -> 4428 bytes src/kernel.cl | 8 ++++---- src/kernel_cpu.cl | 8 ++++---- src/kernel_gpu.cl | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/.kernel.tmp.1 b/src/.kernel.tmp.1 index 1da9581..04c0834 100644 --- a/src/.kernel.tmp.1 +++ b/src/.kernel.tmp.1 @@ -135,12 +135,12 @@ __kernel void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/.kernel.tmp.2 b/src/.kernel.tmp.2 index 1e791cc72a16245bb1931b4a70dfb487507ea110..32bfc81c314627764b9396871127e92642f20c4b 100644 GIT binary patch delta 35 kcmX@3bVg~zBL2w_c=t|@7f=DxGLz>ESVL*{$$tg>0Tr(fjsO4v delta 47 pcmX@3bVg~zB7R20$q#wMz@*Y-8G&>#OM3Er0c$vmeez!ce*l+k5R(7^ diff --git a/src/kernel.cl b/src/kernel.cl index d11e287..d980077 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -143,12 +143,12 @@ __kernel void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/kernel_cpu.cl b/src/kernel_cpu.cl index f8a12f2..7376958 100644 --- a/src/kernel_cpu.cl +++ b/src/kernel_cpu.cl @@ -139,12 +139,12 @@ void kernel_knk_2x2 ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; case 2: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 0) * 2) + 1] = outer + inner; break; case 3: - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; - ptrR[(rowR + 1) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2)] = first + lasts; + ptrR[(rowR + 0) * (colsR * 2) + ((colR + 1) * 2) + 1] = outer + inner; break; } diff --git a/src/kernel_gpu.cl b/src/kernel_gpu.cl index e5e8787..6345e20 100644 --- a/src/kernel_gpu.cl +++ b/src/kernel_gpu.cl @@ -332,14 +332,14 @@ unsigned char kernel_gpu[] = { 0x20, 0x63, 0x61, 0x73, 0x65, 0x20, 0x32, 0x3a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, - 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, + 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, - 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, + 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, @@ -351,14 +351,14 @@ unsigned char kernel_gpu[] = { 0x73, 0x65, 0x20, 0x33, 0x3a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, 0x6f, 0x77, 0x52, - 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, + 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x28, 0x72, - 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x28, + 0x6f, 0x77, 0x52, 0x20, 0x2b, 0x20, 0x30, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2b, 0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d, -- 2.39.5