From 85db3d46b5f120fd371c8926f70a979830be5723 Mon Sep 17 00:00:00 2001 From: miha-q <> Date: Tue, 5 Mar 2024 19:35:47 -0500 Subject: [PATCH] Tue Mar 5 07:35:47 PM EST 2024 --- examples/slow.txt | 27 ++++++----- src/QAnsel.c | 10 ++-- src/kernel.cl | 9 ++-- src/kernel.cl.c | 110 ++++++++++++++++++++++---------------------- src/kernel_cpu.cl.c | 9 ++-- 5 files changed, 86 insertions(+), 79 deletions(-) diff --git a/examples/slow.txt b/examples/slow.txt index 7d34b45..f4126d6 100644 --- a/examples/slow.txt +++ b/examples/slow.txt @@ -1,13 +1,18 @@ //designed to be slow qreg q[14]; -h q[0]; -h q[1]; -h q[2]; -h q[3]; -h q[4]; -h q[5]; -h q[6]; -h q[7]; -h q[8]; -h q[9]; -h q[10]; \ No newline at end of file +x q[0]; +x q[1]; +x q[2]; +x q[3]; +x q[4]; +x q[5]; +x q[6]; +x q[7]; +x q[8]; +x q[9]; +x q[10]; +x q[11]; +x q[12]; +x q[13]; + +born; \ No newline at end of file diff --git a/src/QAnsel.c b/src/QAnsel.c index 0eca8d2..833970a 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -12,7 +12,7 @@ FILE* RANDOM_FILE; #define GPU_ENABLED unsigned char USE_GPU = 1; unsigned char USE_THREADS = 1; -//#define SPEED_TEST +#define SPEED_TEST typedef struct { @@ -226,9 +226,9 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr us1 = get_time(); cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); us2 = get_time(); - printf("\tBare: %lu\n", us2 - us1); + printf("\tBare(1): %lu\n", us2 - us1); #else - if (USE_GPU && 0) //this one's slower for some reason + if (USE_GPU) //this one's slower for some reason { cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); } @@ -238,7 +238,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr } else { - cpx_mtx_knk_2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); + cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); } #endif @@ -1404,7 +1404,7 @@ void main(int argc, char** argv) USE_GPU = cpx_mtx_begin(); RANDOM_FILE = fopen("/dev/TrueRNG0", "r"); if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r"); - USE_GPU = 0; + USE_GPU = 1; USE_THREADS = 0; process(argc, argv); fclose(RANDOM_FILE); diff --git a/src/kernel.cl b/src/kernel.cl index 66c40db..37d178f 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -110,9 +110,9 @@ __kernel void kernel_knk_2 // carry out effectively a copy of // the dot product procedure. const int rowsI = rowsB; - const int colsI = rowsB; + const int colsI = rowsI; const int rowsJ = colsA; - const int colsJ = colsA; + const int colsJ = rowsJ; const int rowsX = rowsA * rowsI; const int colsX = colsA * colsI; const int rowsY = rowsJ * rowsB; @@ -146,8 +146,8 @@ __kernel void kernel_knk_2 const int rowB = rowY % rowsB; const int colB = colY % colsB; - const int posA = rowA * (colsA * 2) + (colsA * 2); - const int posB = rowB * (colsB * 2) + (colsB * 2); + const int posA = rowA * (colsA * 2) + (colA * 2); + const int posB = rowB * (colsB * 2) + (colB * 2); const float rA = rowI == colI ? ptrA[posA] : 0; const float iA = rowI == colI ? ptrA[posA + 1] : 0; @@ -165,4 +165,5 @@ __kernel void kernel_knk_2 } ptrR[rowR * (colsR * 2) + (colR * 2)] = rR; ptrR[rowR * (colsR * 2) + (colR * 2) + 1] = iR; + } diff --git a/src/kernel.cl.c b/src/kernel.cl.c index 0b2ee4e..bcc822b 100644 --- a/src/kernel.cl.c +++ b/src/kernel.cl.c @@ -275,12 +275,12 @@ unsigned char src_kernel_cl[] = { 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x49, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, - 0x6f, 0x6c, 0x73, 0x49, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x42, + 0x6f, 0x6c, 0x73, 0x49, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x49, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x4a, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6f, 0x6c, - 0x73, 0x4a, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x3b, 0x0a, + 0x73, 0x4a, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x4a, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x58, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x49, 0x3b, @@ -368,60 +368,60 @@ unsigned char src_kernel_cl[] = { 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x41, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, - 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x41, 0x20, 0x2a, 0x20, 0x32, 0x29, - 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, - 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42, - 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x42, 0x20, 0x2a, 0x20, 0x28, 0x63, - 0x6f, 0x6c, 0x73, 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, - 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, - 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, - 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x41, - 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x49, 0x20, 0x3d, 0x3d, 0x20, 0x63, - 0x6f, 0x6c, 0x49, 0x20, 0x3f, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, - 0x6f, 0x73, 0x41, 0x5d, 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, - 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d, 0x20, 0x72, - 0x6f, 0x77, 0x49, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x49, 0x20, - 0x3f, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, - 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x41, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x6f, 0x73, 0x42, 0x20, + 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x42, 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6f, + 0x6c, 0x73, 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x42, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x3b, 0x0a, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x41, 0x20, 0x3d, + 0x20, 0x72, 0x6f, 0x77, 0x49, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, + 0x49, 0x20, 0x3f, 0x20, 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, + 0x41, 0x5d, 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, + 0x6f, 0x61, 0x74, 0x20, 0x69, 0x41, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, + 0x49, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x49, 0x20, 0x3f, 0x20, + 0x70, 0x74, 0x72, 0x41, 0x5b, 0x70, 0x6f, 0x73, 0x41, 0x20, 0x2b, 0x20, + 0x31, 0x5d, 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, + 0x6f, 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, + 0x4a, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x4a, 0x20, 0x3f, 0x20, + 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, 0x20, 0x3a, + 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, + 0x69, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x4a, 0x20, 0x3d, 0x3d, + 0x20, 0x63, 0x6f, 0x6c, 0x4a, 0x20, 0x3f, 0x20, 0x70, 0x74, 0x72, 0x42, + 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3a, + 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x2f, 0x2f, 0x28, 0x72, 0x41, 0x20, 0x2b, 0x20, 0x69, 0x41, 0x29, + 0x28, 0x72, 0x42, 0x20, 0x2b, 0x20, 0x69, 0x42, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, - 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72, 0x42, 0x20, 0x3d, 0x20, 0x72, - 0x6f, 0x77, 0x4a, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x4a, 0x20, - 0x3f, 0x20, 0x70, 0x74, 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x5d, - 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, - 0x74, 0x20, 0x69, 0x42, 0x20, 0x3d, 0x20, 0x72, 0x6f, 0x77, 0x4a, 0x20, - 0x3d, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x4a, 0x20, 0x3f, 0x20, 0x70, 0x74, - 0x72, 0x42, 0x5b, 0x70, 0x6f, 0x73, 0x42, 0x20, 0x2b, 0x20, 0x31, 0x5d, - 0x20, 0x3a, 0x20, 0x30, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x28, 0x72, 0x41, 0x20, 0x2b, 0x20, 0x69, - 0x41, 0x29, 0x28, 0x72, 0x42, 0x20, 0x2b, 0x20, 0x69, 0x42, 0x29, 0x0a, + 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, + 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6f, 0x75, 0x74, 0x65, 0x72, + 0x20, 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, - 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x66, 0x69, 0x72, 0x73, - 0x74, 0x20, 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, + 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x6e, 0x65, + 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6f, 0x75, 0x74, - 0x65, 0x72, 0x20, 0x3d, 0x20, 0x72, 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, - 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, - 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x69, 0x6e, - 0x6e, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x72, - 0x42, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, - 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6c, - 0x61, 0x73, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, - 0x69, 0x42, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x72, 0x52, 0x20, 0x2b, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, - 0x20, 0x2b, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x52, 0x20, 0x2b, 0x3d, 0x20, - 0x6f, 0x75, 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20, 0x69, 0x6e, 0x6e, 0x65, - 0x72, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, - 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, - 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, - 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x72, 0x52, 0x3b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x70, 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, - 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, - 0x20, 0x2b, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, - 0x29, 0x20, 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d, 0x20, 0x69, 0x52, 0x3b, - 0x0a, 0x7d, 0x00 + 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6c, 0x61, 0x73, + 0x74, 0x73, 0x20, 0x3d, 0x20, 0x69, 0x41, 0x20, 0x2a, 0x20, 0x69, 0x42, + 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, + 0x52, 0x20, 0x2b, 0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x2b, + 0x20, 0x6c, 0x61, 0x73, 0x74, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x52, 0x20, 0x2b, 0x3d, 0x20, 0x6f, 0x75, + 0x74, 0x65, 0x72, 0x20, 0x2b, 0x20, 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, + 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, + 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x5d, + 0x20, 0x3d, 0x20, 0x72, 0x52, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, + 0x74, 0x72, 0x52, 0x5b, 0x72, 0x6f, 0x77, 0x52, 0x20, 0x2a, 0x20, 0x28, + 0x63, 0x6f, 0x6c, 0x73, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, + 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x52, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, + 0x2b, 0x20, 0x31, 0x5d, 0x20, 0x3d, 0x20, 0x69, 0x52, 0x3b, 0x0a, 0x0a, + 0x7d, 0x00 }; -unsigned int src_kernel_cl_len = 5079; +unsigned int src_kernel_cl_len = 5078; diff --git a/src/kernel_cpu.cl.c b/src/kernel_cpu.cl.c index c304adc..908a689 100644 --- a/src/kernel_cpu.cl.c +++ b/src/kernel_cpu.cl.c @@ -104,9 +104,9 @@ void kernel_knk_2 // carry out effectively a copy of // the dot product procedure. const int rowsI = rowsB; - const int colsI = rowsB; + const int colsI = rowsI; const int rowsJ = colsA; - const int colsJ = colsA; + const int colsJ = rowsJ; const int rowsX = rowsA * rowsI; const int colsX = colsA * colsI; const int rowsY = rowsJ * rowsB; @@ -138,8 +138,8 @@ void kernel_knk_2 const int rowB = rowY % rowsB; const int colB = colY % colsB; - const int posA = rowA * (colsA * 2) + (colsA * 2); - const int posB = rowB * (colsB * 2) + (colsB * 2); + const int posA = rowA * (colsA * 2) + (colA * 2); + const int posB = rowB * (colsB * 2) + (colB * 2); const float rA = rowI == colI ? ptrA[posA] : 0; const float iA = rowI == colI ? ptrA[posA + 1] : 0; @@ -157,4 +157,5 @@ void kernel_knk_2 } ptrR[rowR * (colsR * 2) + (colR * 2)] = rR; ptrR[rowR * (colsR * 2) + (colR * 2) + 1] = iR; + } -- 2.39.5