From 56b1ad68b916bf0724468568cf3c77ada2ec2f10 Mon Sep 17 00:00:00 2001 From: server Date: Wed, 22 Jan 2025 23:31:25 -0500 Subject: [PATCH] Wed Jan 22 11:31:25 PM EST 2025 --- src/.kernel.tmp.1 | 144 ---------------------------------------------- src/.kernel.tmp.2 | Bin 5187 -> 0 bytes 2 files changed, 144 deletions(-) delete mode 100644 src/.kernel.tmp.1 delete mode 100644 src/.kernel.tmp.2 diff --git a/src/.kernel.tmp.1 b/src/.kernel.tmp.1 deleted file mode 100644 index 2a8664f..0000000 --- a/src/.kernel.tmp.1 +++ /dev/null @@ -1,144 +0,0 @@ -__kernel void kernel_dot -( - __global float* ptrR, - __global float* ptrA, - __global float* ptrB, - const int rowsA, - const int colsA, - const int rowsB, - const int colsB -) -{ - const int rowsR = rowsA; - const int colsR = colsB; - const int rowR = get_global_id(0); //{gpu_only} - const int colR = get_global_id(1); //{gpu_only} - - float rR = 0; - float iR = 0; - - for (int i = 0; i < colsA; i++) - { - const float rA = ptrA[(size_t)rowR * ((size_t)colsA * (size_t)2) + ((size_t)i * (size_t)2)]; - const float iA = ptrA[(size_t)rowR * ((size_t)colsA * (size_t)2) + ((size_t)i * (size_t)2) + (size_t)1]; - const float rB = ptrB[(size_t)i * ((size_t)colsB * (size_t)2) + ((size_t)colR * (size_t)2)]; - const float iB = ptrB[(size_t)i * ((size_t)colsB * (size_t)2) + ((size_t)colR * (size_t)2) + (size_t)1]; - - //(rA + iA)(rB + iB) - const float first = rA * rB; - const float outer = rA * iB; - const float inner = iA * rB; - const float lasts = iA * iB; - - rR += first + lasts; - iR += outer + inner; - } - ptrR[(size_t)rowR * ((size_t)colsR * (size_t)2) + ((size_t)colR * (size_t)2)] = rR; - ptrR[(size_t)rowR * ((size_t)colsR * (size_t)2) + ((size_t)colR * (size_t)2) + (size_t)1] = iR; -} - -__kernel void kernel_knk -( - __global float* ptrR, - __global float* ptrA, - __global float* ptrB, - const int rowsA, - const int colsA, - const int rowsB, - const int colsB -) -{ - const int rowsR = rowsA * rowsB; - const int colsR = colsA * colsB; - const int rowR = get_global_id(0); //{gpu_only} - for (int colR = 0; colR < colsR; colR++) - { - const int rowA = rowR / rowsB; - const int colA = colR / colsB; - const int rowB = rowR % rowsB; - const int colB = colR % colsB; - - const int posA = rowA * (colsA * 2) + (colA * 2); - const int posB = rowB * (colsB * 2) + (colB * 2); - - const float rA = ptrA[posA]; - const float iA = ptrA[posA + 1]; - const float rB = ptrB[posB]; - const float iB = ptrB[posB + 1]; - - //(rA + iA)(rB + iB) - const float first = rA * rB; - const float outer = rA * iB; - const float inner = iA * rB; - const float lasts = iA * iB; - ptrR[rowR * (colsR * 2) + (colR * 2)] = first + lasts; - ptrR[rowR * (colsR * 2) + (colR * 2) + 1] = outer + inner; - } -} - -__kernel void kernel_knk_2x2 -( - __global float* ptrR, - __global float* ptrA, - const int rowsA, - const int colsA, - const float gate0, - const float gate1, - const float gate2, - const float gate3, - const float gate4, - const float gate5, - const float gate6, - const float gate7 -) -{ - const int rowsR = rowsA * 2; - const int colsR = colsA * 2; - const int rowR = get_global_id(0) * 2; //{gpu_only} - - for (int colR = 0; colR < colsR; colR += 2) - { - const int rowA = rowR / 2; - const int colA = colR / 2; - const float rA = ptrA[(size_t)rowA * ((size_t)colsA * (size_t)2) + ((size_t)colA * (size_t)2)]; - const float iA = ptrA[(size_t)rowA * ((size_t)colsA * (size_t)2) + ((size_t)colA * (size_t)2) + (size_t)1]; - - for (int i = 0; i < 4; i++) - { - float rB, iB; - switch (i) - { - case 0: rB = gate0; iB = gate1; break; - case 1: rB = gate2; iB = gate3; break; - case 2: rB = gate4; iB = gate5; break; - case 3: rB = gate6; iB = gate7; break; - } - - //(rA + iA)(rB + iB) - const float first = rA * rB; - const float outer = rA * iB; - const float inner = iA * rB; - const float lasts = iA * iB; - switch (i) - { - case 0: - ptrR[((size_t)rowR + (size_t)0) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)0) * (size_t)2)] = first + lasts; - ptrR[((size_t)rowR + (size_t)0) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)0) * (size_t)2) + (size_t)1] = outer + inner; - break; - case 1: - ptrR[((size_t)rowR + (size_t)0) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)1) * (size_t)2)] = first + lasts; - ptrR[((size_t)rowR + (size_t)0) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)1) * (size_t)2) + (size_t)1] = outer + inner; - break; - case 2: - ptrR[((size_t)rowR + (size_t)1) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)0) * (size_t)2)] = first + lasts; - ptrR[((size_t)rowR + (size_t)1) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)0) * (size_t)2) + (size_t)1] = outer + inner; - break; - case 3: - ptrR[((size_t)rowR + (size_t)1) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)1) * (size_t)2)] = first + lasts; - ptrR[((size_t)rowR + (size_t)1) * ((size_t)colsR * (size_t)2) + (((size_t)colR + (size_t)1) * (size_t)2) + (size_t)1] = outer + inner; - break; - } - - } - } -} diff --git a/src/.kernel.tmp.2 b/src/.kernel.tmp.2 deleted file mode 100644 index fd607072464755b25e62b8061b835bfd592badce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5187 zcmds4OK+P%5bl{@v6n~?N3y|rNO5z3pWtg1A;%%k$_OGLP20%-y)&~97T6_;Ory43 zZ05D|{B}sweO47&&c2uYo{9hIec5=v$KXlRU0&X8a`upyo92`~HP!me#fhDqL~yoc zQ8$bi4XetRT8g*CZJ7^=C^=9^$;1o1S8}DbfWm8bBh0UQOjJT9DfgM6iM(Cb$cR&Z z?=ONCo6lc&PtR#tO%0Yv2rX%kp;u=&Ot>>+B-c6vZ3|4{ z2E>9V38okmL<(IqTPJoaGhQ}~b1ke-X}41;%mAEm@VwN9iTh&zA7CQ30RpPXWec1* zM+id-B51DqLhr}O2ZdrZfrxi#c`dPQE#6#kChrGC8ai=qy4=nRih)rSBinOwelQ5q z8csM_I~lcxVr1<^S<}PNChMuJrE@q|KFGuSDHCu?F5!#FUe{mHl#V1y6aQ#U@nTG% z9pKRc7nEs$CNF2~ZD2*GT-G!&lIOyj&d5IlXjlY%2s@|?WZ4jdbn?Z^Ep&hb@!q2j zkP@N^D=v;;OB(%%j=-0;IU&5^5=AX)?qFhN*ln6@VJC-na%3kj?c{Si`NB@Vw3Dyi z+Vil6eW=2KO*|V2P!|mMJ+jr^`KaCB1!#C5M)nnLh-uOzY|FNI1RfgC1|lPi9wCvr z6%oO@=t+c9!cpKFehno`6q0t#Ii2g)06CppW9L0L_Fc@;t41AslZi;qV25gP7CH3` zZ?=!HLbt0GlDq0i*>38LEv{j|f@!5efD_gKaK-MbY_sp$)@q@l78z>Gaka=$!}!!` z7vpM6L+#Q~yRxbQ#-N|-8O3N6GH)2?;@KB;E}wlt=K|Un94w(Ipq`HwwUYs5L@bU* zE5rV(KR*@$On&D>$+Dg*T^OLDt{%0K3q-kszn|rf5v1#4GTj<)`_D~mNJaT;65DfU z!o)_`A5X0C#%M{n?*^at2Kx_p?1b)2nAqhfC$`rjQ$WEniS4;_d}58uMSSHSyu+8@ D#Kf67 -- 2.39.5