Sat Mar 2 10:16:09 PM EST 2024

author    miha-q <>  Sun, 3 Mar 2024 03:16:09 +0000 (22:16 -0500)
committer miha-q <>  Sun, 3 Mar 2024 03:16:09 +0000 (22:16 -0500)

diff --git a/src/QAnsel.c b/src/QAnsel.c

index 1d084c93dcc1f4ba66334bba1b5c5475b7f051c5..d0748da94741f0ad25cc1b307c113b8042c1cc46 100644 (file)
--- a/src/QAnsel.c
+++ b/src/QAnsel.c
@@ -23,20 +23,20 @@ typedef struct
  {
         char n[128];
         uint8_t q0, q1, q2;
-       double arg0, arg1, arg2;
+       float arg0, arg1, arg2;
  } QInstr;
  
-double qansel_rand_s(float s)
+float qansel_rand_s(float s)
  {
         uint32_t tmp;
         memcpy(&tmp, &s, sizeof(uint32_t));
         srand(tmp);
  }
-double qansel_rand_h()
+float qansel_rand_h()
  {
-       return ((double)rand()) / ((double)RAND_MAX);
+       return ((float)rand()) / ((float)RAND_MAX);
  }
-double qansel_rand_t()
+float qansel_rand_t()
  {
         if (RANDOM_FILE)
         {
@@ -45,7 +45,7 @@ double qansel_rand_t()
                 {
                         num = (num << 8) | fgetc(RANDOM_FILE);
                 }
-               return ((double)num) / ((double)UINT32_MAX);
+               return ((float)num) / ((float)UINT32_MAX);
         }
         else
         {
@@ -55,7 +55,7 @@ double qansel_rand_t()
  }
  
  
-double qansel_rand()
+float qansel_rand()
  {
         return HIDDEN_VARIABLE ? qansel_rand_h() : qansel_rand_t();
  }
@@ -149,7 +149,7 @@ void qansel_toffoli(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t bitA, ui
         stateVector->cols = ret.cols;
  }
  
-double* qansel_unitary(double theta, double phi, double lambda)
+float* qansel_unitary(float theta, float phi, float lambda)
  {
         cpx_mtx_t m;
         cpx_t a, b, c, d;
@@ -175,7 +175,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
         cpx_mtx_t gate;
         gate.rows = 2;
         gate.cols = 2;
-       double* gate_ptr;
+       float* gate_ptr;
         switch (instr->n[0])
         {
                 case 'h': gate_ptr = Hadamard; break;
@@ -196,11 +196,11 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
         uint8_t qubit = qubitCount - (instr->q0) - 1;
         if (qubit == 0)
         {
-               memcpy(filter.ptr, gate_ptr, 16 * sizeof(double));
+               memcpy(filter.ptr, gate_ptr, 16 * sizeof(float));
         }
         else
         {
-               memcpy(filter.ptr, Identity, 16 * sizeof(double));
+               memcpy(filter.ptr, Identity, 16 * sizeof(float));
         }
  
         for (uint8_t i = 1; i < qubitCount; i++)
@@ -216,7 +216,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
  
                 tmp.rows = filter.rows * gate.rows;
                 tmp.cols = filter.cols * gate.cols;
-               tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(double));
+               tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(float));
                 cpx_ncpx_knk_mt
                 (
                         tmp.ptr, tmp.rows, tmp.cols,
@@ -265,7 +265,7 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
  {
         uint32_t qubitCountPow2 = (uint32_t)pow(2, qubitCount);
         cpx_t n;
-       double prob0 = 0;
+       float prob0 = 0;
         for (uint32_t i = 0; i < qubitCountPow2; i++)
         {
                 uint8_t bit = (i >> qubit) & 1;
@@ -273,9 +273,9 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
                 if (bit == 0) prob0 += cpx_magsqr(&n);
         }
  
-       double r = qansel_rand();
+       float r = qansel_rand();
         uint8_t newBit = r < prob0 ? 0 : 1;
-       double probTot = 0;
+       float probTot = 0;
         for (uint32_t i = 0; i < qubitCountPow2; i++)
         {
                 uint8_t bit = (i >> qubit) & 1;
@@ -292,7 +292,7 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
                 cpx_mtx_set(stateVector, 0, i, &n);
         }
         
-       double multiplier = sqrt(1 / probTot);
+       float multiplier = sqrt(1 / probTot);
         for (uint32_t i = 0; i < qubitCountPow2; i++)
         {
                 uint8_t bit = (i >> qubit) & 1;
@@ -400,7 +400,7 @@ void qansel_run(uint8_t qubitCount, uint8_t bitCount, QInstr* instr, uint8_t* re
                                         cpx_mtx_set(&tmp, 0, 0, &a);
                                 }
                         }
-                       double multiplier = 0;
+                       float multiplier = 0;
                         cpx_t n;
                         cpx_mtx_get(&tmp, 0, 0, &n);
                         multiplier += cpx_magsqr(&n);
@@ -462,7 +462,7 @@ void qansel_run(uint8_t qubitCount, uint8_t bitCount, QInstr* instr, uint8_t* re
                 }
                 else if (strcmp(instr[i].n, "born") == 0)
                 {
-                       double prob = 0;
+                       float prob = 0;
                         for (uint32_t j = 0; j < qubitCountPow2; j++)
                         {
                                 cpx_t n;
@@ -538,7 +538,7 @@ void process(int argc, char** argv)
  {
         struct timespec ts;
         clock_gettime(CLOCK_MONOTONIC, &ts);
-       double seed = (double)((uint64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
+       float seed = (float)((uint64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
         qansel_rand_s(seed);
         char** lines = malloc(0);
         uint32_t* lineIDs = malloc(0);
@@ -709,7 +709,7 @@ void process(int argc, char** argv)
                         || memcmp("rz(", lines[i], 3) == 0
                 )
                 {
-                       double angle;
+                       float angle;
                         char ty;
                         if (sscanf(lines[i], "r%c(%f/%f) q[%i]%n", &ty, &a0, &a1, &q0, &matchedCount) == 4)
                         {
@@ -1349,13 +1349,13 @@ void process(int argc, char** argv)
                         }
                         if (fullSample == 255)
                         {
-                               printf(": %.1f%%\n", ((double)stats[i] / (double)shots) * (double)100);
+                               printf(": %.1f%%\n", ((float)stats[i] / (float)shots) * (float)100);
                         }
                 }
                 if (fullSample != 255)
                 {
-                       double prob = ((double)count / (double)shots) * (double)100;
-                       printf("0: %.1f%%\n", ((double)100)-prob);
+                       float prob = ((float)count / (float)shots) * (float)100;
+                       printf("0: %.1f%%\n", ((float)100)-prob);
                         printf("1: %.1f%%\n", prob);
                 }
         }
diff --git a/src/complex.c b/src/complex.c

index c5346b772f3bd97d7135666b29c61756bf8a14d8..54548e307e77e1995c93afc5f1f41a25e6e838b0 100644 (file)
--- a/src/complex.c
+++ b/src/complex.c
@@ -9,12 +9,12 @@
  
  typedef struct
  {
-    double real, imaginary;
+    float real, imaginary;
  } cpx_t;
  
  typedef struct
  {
-    double *ptr;
+    float *ptr;
      size_t rows, cols;
  } cpx_mtx_t;
  
@@ -23,8 +23,8 @@ uint8_t* cpx_str(cpx_t* n)
      uint8_t* r;
      size_t z;
  
-    double rl = n->real;
-    double ig = n->imaginary >= 0 ? n->imaginary : -(n->imaginary);
+    float rl = n->real;
+    float ig = n->imaginary >= 0 ? n->imaginary : -(n->imaginary);
      if (ig == 0)
      {
          z = snprintf(NULL, 0, "%f", rl);
@@ -42,7 +42,7 @@ uint8_t* cpx_str(cpx_t* n)
      return r;
  }
  
-cpx_t cpx_new(double r, double i)
+cpx_t cpx_new(float r, float i)
  {
      cpx_t n;
      n.real = r;
@@ -65,17 +65,17 @@ void cpx_sub(cpx_t* r, cpx_t* a, cpx_t* b)
  void cpx_mul(cpx_t* r, cpx_t* a, cpx_t* b)
  {
      //FOIL
-    double first = a->real * b->real; //real
-    double outer = a->real * b->imaginary; //imaginary
-    double inner = a->imaginary * b->real; //imaginary
-    double last  = -(a->imaginary * b->imaginary); //real
+    float first = a->real * b->real; //real
+    float outer = a->real * b->imaginary; //imaginary
+    float inner = a->imaginary * b->real; //imaginary
+    float last  = -(a->imaginary * b->imaginary); //real
      r->real = first + last;
      r->imaginary = outer + inner;
  }
  
  //non-complex matrix multiply
  //  shared = colsA = rowsB
-void cpx_ncpx_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void cpx_ncpx_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
  {
      size_t colsA = shared;
      size_t rowsB = shared;
@@ -107,9 +107,9 @@ typedef struct
      size_t Loops;
      size_t Continue;
      size_t BlockSize;
-    double* ptrR;
-    double* ptrA;
-    double* ptrB;
+    float* ptrR;
+    float* ptrA;
+    float* ptrB;
      size_t rowsA;
      size_t colsB;
      size_t shared;
@@ -118,9 +118,9 @@ typedef struct
  void* cpx_ncpx_mmul_mtc(void *context)
  {
      cpx_mul_shared* data = (cpx_mul_shared*)context;
-    double* ptrR = data->ptrR;
-    double* ptrA = data->ptrA;
-    double* ptrB = data->ptrB;
+    float* ptrR = data->ptrR;
+    float* ptrA = data->ptrA;
+    float* ptrB = data->ptrB;
      size_t rowsA = data->rowsA;
      size_t colsB = data->colsB;
      size_t shared = data->shared;
@@ -154,7 +154,7 @@ void* cpx_ncpx_mmul_mtc(void *context)
      }
  }
  
-void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void cpx_ncpx_mmul_mt(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
  {
      cpx_mul_shared share;
      share.Threads = get_core_count();
@@ -168,10 +168,10 @@ void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, si
      {
          share.Threads = colsB;
      }
-    share.BlockSize = (size_t)floor(((double)colsB) / ((double)share.Threads));
-    share.Loops = (size_t)floor(((double)colsB) / ((double)share.BlockSize));
+    share.BlockSize = (size_t)floor(((float)colsB) / ((float)share.Threads));
+    share.Loops = (size_t)floor(((float)colsB) / ((float)share.BlockSize));
      share.Last = share.Loops - 1;
-    share.Continue = (size_t)(((double)colsB) - ((double)share.Loops) * ((double)share.BlockSize));
+    share.Continue = (size_t)(((float)colsB) - ((float)share.Loops) * ((float)share.BlockSize));
  
      pthread_t threads[share.Loops];
      cpx_mul_shared contexts[share.Loops];
@@ -204,7 +204,7 @@ void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, si
  }
  
  //non-complex kronecker product
-void cpx_ncpx_mknk(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB)
+void cpx_ncpx_mknk(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB)
  {
      size_t rowsR = rowsA * rowsB;
      size_t colsR = colsA * colsB;
@@ -225,12 +225,12 @@ void cpx_ncpx_mknk(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_
      }
  }
  
-double cpx_magsqr(cpx_t* n)
+float cpx_magsqr(cpx_t* n)
  {
      return (n->real * n->real) + (n->imaginary * n->imaginary);
  }
  
-double cpx_mag(cpx_t* n)
+float cpx_mag(cpx_t* n)
  {
      return sqrt((n->real * n->real) + (n->imaginary * n->imaginary));
  }
@@ -253,7 +253,7 @@ void cpx_mtx_set(cpx_mtx_t* m, size_t row, size_t col, cpx_t* n)
      m->ptr[(col + 1) + (row + 1) * cols] = n->real;
  }
  
-void cpx_mtx_set2(cpx_mtx_t* m, size_t row, size_t col, double real, double imaginary)
+void cpx_mtx_set2(cpx_mtx_t* m, size_t row, size_t col, float real, float imaginary)
  {
      row *= 2;
      col *= 2;
@@ -277,7 +277,7 @@ void cpx_mtx_get(cpx_mtx_t* m, size_t row, size_t col, cpx_t* n)
      //printf("ggg\n");
  }
  
-double cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
+float cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
  {
      row *= 2;
      col *= 2;
@@ -285,7 +285,7 @@ double cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
      return m->ptr[(col + 1) + (row + 1) * cols];
  }
  
-double cpx_mtx_get_imaginary(cpx_mtx_t* m, size_t row, size_t col)
+float cpx_mtx_get_imaginary(cpx_mtx_t* m, size_t row, size_t col)
  {
      row *= 2;
      col *= 2;
@@ -298,7 +298,7 @@ void cpx_mtx_init(cpx_mtx_t* m, size_t rows, size_t cols)
      m->rows = rows;
      m->cols = cols;
      size_t z = (rows * 2) * (cols * 2);
-    m->ptr = malloc(z * sizeof(double));
+    m->ptr = malloc(z * sizeof(float));
      for (size_t i = 0; i < z; i++) m->ptr[i] = 0;
  }
  
@@ -306,7 +306,7 @@ void cpx_mtx_expand_row(cpx_mtx_t* m)
  {
      m->rows += 1;
      size_t z = (m->rows * 2) * (m->cols * 2);
-    m->ptr = realloc(m->ptr, z * sizeof(double));
+    m->ptr = realloc(m->ptr, z * sizeof(float));
  }
  
  
@@ -325,13 +325,13 @@ typedef struct
      size_t Loops;
      size_t Continue;
      size_t BlockSize;
-    double* ptrR;
+    float* ptrR;
      size_t rowsR;
      size_t colsR;
-    double* ptrA;
+    float* ptrA;
      size_t rowsA;
      size_t colsA;
-    double* ptrB;
+    float* ptrB;
      size_t rowsB;
      size_t colsB;
  } cpx_knk_shared;
@@ -339,13 +339,13 @@ typedef struct
  void* cpx_ncpx_knk_mtc(void *context)
  {
      cpx_knk_shared* data = (cpx_knk_shared*)context;
-    double* ptrR = data->ptrR;
+    float* ptrR = data->ptrR;
      size_t rowsR = data->rowsR;
      size_t colsR = data->colsR;
-    double* ptrA = data->ptrA;
+    float* ptrA = data->ptrA;
      size_t rowsA = data->rowsA;
      size_t colsA = data->colsA;
-    double* ptrB = data->ptrB;
+    float* ptrB = data->ptrB;
      size_t rowsB = data->rowsB;
      size_t colsB = data->colsB;
  
@@ -361,15 +361,15 @@ void* cpx_ncpx_knk_mtc(void *context)
              size_t rowB = rowR % rowsB;
              size_t colB = colR % colsB;
  
-            double r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-            double i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-            double r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-            double i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+            float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+            float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+            float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+            float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
  
-            double first = r1 * r2; //real
-            double outer = r1 * i2; //imaginary
-            double inner = i1 * r2; //imaginary
-            double last  = -(i1 * i2); //real
+            float first = r1 * r2; //real
+            float outer = r1 * i2; //imaginary
+            float inner = i1 * r2; //imaginary
+            float last  = -(i1 * i2); //real
              r1 = first + last;
              i1 = outer + inner;
  
@@ -384,13 +384,13 @@ void* cpx_ncpx_knk_mtc(void *context)
  
  void cpx_ncpx_knk_mt
  (    
-    double* ptrR,
+    float* ptrR,
      size_t rowsR,
      size_t colsR,
-    double* ptrA,
+    float* ptrA,
      size_t rowsA,
      size_t colsA,
-    double* ptrB,
+    float* ptrB,
      size_t rowsB,
      size_t colsB
  )
@@ -411,10 +411,10 @@ void cpx_ncpx_knk_mt
      {
          share.Threads = colsR;
      }
-    share.BlockSize = (size_t)floor(((double)colsR) / ((double)share.Threads));
-    share.Loops = (size_t)floor(((double)colsR) / ((double)share.BlockSize));
+    share.BlockSize = (size_t)floor(((float)colsR) / ((float)share.Threads));
+    share.Loops = (size_t)floor(((float)colsR) / ((float)share.BlockSize));
      share.Last = share.Loops - 1;
-    share.Continue = (size_t)(((double)colsR) - ((double)share.Loops) * ((double)share.BlockSize));
+    share.Continue = (size_t)(((float)colsR) - ((float)share.Loops) * ((float)share.BlockSize));
  
      pthread_t threads[share.Loops];
      cpx_knk_shared contexts[share.Loops];
@@ -448,13 +448,13 @@ void cpx_ncpx_knk_mt
  
  void cpx_ncpx_knk
  (
-    double* ptrR,
+    float* ptrR,
      size_t rowsR,
      size_t colsR,
-    double* ptrA,
+    float* ptrA,
      size_t rowsA,
      size_t colsA,
-    double* ptrB,
+    float* ptrB,
      size_t rowsB,
      size_t colsB
  )
@@ -468,15 +468,15 @@ void cpx_ncpx_knk
              size_t rowB = rowR % rowsB;
              size_t colB = colR % colsB;
  
-            double r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-            double i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-            double r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-            double i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+            float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+            float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+            float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+            float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
  
-            double first = r1 * r2; //real
-            double outer = r1 * i2; //imaginary
-            double inner = i1 * r2; //imaginary
-            double last  = -(i1 * i2); //real
+            float first = r1 * r2; //real
+            float outer = r1 * i2; //imaginary
+            float inner = i1 * r2; //imaginary
+            float last  = -(i1 * i2); //real
              r1 = first + last;
              i1 = outer + inner;
  
diff --git a/src/gates.c b/src/gates.c

index bca311d9caefd42f9b1e14f8e45f122d2d86563c..73bb74d3833db6148c14ac37d4b5815527c8ee4f 100644 (file)
--- a/src/gates.c
+++ b/src/gates.c
@@ -1,5 +1,5 @@
  
-double Identity[] = 
+float Identity[] = 
  {
      1, 0,    0, 0,    
      0, 1,    0, 0,
@@ -8,7 +8,7 @@ double Identity[] =
      0, 0,    0, 1
  };
  
-double PauliX[] = 
+float PauliX[] = 
  {
      0, 0,    1, 0,    
      0, 0,    0, 1,
@@ -17,7 +17,7 @@ double PauliX[] =
      0, 1,    0, 0
  };
  
-double PauliY[] =
+float PauliY[] =
  {
      0, 0,    0, 1,
      0, 0,   -1, 0,
@@ -26,7 +26,7 @@ double PauliY[] =
      1, 0,    0, 0,
  };
  
-double PauliZ[] = 
+float PauliZ[] = 
  {
      1, 0,    0, 0,    
      0, 1,    0, 0,
@@ -37,7 +37,7 @@ double PauliZ[] =
  
  //  1/sqrt(2)
  #define R 0.7071067811865475
-double Hadamard[] =
+float Hadamard[] =
  {
      R, 0,   R, 0,
      0, R,   0, R,
@@ -46,7 +46,7 @@ double Hadamard[] =
      0, R,   0,-R,
  };
  
-double PhaseS[] =
+float PhaseS[] =
  {
      1, 0,    0, 0,
      0, 1,    0, 0,
@@ -56,7 +56,7 @@ double PhaseS[] =
  };
  
  // 1/sqrt(2) + 1/sqrt(2)i
-double PhaseT[] =
+float PhaseT[] =
  {
      1, 0,    0, 0,
      0, 1,    0, 0,
@@ -65,7 +65,7 @@ double PhaseT[] =
      0, 0,    R, R
  };
  
-double ControlledNOT[] = 
+float ControlledNOT[] = 
  {
      1, 0,    0, 0,    0, 0,    0, 0,
      0, 1,    0, 0,    0, 0,    0, 0, 
diff --git a/src/gpu.c b/src/gpu.c

index 4d315730f587204e49a4cc87688c9e314e1759aa..ab46aae693d8de2cbe64dcaea5bff7ad6f350eee 100644 (file)
--- a/src/gpu.c
+++ b/src/gpu.c
@@ -64,39 +64,39 @@ void GPU_clean()
         }
  }
  
-void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
  {
         //Create buffers
         size_t sizeA = rowsA * shared;
         size_t sizeB = shared * colsB;
         size_t sizeR = rowsA * colsB;
         cl_int err;
-       cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeA, NULL, &err);
+       cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
                 exit(1);
         }
-       cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeB, NULL, &err);
+       cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
                 exit(1);
         }
-       cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(double) * sizeR, NULL, &err);
+       cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
                 exit(1);
         }
         //Populate buffers
-       err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(double) * sizeA, ptrA, 0, NULL, NULL);
+       err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
                 exit(1);
         }
-       err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(double) * sizeB, ptrB, 0, NULL, NULL);
+       err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
@@ -184,7 +184,7 @@ void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t col
                 exit(1);
         }
         //Read results
-       err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(double) * sizeR, ptrR, 0, NULL, NULL);
+       err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL);
         if (err != CL_SUCCESS)
         {
                 fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
diff --git a/src/gpu_mmul.cl b/src/gpu_mmul.cl

index 508a391392b2e61f949e491d2e5e93b2b0360d06..5f649a241302222ce686b1dacf1a41fd75ed0427 100644 (file)
--- a/src/gpu_mmul.cl
+++ b/src/gpu_mmul.cl
@@ -1,9 +1,8 @@
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-__kernel void gpu_mmul(__global double* ptrR, __global double* ptrA, __global double* ptrB, const int N, const int W)
+__kernel void gpu_mmul(__global float* ptrR, __global float* ptrA, __global float* ptrB, const int N, const int W)
  {
      int row = get_global_id(0);
      int col = get_global_id(1);
-    double sum = 0;
+    float sum = 0;
      for (int i = 0; i < N; i++)
      {
          sum += ptrA[row * W + i] * ptrB[i * W + col];
author	miha-q <>
	Sun, 3 Mar 2024 03:16:09 +0000 (22:16 -0500)
committer	miha-q <>
	Sun, 3 Mar 2024 03:16:09 +0000 (22:16 -0500)
src/QAnsel.c		patch \| blob \| history
src/complex.c		patch \| blob \| history
src/gates.c		patch \| blob \| history
src/gpu.c		patch \| blob \| history
src/gpu_mmul.cl		patch \| blob \| history