From: miha-q <>
Date: Sun, 3 Mar 2024 03:16:09 +0000 (-0500)
Subject: Sat Mar  2 10:16:09 PM EST 2024
X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=211c3710f0d57fc8ad703202ea451aedd5c098bc;p=QAnsel.git

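Sat Mar  2 10:16:09 PM EST 2024

Convert the simulator's floating-point types from double to float in
src/QAnsel.c, src/complex.c, src/gates.c and src/gpu.c. The OpenCL
kernel in src/gpu_mmul.cl now takes float buffers and no longer
enables the cl_khr_fp64 extension.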
---

diff --git a/src/QAnsel.c b/src/QAnsel.c
index 1d084c9..d0748da 100644
--- a/src/QAnsel.c
+++ b/src/QAnsel.c
@@ -23,20 +23,20 @@ typedef struct
 {
 	char n[128];
 	uint8_t q0, q1, q2;
-	double arg0, arg1, arg2;
+	float arg0, arg1, arg2;
 } QInstr;
 
-double qansel_rand_s(float s)
+float qansel_rand_s(float s)
 {
 	uint32_t tmp;
 	memcpy(&tmp, &s, sizeof(uint32_t));
 	srand(tmp);
 }
-double qansel_rand_h()
+float qansel_rand_h()
 {
-	return ((double)rand()) / ((double)RAND_MAX);
+	return ((float)rand()) / ((float)RAND_MAX);
 }
-double qansel_rand_t()
+float qansel_rand_t()
 {
 	if (RANDOM_FILE)
 	{
@@ -45,7 +45,7 @@ double qansel_rand_t()
 		{
 			num = (num << 8) | fgetc(RANDOM_FILE);
 		}
-		return ((double)num) / ((double)UINT32_MAX);
+		return ((float)num) / ((float)UINT32_MAX);
 	}
 	else
 	{
@@ -55,7 +55,7 @@ double qansel_rand_t()
 }
 
 
-double qansel_rand()
+float qansel_rand()
 {
 	return HIDDEN_VARIABLE ? qansel_rand_h() : qansel_rand_t();
 }
@@ -149,7 +149,7 @@ void qansel_toffoli(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t bitA, ui
 	stateVector->cols = ret.cols;
 }
 
-double* qansel_unitary(double theta, double phi, double lambda)
+float* qansel_unitary(float theta, float phi, float lambda)
 {
 	cpx_mtx_t m;
 	cpx_t a, b, c, d;
@@ -175,7 +175,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
 	cpx_mtx_t gate;
 	gate.rows = 2;
 	gate.cols = 2;
-	double* gate_ptr;
+	float* gate_ptr;
 	switch (instr->n[0])
 	{
 		case 'h': gate_ptr = Hadamard; break;
@@ -196,11 +196,11 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
 	uint8_t qubit = qubitCount - (instr->q0) - 1;
 	if (qubit == 0)
 	{
-		memcpy(filter.ptr, gate_ptr, 16 * sizeof(double));
+		memcpy(filter.ptr, gate_ptr, 16 * sizeof(float));
 	}
 	else
 	{
-		memcpy(filter.ptr, Identity, 16 * sizeof(double));
+		memcpy(filter.ptr, Identity, 16 * sizeof(float));
 	}
 
 	for (uint8_t i = 1; i < qubitCount; i++)
@@ -216,7 +216,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, uint8_t qubitCount, QInstr* inst
 
 		tmp.rows = filter.rows * gate.rows;
 		tmp.cols = filter.cols * gate.cols;
-		tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(double));
+		tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(float));
 		cpx_ncpx_knk_mt
 		(
 			tmp.ptr, tmp.rows, tmp.cols,
@@ -265,7 +265,7 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
 {
 	uint32_t qubitCountPow2 = (uint32_t)pow(2, qubitCount);
 	cpx_t n;
-	double prob0 = 0;
+	float prob0 = 0;
 	for (uint32_t i = 0; i < qubitCountPow2; i++)
 	{
 		uint8_t bit = (i >> qubit) & 1;
@@ -273,9 +273,9 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
 		if (bit == 0) prob0 += cpx_magsqr(&n);
 	}
 
-	double r = qansel_rand();
+	float r = qansel_rand();
 	uint8_t newBit = r < prob0 ? 0 : 1;
-	double probTot = 0;
+	float probTot = 0;
 	for (uint32_t i = 0; i < qubitCountPow2; i++)
 	{
 		uint8_t bit = (i >> qubit) & 1;
@@ -292,7 +292,7 @@ uint8_t qansel_measure(cpx_mtx_t* stateVector, uint8_t qubitCount, uint8_t qubit
 		cpx_mtx_set(stateVector, 0, i, &n);
 	}
 	
-	double multiplier = sqrt(1 / probTot);
+	float multiplier = sqrt(1 / probTot);
 	for (uint32_t i = 0; i < qubitCountPow2; i++)
 	{
 		uint8_t bit = (i >> qubit) & 1;
@@ -400,7 +400,7 @@ void qansel_run(uint8_t qubitCount, uint8_t bitCount, QInstr* instr, uint8_t* re
 					cpx_mtx_set(&tmp, 0, 0, &a);
 				}
 			}
-			double multiplier = 0;
+			float multiplier = 0;
 			cpx_t n;
 			cpx_mtx_get(&tmp, 0, 0, &n);
 			multiplier += cpx_magsqr(&n);
@@ -462,7 +462,7 @@ void qansel_run(uint8_t qubitCount, uint8_t bitCount, QInstr* instr, uint8_t* re
 		}
 		else if (strcmp(instr[i].n, "born") == 0)
 		{
-			double prob = 0;
+			float prob = 0;
 			for (uint32_t j = 0; j < qubitCountPow2; j++)
 			{
 				cpx_t n;
@@ -538,7 +538,7 @@ void process(int argc, char** argv)
 {
 	struct timespec ts;
 	clock_gettime(CLOCK_MONOTONIC, &ts);
-	double seed = (double)((uint64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
+	float seed = (float)((uint64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
 	qansel_rand_s(seed);
 	char** lines = malloc(0);
 	uint32_t* lineIDs = malloc(0);
@@ -709,7 +709,7 @@ void process(int argc, char** argv)
 			|| memcmp("rz(", lines[i], 3) == 0
 		)
 		{
-			double angle;
+			float angle;
 			char ty;
 			if (sscanf(lines[i], "r%c(%f/%f) q[%i]%n", &ty, &a0, &a1, &q0, &matchedCount) == 4)
 			{
@@ -1349,13 +1349,13 @@ void process(int argc, char** argv)
 			}
 			if (fullSample == 255)
 			{
-				printf(": %.1f%%\n", ((double)stats[i] / (double)shots) * (double)100);
+				printf(": %.1f%%\n", ((float)stats[i] / (float)shots) * (float)100);
 			}
 		}
 		if (fullSample != 255)
 		{
-			double prob = ((double)count / (double)shots) * (double)100;
-			printf("0: %.1f%%\n", ((double)100)-prob);
+			float prob = ((float)count / (float)shots) * (float)100;
+			printf("0: %.1f%%\n", ((float)100)-prob);
 			printf("1: %.1f%%\n", prob);
 		}
 	}
diff --git a/src/complex.c b/src/complex.c
index c5346b7..54548e3 100644
--- a/src/complex.c
+++ b/src/complex.c
@@ -9,12 +9,12 @@
 
 typedef struct
 {
-    double real, imaginary;
+    float real, imaginary;
 } cpx_t;
 
 typedef struct
 {
-    double *ptr;
+    float *ptr;
     size_t rows, cols;
 } cpx_mtx_t;
 
@@ -23,8 +23,8 @@ uint8_t* cpx_str(cpx_t* n)
     uint8_t* r;
     size_t z;
 
-    double rl = n->real;
-    double ig = n->imaginary >= 0 ? n->imaginary : -(n->imaginary);
+    float rl = n->real;
+    float ig = n->imaginary >= 0 ? n->imaginary : -(n->imaginary);
     if (ig == 0)
     {
         z = snprintf(NULL, 0, "%f", rl);
@@ -42,7 +42,7 @@ uint8_t* cpx_str(cpx_t* n)
     return r;
 }
 
-cpx_t cpx_new(double r, double i)
+cpx_t cpx_new(float r, float i)
 {
     cpx_t n;
     n.real = r;
@@ -65,17 +65,17 @@ void cpx_sub(cpx_t* r, cpx_t* a, cpx_t* b)
 void cpx_mul(cpx_t* r, cpx_t* a, cpx_t* b)
 {
     //FOIL
-    double first = a->real * b->real; //real
-    double outer = a->real * b->imaginary; //imaginary
-    double inner = a->imaginary * b->real; //imaginary
-    double last  = -(a->imaginary * b->imaginary); //real
+    float first = a->real * b->real; //real
+    float outer = a->real * b->imaginary; //imaginary
+    float inner = a->imaginary * b->real; //imaginary
+    float last  = -(a->imaginary * b->imaginary); //real
     r->real = first + last;
     r->imaginary = outer + inner;
 }
 
 //non-complex matrix multiply
 //  shared = colsA = rowsB
-void cpx_ncpx_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void cpx_ncpx_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
 {
     size_t colsA = shared;
     size_t rowsB = shared;
@@ -107,9 +107,9 @@ typedef struct
     size_t Loops;
     size_t Continue;
     size_t BlockSize;
-    double* ptrR;
-    double* ptrA;
-    double* ptrB;
+    float* ptrR;
+    float* ptrA;
+    float* ptrB;
     size_t rowsA;
     size_t colsB;
     size_t shared;
@@ -118,9 +118,9 @@ typedef struct
 void* cpx_ncpx_mmul_mtc(void *context)
 {
     cpx_mul_shared* data = (cpx_mul_shared*)context;
-    double* ptrR = data->ptrR;
-    double* ptrA = data->ptrA;
-    double* ptrB = data->ptrB;
+    float* ptrR = data->ptrR;
+    float* ptrA = data->ptrA;
+    float* ptrB = data->ptrB;
     size_t rowsA = data->rowsA;
     size_t colsB = data->colsB;
     size_t shared = data->shared;
@@ -154,7 +154,7 @@ void* cpx_ncpx_mmul_mtc(void *context)
     }
 }
 
-void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void cpx_ncpx_mmul_mt(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
 {
     cpx_mul_shared share;
     share.Threads = get_core_count();
@@ -168,10 +168,10 @@ void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, si
     {
         share.Threads = colsB;
     }
-    share.BlockSize = (size_t)floor(((double)colsB) / ((double)share.Threads));
-    share.Loops = (size_t)floor(((double)colsB) / ((double)share.BlockSize));
+    share.BlockSize = (size_t)floor(((float)colsB) / ((float)share.Threads));
+    share.Loops = (size_t)floor(((float)colsB) / ((float)share.BlockSize));
     share.Last = share.Loops - 1;
-    share.Continue = (size_t)(((double)colsB) - ((double)share.Loops) * ((double)share.BlockSize));
+    share.Continue = (size_t)(((float)colsB) - ((float)share.Loops) * ((float)share.BlockSize));
 
     pthread_t threads[share.Loops];
     cpx_mul_shared contexts[share.Loops];
@@ -204,7 +204,7 @@ void cpx_ncpx_mmul_mt(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, si
 }
 
 //non-complex kronecker product
-void cpx_ncpx_mknk(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB)
+void cpx_ncpx_mknk(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB)
 {
     size_t rowsR = rowsA * rowsB;
     size_t colsR = colsA * colsB;
@@ -225,12 +225,12 @@ void cpx_ncpx_mknk(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_
     }
 }
 
-double cpx_magsqr(cpx_t* n)
+float cpx_magsqr(cpx_t* n)
 {
     return (n->real * n->real) + (n->imaginary * n->imaginary);
 }
 
-double cpx_mag(cpx_t* n)
+float cpx_mag(cpx_t* n)
 {
     return sqrt((n->real * n->real) + (n->imaginary * n->imaginary));
 }
@@ -253,7 +253,7 @@ void cpx_mtx_set(cpx_mtx_t* m, size_t row, size_t col, cpx_t* n)
     m->ptr[(col + 1) + (row + 1) * cols] = n->real;
 }
 
-void cpx_mtx_set2(cpx_mtx_t* m, size_t row, size_t col, double real, double imaginary)
+void cpx_mtx_set2(cpx_mtx_t* m, size_t row, size_t col, float real, float imaginary)
 {
     row *= 2;
     col *= 2;
@@ -277,7 +277,7 @@ void cpx_mtx_get(cpx_mtx_t* m, size_t row, size_t col, cpx_t* n)
     //printf("ggg\n");
 }
 
-double cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
+float cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
 {
     row *= 2;
     col *= 2;
@@ -285,7 +285,7 @@ double cpx_mtx_get_real(cpx_mtx_t* m, size_t row, size_t col)
     return m->ptr[(col + 1) + (row + 1) * cols];
 }
 
-double cpx_mtx_get_imaginary(cpx_mtx_t* m, size_t row, size_t col)
+float cpx_mtx_get_imaginary(cpx_mtx_t* m, size_t row, size_t col)
 {
     row *= 2;
     col *= 2;
@@ -298,7 +298,7 @@ void cpx_mtx_init(cpx_mtx_t* m, size_t rows, size_t cols)
     m->rows = rows;
     m->cols = cols;
     size_t z = (rows * 2) * (cols * 2);
-    m->ptr = malloc(z * sizeof(double));
+    m->ptr = malloc(z * sizeof(float));
     for (size_t i = 0; i < z; i++) m->ptr[i] = 0;
 }
 
@@ -306,7 +306,7 @@ void cpx_mtx_expand_row(cpx_mtx_t* m)
 {
     m->rows += 1;
     size_t z = (m->rows * 2) * (m->cols * 2);
-    m->ptr = realloc(m->ptr, z * sizeof(double));
+    m->ptr = realloc(m->ptr, z * sizeof(float));
 }
 
 
@@ -325,13 +325,13 @@ typedef struct
     size_t Loops;
     size_t Continue;
     size_t BlockSize;
-    double* ptrR;
+    float* ptrR;
     size_t rowsR;
     size_t colsR;
-    double* ptrA;
+    float* ptrA;
     size_t rowsA;
     size_t colsA;
-    double* ptrB;
+    float* ptrB;
     size_t rowsB;
     size_t colsB;
 } cpx_knk_shared;
@@ -339,13 +339,13 @@ typedef struct
 void* cpx_ncpx_knk_mtc(void *context)
 {
     cpx_knk_shared* data = (cpx_knk_shared*)context;
-    double* ptrR = data->ptrR;
+    float* ptrR = data->ptrR;
     size_t rowsR = data->rowsR;
     size_t colsR = data->colsR;
-    double* ptrA = data->ptrA;
+    float* ptrA = data->ptrA;
     size_t rowsA = data->rowsA;
     size_t colsA = data->colsA;
-    double* ptrB = data->ptrB;
+    float* ptrB = data->ptrB;
     size_t rowsB = data->rowsB;
     size_t colsB = data->colsB;
 
@@ -361,15 +361,15 @@ void* cpx_ncpx_knk_mtc(void *context)
             size_t rowB = rowR % rowsB;
             size_t colB = colR % colsB;
 
-            double r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-            double i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-            double r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-            double i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+            float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+            float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+            float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+            float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
 
-            double first = r1 * r2; //real
-            double outer = r1 * i2; //imaginary
-            double inner = i1 * r2; //imaginary
-            double last  = -(i1 * i2); //real
+            float first = r1 * r2; //real
+            float outer = r1 * i2; //imaginary
+            float inner = i1 * r2; //imaginary
+            float last  = -(i1 * i2); //real
             r1 = first + last;
             i1 = outer + inner;
 
@@ -384,13 +384,13 @@ void* cpx_ncpx_knk_mtc(void *context)
 
 void cpx_ncpx_knk_mt
 (    
-    double* ptrR,
+    float* ptrR,
     size_t rowsR,
     size_t colsR,
-    double* ptrA,
+    float* ptrA,
     size_t rowsA,
     size_t colsA,
-    double* ptrB,
+    float* ptrB,
     size_t rowsB,
     size_t colsB
 )
@@ -411,10 +411,10 @@ void cpx_ncpx_knk_mt
     {
         share.Threads = colsR;
     }
-    share.BlockSize = (size_t)floor(((double)colsR) / ((double)share.Threads));
-    share.Loops = (size_t)floor(((double)colsR) / ((double)share.BlockSize));
+    share.BlockSize = (size_t)floor(((float)colsR) / ((float)share.Threads));
+    share.Loops = (size_t)floor(((float)colsR) / ((float)share.BlockSize));
     share.Last = share.Loops - 1;
-    share.Continue = (size_t)(((double)colsR) - ((double)share.Loops) * ((double)share.BlockSize));
+    share.Continue = (size_t)(((float)colsR) - ((float)share.Loops) * ((float)share.BlockSize));
 
     pthread_t threads[share.Loops];
     cpx_knk_shared contexts[share.Loops];
@@ -448,13 +448,13 @@ void cpx_ncpx_knk_mt
 
 void cpx_ncpx_knk
 (
-    double* ptrR,
+    float* ptrR,
     size_t rowsR,
     size_t colsR,
-    double* ptrA,
+    float* ptrA,
     size_t rowsA,
     size_t colsA,
-    double* ptrB,
+    float* ptrB,
     size_t rowsB,
     size_t colsB
 )
@@ -468,15 +468,15 @@ void cpx_ncpx_knk
             size_t rowB = rowR % rowsB;
             size_t colB = colR % colsB;
 
-            double r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
-            double i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
-            double r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
-            double i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
+            float r1 = ptrA[((colA * 2) + 1) + ((rowA * 2) + 1) * (colsA * 2)];
+            float i1 = ptrA[(colA * 2) + ((rowA * 2) + 1) * (colsA * 2)];
+            float r2 = ptrB[((colB * 2) + 1) + ((rowB * 2) + 1) * (colsB * 2)];
+            float i2 = ptrB[(colB * 2) + ((rowB * 2) + 1) * (colsB * 2)];
 
-            double first = r1 * r2; //real
-            double outer = r1 * i2; //imaginary
-            double inner = i1 * r2; //imaginary
-            double last  = -(i1 * i2); //real
+            float first = r1 * r2; //real
+            float outer = r1 * i2; //imaginary
+            float inner = i1 * r2; //imaginary
+            float last  = -(i1 * i2); //real
             r1 = first + last;
             i1 = outer + inner;
 
diff --git a/src/gates.c b/src/gates.c
index bca311d..73bb74d 100644
--- a/src/gates.c
+++ b/src/gates.c
@@ -1,5 +1,5 @@
 
-double Identity[] = 
+float Identity[] = 
 {
     1, 0,    0, 0,    
     0, 1,    0, 0,
@@ -8,7 +8,7 @@ double Identity[] =
     0, 0,    0, 1
 };
 
-double PauliX[] = 
+float PauliX[] = 
 {
     0, 0,    1, 0,    
     0, 0,    0, 1,
@@ -17,7 +17,7 @@ double PauliX[] =
     0, 1,    0, 0
 };
 
-double PauliY[] =
+float PauliY[] =
 {
     0, 0,    0, 1,
     0, 0,   -1, 0,
@@ -26,7 +26,7 @@ double PauliY[] =
     1, 0,    0, 0,
 };
 
-double PauliZ[] = 
+float PauliZ[] = 
 {
     1, 0,    0, 0,    
     0, 1,    0, 0,
@@ -37,7 +37,7 @@ double PauliZ[] =
 
 //  1/sqrt(2)
 #define R 0.7071067811865475
-double Hadamard[] =
+float Hadamard[] =
 {
     R, 0,   R, 0,
     0, R,   0, R,
@@ -46,7 +46,7 @@ double Hadamard[] =
     0, R,   0,-R,
 };
 
-double PhaseS[] =
+float PhaseS[] =
 {
     1, 0,    0, 0,
     0, 1,    0, 0,
@@ -56,7 +56,7 @@ double PhaseS[] =
 };
 
 // 1/sqrt(2) + 1/sqrt(2)i
-double PhaseT[] =
+float PhaseT[] =
 {
     1, 0,    0, 0,
     0, 1,    0, 0,
@@ -65,7 +65,7 @@ double PhaseT[] =
     0, 0,    R, R
 };
 
-double ControlledNOT[] = 
+float ControlledNOT[] = 
 {
     1, 0,    0, 0,    0, 0,    0, 0,
     0, 1,    0, 0,    0, 0,    0, 0, 
diff --git a/src/gpu.c b/src/gpu.c
index 4d31573..ab46aae 100644
--- a/src/gpu.c
+++ b/src/gpu.c
@@ -64,39 +64,39 @@ void GPU_clean()
 	}
 }
 
-void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t colsB, size_t shared)
+void GPU_mmul(float* ptrR, float* ptrA, float* ptrB, size_t rowsA, size_t colsB, size_t shared)
 {
 	//Create buffers
 	size_t sizeA = rowsA * shared;
 	size_t sizeB = shared * colsB;
 	size_t sizeR = rowsA * colsB;
 	cl_int err;
-	cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeA, NULL, &err);
+	cl_mem memA = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeA, NULL, &err);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
 		exit(1);
 	}
-	cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(double) * sizeB, NULL, &err);
+	cl_mem memB = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY, sizeof(float) * sizeB, NULL, &err);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
 		exit(1);
 	}
-	cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(double) * sizeR, NULL, &err);
+	cl_mem memR = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(float) * sizeR, NULL, &err);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clCreateBuffer() failed.\n");
 		exit(1);
 	}
 	//Populate buffers
-	err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(double) * sizeA, ptrA, 0, NULL, NULL);
+	err = clEnqueueWriteBuffer(GPU_command_queue, memA, CL_TRUE, 0, sizeof(float) * sizeA, ptrA, 0, NULL, NULL);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
 		exit(1);
 	}
-	err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(double) * sizeB, ptrB, 0, NULL, NULL);
+	err = clEnqueueWriteBuffer(GPU_command_queue, memB, CL_TRUE, 0, sizeof(float) * sizeB, ptrB, 0, NULL, NULL);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
@@ -184,7 +184,7 @@ void GPU_mmul(double* ptrR, double* ptrA, double* ptrB, size_t rowsA, size_t col
 		exit(1);
 	}
 	//Read results
-	err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(double) * sizeR, ptrR, 0, NULL, NULL);
+	err = clEnqueueReadBuffer(GPU_command_queue, memR, CL_TRUE, 0, sizeof(float) * sizeR, ptrR, 0, NULL, NULL);
 	if (err != CL_SUCCESS)
 	{
 		fprintf(stderr, "GPU fatal error: clEnqueueWriteBuffer() failed.\n");
diff --git a/src/gpu_mmul.cl b/src/gpu_mmul.cl
index 508a391..5f649a2 100644
--- a/src/gpu_mmul.cl
+++ b/src/gpu_mmul.cl
@@ -1,9 +1,8 @@
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-__kernel void gpu_mmul(__global double* ptrR, __global double* ptrA, __global double* ptrB, const int N, const int W)
+__kernel void gpu_mmul(__global float* ptrR, __global float* ptrA, __global float* ptrB, const int N, const int W)
 {
     int row = get_global_id(0);
     int col = get_global_id(1);
-    double sum = 0;
+    float sum = 0;
     for (int i = 0; i < N; i++)
     {
         sum += ptrA[row * W + i] * ptrB[i * W + col];