Mon Jan 20 12:00:08 AM EST 2025

author miha-q <>

Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)

committer miha-q <>

Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)
author miha-q <>
Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)
committer miha-q <>
Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)
diff --git a/src/complex.c b/src/complex.c

index 8916788c1dc408d41d4cc59d97494784f727b8af..3ad361a4c1473fea56b8eca133be5c7b1c6e40af 100644 (file)
--- a/src/complex.c
+++ b/src/complex.c
@@ -299,19 +299,19 @@ void cpx_mtx_knk_threads_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, i
  void* cpx_mtx_dot_threads_run(void *context)
  {
         cpx_thread_context* ctx = (cpx_thread_context*)context;
-    for (int i = 0; i < (ctx->rowsA); i++)
-    {
+       for (int i = 0; i < (ctx->rowsA); i++)
+       {
                 for (int j = 0; j < (ctx->delimeterCount); j++)
                 {
                         kernel_dot(ctx->ptrR, ctx->ptrA, ctx->ptrB, ctx->rowsA, ctx->colsA, ctx->rowsB, ctx->colsB, i, j + (ctx->delimeterStart));
-       }
-    }
+               }
+       }
  }
  
  void cpx_mtx_dot_threads(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
  {
         #ifdef __PTHREAD__
-    int delimeter = colsB;
+       int delimeter = colsB;
         int cores = qansel_get_core_count();
         int threadCount = cores;
         if (threadCount > delimeter) threadCount = delimeter;
@@ -319,7 +319,7 @@ void cpx_mtx_dot_threads(float* ptrR, float* ptrA, float* ptrB, int rowsA, int c
         int leftOvers = delimeter % threadCount;
         cpx_thread_context ctx = {ptrR, ptrA, ptrB, rowsA, colsA, rowsB, colsB, 0, 0};
         cpx_thread_context ctxs[threadCount];
-    pthread_t threads[threadCount];
+       pthread_t threads[threadCount];
         for (int i = 0; i < threadCount; i++)
         {
                 ctxs[i].ptrR = ctx.ptrR;
@@ -338,13 +338,13 @@ void cpx_mtx_dot_threads(float* ptrR, float* ptrA, float* ptrB, int rowsA, int c
              exit(1);
          }
         }
-    for (unsigned int i = 0; i < threadCount; i++)
-    {
-        if (pthread_join(threads[i], NULL))
-        {
-            fprintf(stderr, "QAnsel: Thread error. (2)\n");
-        }
-    }
+       for (unsigned int i = 0; i < threadCount; i++)
+       {
+               if (pthread_join(threads[i], NULL))
+               {
+                       fprintf(stderr, "QAnsel: Thread error. (2)\n");
+               }
+       }
         #else
         cpx_mtx_dot_threads(ptrR, ptrA, ptrB, rowsA, colsA, rowsB, colsB);
         #endif
@@ -431,7 +431,7 @@ unsigned char cpx_mtx_begin(unsigned char verbose)
         #ifdef __OPENCL__
         cl_uint count;
         cl_int err;
-       
+
         err = clGetPlatformIDs(1, &cpx_mtx_platform_id, &count);
         if (err != CL_SUCCESS || count == 0)
         {
@@ -511,8 +511,8 @@ void cpx_mtx_clean()
  void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
  {
         #ifdef __OPENCL__
-    int rowsR = rowsA;
-    int colsR = colsB;
+       int rowsR = rowsA;
+       int colsR = colsB;
  
         //Create buffers
         size_t sizeR = ((colsR * 2) * rowsR) * sizeof(float);
@@ -522,7 +522,7 @@ void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
         cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
         cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(err);
         cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(err);
-       
+
         //Populate buffers
         err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL); gpuerr(err);
         err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL); gpuerr(err);
@@ -578,14 +578,14 @@ void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
         err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL);
         gpuerr(err);
  
-       //Wait for completion
-       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
-       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
-
         //Read results
         err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
         gpuerr(err);
  
+       //Wait for completion
+       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
+
         //Clean up
         err = clReleaseKernel(kernel); gpuerr(err);
         err = clReleaseProgram(program); gpuerr(err);
@@ -610,14 +610,14 @@ void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
         cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
         cl_mem memB = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeB, NULL, &err); gpuerr(err);
         cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_WRITE_ONLY, sizeR, NULL, &err); gpuerr(err);
-       
+
         //Populate buffers
         unsigned long long int q = qansel_get_time();
         err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL);
-    gpuerr(err);
+       gpuerr(err);
         err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL);
-    gpuerr(err);
-       
+       gpuerr(err);
+
         //Load and compile program
         cl_program program;
         if (cpx_mtx_cache == NULL)
@@ -674,14 +674,14 @@ void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
         err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR}, NULL, 0, NULL, NULL);
         gpuerr(err);
  
-       //Wait for completion
-       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
-       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
-
         //Read results
         err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
         gpuerr(err);
  
+       //Wait for completion
+       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
+
         //Clean up
         err = clReleaseKernel(kernel); gpuerr(err);
         err = clReleaseProgram(program); gpuerr(err);
@@ -695,7 +695,7 @@ void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
  
  //This only works if ptrA is NxM where both N and M are divisible by two,
  //     and ptrB is 2x2. If both are true, this is much more efficient than
-//     the standard knk_metal() function. 
+//     the standard knk_metal() function.
  void cpx_mtx_knk_metal_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
  {
         #ifdef __OPENCL__
@@ -707,7 +707,7 @@ void cpx_mtx_knk_metal_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, int
         cl_int err;
         cl_mem memA = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeA, NULL, &err); gpuerr(err);
         cl_mem memR = clCreateBuffer(cpx_mtx_context, CL_MEM_READ_ONLY, sizeR, NULL, &err); gpuerr(err);
-       
+
         //Populate buffers
         err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memA, CL_TRUE, 0, sizeA, ptrA, 0, NULL, NULL); gpuerr(err);
  
@@ -781,14 +781,14 @@ void cpx_mtx_knk_metal_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, int
         err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR / 2}, (size_t[]){2}, 0, NULL, NULL);
         gpuerr(err);
  
-       //Wait for completion
-       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
-       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
-
         //Read results
         err = clEnqueueReadBuffer(cpx_mtx_command_queue, memR, CL_TRUE, 0, sizeR, ptrR, 0, NULL, NULL);
         gpuerr(err);
  
+       //Wait for completion
+       err = clFlush(cpx_mtx_command_queue); gpuerr(err);
+       err = clFinish(cpx_mtx_command_queue); gpuerr(err);
+
         //Clean up
         err = clReleaseKernel(kernel); gpuerr(err);
         err = clReleaseProgram(program); gpuerr(err);
author	miha-q <>
	Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)
committer	miha-q <>
	Mon, 20 Jan 2025 05:00:08 +0000 (00:00 -0500)