From: miha-q <> Date: Fri, 8 Mar 2024 02:22:15 +0000 (-0500) Subject: Thu Mar 7 09:22:15 PM EST 2024 X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=0e138ded8a218544578d983270edd649fae095c6;p=QAnsel.git Thu Mar 7 09:22:15 PM EST 2024 --- diff --git a/examples/slow.txt b/examples/slow.txt index f4126d6..ecdb1f1 100644 --- a/examples/slow.txt +++ b/examples/slow.txt @@ -1,5 +1,5 @@ //designed to be slow -qreg q[14]; +qreg q[11]; x q[0]; x q[1]; x q[2]; @@ -11,8 +11,8 @@ x q[7]; x q[8]; x q[9]; x q[10]; -x q[11]; -x q[12]; -x q[13]; +//x q[11]; +//x q[12]; +//x q[13]; -born; \ No newline at end of file +//born; \ No newline at end of file diff --git a/src/QAnsel.c b/src/QAnsel.c index 851ec99..a7ae070 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -10,13 +10,17 @@ unsigned char HIDDEN_VARIABLE = 0; FILE* RANDOM_FILE; #define GPU_ENABLED -unsigned char USE_GPU = 1; unsigned char USE_THREADS = 1; -#define SPEED_TEST +#define MODE_BARE 1 +#define MODE_THREADED 2 +#define MODE_METAL 3 +#define MODE_METAL_THREADED 4 +unsigned char MODE = MODE_BARE; +//#define SPEED_TEST typedef struct { - char n[128];//2082378 + char n[128]; unsigned char q0, q1, q2; float arg0, arg1, arg2; } QInstr; @@ -223,22 +227,6 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr cpx_mtx_knk_metal_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); us2 = get_time(); printf("\tMetal2x2: %lu\n", us2 - us1); - - us1 = get_time(); - for (int i = 0; i < filter.rows; i++) - { - for (int j = 0; j < filter.cols; j++) - { - int x = (j * 2) + (i * filter.cols * 2); - int y = (i * 2) + (j * filter.cols * 2); - filter.ptr[x] = filter.ptr[y]; - filter.ptr[x + 1] = filter.ptr[y + 1]; - } - } - us2 = get_time(); - printf("\tTranspose: %lu\n", us2 - us1); - - us1 = get_time(); cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); us2 = get_time(); @@ -261,11 +249,11 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr //us2 = get_time(); //printf("\tTranspose: %lu\n", us2 - us1); #else - if (USE_GPU && tmp.rows >= 512) + if (MODE == MODE_METAL && tmp.cols >= 64) { cpx_mtx_knk_metal_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); } - else if (USE_THREADS && tmp.rows >= 512) + else if ((MODE == MODE_THREADED || MODE == MODE_METAL_THREADED) && tmp.cols >= 64) { cpx_mtx_knk_threads_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); } @@ -299,11 +287,11 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr us2 = get_time(); printf("\tBare: %lu\n", us2 - us1); #else - if (USE_GPU && tmp.cols >= 64) + if ((MODE == MODE_METAL || MODE == MODE_METAL_THREADED) && tmp.cols >= 64) { cpx_mtx_dot_metal(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols); } - else if (USE_THREADS && tmp.cols >= 512) + else if (MODE == MODE_THREADED && tmp.cols >= 64) { cpx_mtx_dot_threads(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols); } @@ -1434,12 +1422,15 @@ void process(int argc, char** argv) void main(int argc, char** argv) { - USE_GPU = cpx_mtx_begin(); + MODE = MODE_METAL; + unsigned char err = cpx_mtx_begin(); + if (err == 0 && (MODE == MODE_METAL_THREADED || MODE == MODE_METAL)) + { + MODE = MODE == MODE_METAL_THREADED ? MODE_THREADED : MODE_BARE; + } RANDOM_FILE = fopen("/dev/TrueRNG0", "r"); if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r"); - USE_GPU = 0; - USE_THREADS = 1; process(argc, argv); fclose(RANDOM_FILE); - if (USE_GPU) cpx_mtx_clean(); + if (MODE_METAL || MODE_METAL_THREADED) cpx_mtx_clean(); } \ No newline at end of file diff --git a/src/complex.c b/src/complex.c index 722eef4..f9a57b7 100644 --- a/src/complex.c +++ b/src/complex.c @@ -575,6 +575,57 @@ void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col err = clReleaseMemObject(memR); gpuerr(err); } +/*typedef struct +{ + float* ptr; + cl_mem* buff; + size_t* buff_size; + size_t offset; + size_t count; +} cpx_copy_context; + +void* cpx_copy_run(void *context) +{ + cpx_copy_context* ccc = (cpx_copy_context*)context; + cl_int err = clEnqueueWriteBuffer(cpx_mtx_command_queue, *(ccc->buff), CL_FALSE, ccc->offset, ccc->count, ccc->ptr, 0, NULL, NULL); + gpuerr(err); +} + +void cpx_copy(float* ptr, cl_mem* buff, size_t* buff_size) +{ + int delimeter = (int)(*buff_size); + int cores = get_core_count(); + int threadCount = cores; + if (threadCount > delimeter) threadCount = delimeter; + int delimetersPerThread = delimeter / threadCount; + int leftOvers = delimeter % threadCount; + + cpx_copy_context ctxs[threadCount]; + pthread_t threads[threadCount]; + + for (int i = 0; i < threadCount; i++) + { + ctxs[i].ptr = ptr; + ctxs[i].buff = buff; + ctxs[i].buff_size = buff_size; + ctxs[i].offset = i * delimetersPerThread; + ctxs[i].count = delimetersPerThread + ((i == threadCount - 1) ? leftOvers : 0); + if (pthread_create(&(threads[i]), NULL, &cpx_copy_run, (void*)&(ctxs[i]))) + { + fprintf(stderr, "QAnsel: Thread error. (3)\n"); + exit(1); + } + } + for (uint32_t i = 0; i < threadCount; i++) + { + if (pthread_join(threads[i], NULL)) + { + fprintf(stderr, "QAnsel: Thread error. (4)\n"); + } + } + clFlush(cpx_mtx_command_queue); +}*/ + void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB) { int rowsR = rowsA * rowsB; @@ -594,8 +645,7 @@ void cpx_mtx_knk_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col gpuerr(err); err = clEnqueueWriteBuffer(cpx_mtx_command_queue, memB, CL_TRUE, 0, sizeB, ptrB, 0, NULL, NULL); gpuerr(err); - printf("%lu!!!!\n", get_time() - q); - + //Load and compile program cl_program program; if (cpx_mtx_cache == NULL)