#define GPU_ENABLED
unsigned char USE_GPU = 1;
unsigned char USE_THREADS = 1;
-#define SPEED_TEST
+//#define SPEED_TEST
typedef struct
{
us1 = get_time();
cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
us2 = get_time();
- printf("\tThreads2x2: %lu\n", us2 - us1);
+ printf("\tThreads: %lu\n", us2 - us1);
us1 = get_time();
cpx_mtx_knk_threads_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
us2 = get_time();
- printf("\tThreads: %lu\n", us2 - us1);
+ printf("\tThreads2x2: %lu\n", us2 - us1);
us1 = get_time();
cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
us2 = get_time();
}
else if (USE_THREADS && tmp.rows >= 512)
{
- cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ cpx_mtx_knk_threads_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
}
else
{
- if (filter.rows / 8 == 0)
- cpx_mtx_knk_2x2_R
- (tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- else
- cpx_mtx_knk_2x2_Rx4
- (tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ cpx_mtx_knk_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
}
#endif
{
cpx_mtx_dot_metal(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
}
- else if (USE_THREADS && tmp.rows >= 512)
+ else if (USE_THREADS && tmp.cols >= 512)
{
cpx_mtx_dot_threads(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
}
RANDOM_FILE = fopen("/dev/TrueRNG0", "r");
if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r");
USE_GPU = 0;
- USE_THREADS = 0;
+ USE_THREADS = 1;
process(argc, argv);
fclose(RANDOM_FILE);
if (USE_GPU) cpx_mtx_clean();