tmp.cols = filter.cols * gate.cols;
tmp.ptr = malloc(tmp.rows * (tmp.cols * 2) * sizeof(float));
+
+ printf("%ix%i (knk)\n", tmp.rows, tmp.cols);
+ unsigned long int us1, us2;
+ us1 = get_time();
+ cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ us2 = get_time();
+ printf("\tMetal: %lu\n", us2 - us1);
+ us1 = get_time();
+ cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ us2 = get_time();
+ printf("\tThreads: %lu\n", us2 - us1);
+ us1 = get_time();
+ cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ us2 = get_time();
+ printf("\tBare: %lu\n", us2 - us1);
+
#ifdef SPEED_TEST
printf("%ix%i (knk)\n", tmp.rows, tmp.cols);
unsigned long int us1, us2;
us2 = get_time();
printf("\tBare: %lu\n", us2 - us1);
#else
- if (USE_GPU && 0)
- {
- cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
- else if (USE_THREADS)
- {
- cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
- else
- {
- cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
#endif