tmp.cols = filter.cols * gate.cols;
tmp.ptr = malloc(tmp.rows * (tmp.cols * 2) * sizeof(float));
- if (USE_GPU && 0)
- {
- cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
- else if (USE_THREADS)
- {
- cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
- else
- {
- cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- }
+
+ printf("%ix%i (knk)\n", tmp.rows, tmp.cols);
+ unsigned long int us1, us2;
+ us1 = get_time();
+ cpx_mtx_knk_metal(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
+ us2 = get_time();
+ printf("\tMetal: %lu\n", us2 - us1);
+ us1 = get_time();
+ cpx_mtx_knk_threads(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
+ us2 = get_time();
+ printf("\tThreads: %lu\n", us2 - us1);
+ us1 = get_time();
+ cpx_mtx_knk(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
+ us2 = get_time();
+ printf("\tBare: %lu\n", us2 - us1);
+
+// if (USE_GPU && 0)
+// {
+// cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+// }
+// else if (USE_THREADS)
+// {
+// cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+// }
+// else
+// {
+// cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+// }
free(filter.ptr);
}
cpx_mtx_init(&tmp, stateVector->rows, stateVector->cols);
- printf("%ix%i\n", tmp.rows, tmp.cols);
- unsigned long int us1, us2;
+ printf("%ix%i (dot)\n", tmp.rows, tmp.cols);
+ unsigned long int us1, us2;
us1 = get_time();
cpx_mtx_dot_metal(tmp.ptr, stateVector->ptr, filter.ptr, stateVector->rows, stateVector->cols, filter.rows, filter.cols);
us2 = get_time();