tmp.ptr = malloc((tmp.rows * 2) * (tmp.cols * 2) * sizeof(float));
#ifdef GPU_ENABLED
- if (USE_GPU && (tmp.rows >= 4096 || tmp.cols >= 4096))
- {
- GPU_knk
+ if (USE_GPU && (tmp.rows >= 512 || tmp.cols >= 512))
+ {
+ // GPU_knk call disabled: it appears to be slower, for reasons not yet understood (TODO: investigate).
+ //(
+ // tmp.ptr, tmp.rows, tmp.cols,
+ // filter.ptr, filter.rows, filter.cols,
+ // gate.ptr, gate.rows, gate.cols
+ //);
+ cpx_ncpx_knk_mt
(
tmp.ptr, tmp.rows, tmp.cols,
filter.ptr, filter.rows, filter.cols,