481 assert(dims.
n() > 0);
482 assert(
nullptr != d_data);
483 assert((
nullptr != d_positions) || (
nullptr != d_values));
485 int xHalf = dims.
x() / 2;
486 int yHalf = dims.
y() / 2;
487 assert((2 * xHalf) > maxDist);
488 assert((2 * yHalf) > maxDist);
491 size_t maxThreads = 512;
492 size_t windowWidth = 2 * maxDist;
494 size_t threads = (windowWidth < maxThreads) ?
ceilPow2(windowWidth) : maxThreads;
495 dim3 dimBlock(threads, 1, 1);
496 dim3 dimGrid(dims.
n(), 1, 1);
497 auto stream = *(cudaStream_t*)gpu.
stream();
500 int smemSize = 2 * threads *
sizeof(T);
503 return findUniversal2DNearCenter<T, 512><<< dimGrid, dimBlock, smemSize, stream>>>(
504 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
506 return findUniversal2DNearCenter<T, 256><<< dimGrid, dimBlock, smemSize, stream>>>(
507 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
509 return findUniversal2DNearCenter<T, 128><<< dimGrid, dimBlock, smemSize, stream>>>(
510 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
512 return findUniversal2DNearCenter<T, 64><<< dimGrid, dimBlock, smemSize, stream>>>(
513 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
515 return findUniversal2DNearCenter<T, 32><<< dimGrid, dimBlock, smemSize, stream>>>(
516 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
518 return findUniversal2DNearCenter<T, 16><<< dimGrid, dimBlock, smemSize, stream>>>(
519 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
521 return findUniversal2DNearCenter<T, 8><<< dimGrid, dimBlock, smemSize, stream>>>(
522 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
524 return findUniversal2DNearCenter<T, 4><<< dimGrid, dimBlock, smemSize, stream>>>(
525 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
527 return findUniversal2DNearCenter<T, 2><<< dimGrid, dimBlock, smemSize, stream>>>(
528 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
530 return findUniversal2DNearCenter<T, 1><<< dimGrid, dimBlock, smemSize, stream>>>(
531 comp, startVal, d_data, d_positions, d_values, dims.
x(), dims.
y(), maxDist);
CUDA_HD constexpr bool is2D() const
Case or algorithm not implemented yet.
#define REPORT_ERROR(nerr, ErrormMsg)
constexpr bool isPadded() const
CUDA_HD constexpr size_t x() const
CUDA_HD constexpr size_t sizeSingle() const
CUDA_HD constexpr size_t y() const
CUDA_HD constexpr size_t n() const
static size_t ceilPow2(size_t x)