// NOTE(review): this chunk is extraction-mangled — original line numbers
// ("29", "43", ...) are embedded in the text and several source lines
// (function signatures, closing braces, tails of argument lists) were dropped.
// All code tokens below are kept byte-identical; comments only record the
// evident intent. Restore from the original source before compiling.
//
// computeWeights: host wrapper that launches computeWeightsKernel over
// volume_size elements. The signature line is missing here; the index
// appended to this chunk lists it as:
//   computeWeights(const T* d_Vfiltered1, const T* d_Vfiltered2, T* d_V1r,
//                  T* d_V2r, T* d_S, size_t volume_size,
//                  const Gpu::CDF<T>& cdf_mN, T weightPower, int weightFun)
29 #include <cuda_runtime_api.h> 31 #include <thrust/transform_reduce.h> 32 #include <thrust/for_each.h> 33 #include <thrust/remove.h> 34 #include <thrust/execution_policy.h> 39 #include "cuda_volume_restoration_kernels.cu" 43 template<
typename T >
// Missing line(s): the wrapper's name and its leading parameters — only the
// two trailing parameters survived extraction.
45 T weightPower,
int weightFun) {
// 1-D launch config: 256 threads per block, grid ceil-divided so every
// element of the volume is covered by one thread.
47 unsigned block_size = 256;
49 dim3 dimBlock{ block_size };
50 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
// Kernel launch; the argument list (cdf_mN.d_x, ...) and the close of both
// the call and the function body were truncated by extraction.
52 computeWeightsKernel<<<dimGrid, dimBlock>>>(d_Vfiltered1, d_Vfiltered2, d_V1r, d_V2r, d_S, cdf_mN.
d_x,
// filterFourierVolume: host wrapper launching filterFourierVolumeKernel.
// Signature line missing; the appended index lists it as:
//   filterFourierVolume(const T* d_R2, const std::complex<T>* d_fV,
//                       std::complex<T>* d_buffer, size_t volume_size,
//                       T w2, T w2Step)
// The std::complex<T>* arguments are reinterpreted as vec2_type<T>* (the
// CUDA float2/double2 counterpart) for the kernel.
57 template<
typename T >
// Standard 1-D launch: 256 threads/block, ceil-divide over volume_size.
59 unsigned block_size = 256;
61 dim3 dimBlock{ block_size };
62 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
64 filterFourierVolumeKernel<<<dimGrid, dimBlock>>>(d_R2, (
vec2_type<T>*)d_fV, (
vec2_type<T>*)d_buffer, volume_size, w2, w2Step);
// NOTE(review): closing brace of the wrapper was dropped by extraction.
// computeAveragePositivity (unmasked overload): launches
// computeAveragePositivityKernel over the whole volume. Signature line
// missing; the appended index lists it as:
//   computeAveragePositivity(const T* d_V1, const T* d_V2, T* d_S,
//                            size_t volume_size)
// The final argument static_cast<T>(1.0)/volume_size is a per-element
// normalization factor passed to the kernel.
68 template<
typename T >
70 unsigned block_size = 256;
72 dim3 dimBlock{ block_size };
73 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
75 computeAveragePositivityKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, d_S, volume_size,
static_cast<T
>(1.0) / volume_size);
// NOTE(review): closing brace dropped by extraction.
// computeAveragePositivity (masked overload): same launch as the unmasked
// version above but forwards an extra d_mask pointer so the kernel can
// restrict the computation to masked voxels. Signature line missing —
// presumably (..., const int* d_mask, size_t volume_size); confirm against
// the original source.
79 template<
typename T >
81 unsigned block_size = 256;
83 dim3 dimBlock{ block_size };
84 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
86 computeAveragePositivityKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, d_S, d_mask, volume_size,
static_cast<T
>(1.0) / volume_size);
// NOTE(review): closing brace dropped by extraction.
// filterS: launches filterSKernel on a Fourier-space volume. Signature line
// missing; the appended index lists it as:
//   filterS(const T* d_R2, std::complex<T>* d_fVol, size_t volume_size)
// d_fVol is reinterpreted as vec2_type<T>* for the kernel.
90 template<
typename T >
92 unsigned block_size = 256;
94 dim3 dimBlock{ block_size };
95 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
97 filterSKernel<<<dimGrid, dimBlock>>>(d_R2, (
vec2_type<T>*)d_fVol, volume_size);
// NOTE(review): closing brace dropped by extraction.
// maskForCDF: compacts the masked values of d_S into d_aux using
// thrust::copy_if with d_mask as the stencil sequence and device lambda `k`
// as the predicate. Signature line missing; the appended index lists it as:
//   maskForCDF(T* d_aux, const T* d_S, const int* d_mask, size_t volume_size)
101 template<
typename T >
// Predicate body was dropped by extraction — presumably returns whether the
// stencil value x is non-zero; confirm against the original source.
103 auto k = [] __device__ (
int x) {
107 thrust::copy_if(thrust::device, d_S, d_S + volume_size, d_mask, d_aux,
k);
// NOTE(review): closing brace dropped by extraction.
// maskWithNoiseProbability: launches maskWithNoiseProbabilityKernel on d_V.
// Signature line missing; the appended index lists it as:
//   maskWithNoiseProbability(T* d_V, const Gpu::CDF<T>& cdf_S,
//                            const Gpu::CDF<T>& cdf_N, size_t volume_size)
110 template<
typename T >
112 unsigned block_size = 256;
114 dim3 dimBlock{ block_size };
115 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
// The remainder of the argument list (cdf_S.d_x, presumably followed by the
// other CDF device arrays and cdf_N fields) was truncated by extraction.
117 maskWithNoiseProbabilityKernel<<<dimGrid, dimBlock>>>(d_V, cdf_S.
d_x,
// deconvolveRestored: launches deconvolveRestoredKernel over the Fourier-
// space arrays. Signature line missing; the appended index lists it as:
//   deconvolveRestored(std::complex<T>* d_fVol, std::complex<T>* d_fV1,
//                      std::complex<T>* d_fV2, const T* d_R2, T K1, T K2,
//                      T lambda, size_t volume_size, size_t fourier_size)
// Note the grid is sized by fourier_size (half-spectrum), while the trailing
// 1/volume_size factor normalizes for the real-space FFT length.
123 template<
typename T >
125 unsigned block_size = 256;
127 dim3 dimBlock{ block_size };
128 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(fourier_size) / dimBlock.x) ) };
130 deconvolveRestoredKernel<<<dimGrid, dimBlock>>>((
vec2_type<T>*)d_fVol, (
vec2_type<T>*)d_fV1, (
vec2_type<T>*)d_fV2, d_R2, K1, K2, lambda, fourier_size, static_cast<T>(1.0) / volume_size);
// NOTE(review): closing brace dropped by extraction.
// convolveFourierVolume: launches convolveFourierVolumeKernel in place on
// d_fVol. Signature line missing; the appended index lists it as:
//   convolveFourierVolume(std::complex<T>* d_fVol, const T* d_R2, T K,
//                         size_t volume_size)
134 template<
typename T >
136 unsigned block_size = 256;
138 dim3 dimBlock{ block_size };
139 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
141 convolveFourierVolumeKernel<<<dimGrid, dimBlock>>>((
vec2_type<T>*)d_fVol, d_R2, K, volume_size);
// NOTE(review): closing brace dropped by extraction.
// normalizeForFFT (two-array overload): scales d_V1 and d_V2 by
// 1/volume_size, undoing the unnormalized inverse FFT. Signature line
// missing; the appended index lists it as:
//   normalizeForFFT(T* d_V1, T* d_V2, size_t volume_size)
145 template<
typename T >
147 unsigned block_size = 256;
149 dim3 dimBlock{ block_size };
150 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
152 normalizeForFFTKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, volume_size,
static_cast<T
>(1.0) / volume_size);
// NOTE(review): closing brace dropped by extraction.
// normalizeForFFT (single-array overload): same as above but for one volume.
// Signature line missing — presumably normalizeForFFT(T* d_V1,
// size_t volume_size); confirm against the original source.
156 template<
typename T >
158 unsigned block_size = 256;
160 dim3 dimBlock{ block_size };
161 dim3 dimGrid{
static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };
163 normalizeForFFTKernel<<<dimGrid, dimBlock>>>(d_V1, volume_size,
static_cast<T
>(1.0) / volume_size);
// NOTE(review): closing brace dropped by extraction.
// restorationSigmaCostError: computes a scalar data-fidelity error by
// summing, over all Fourier coefficients, the magnitudes of the residuals
// between the restored volume attenuated by exp(K1*R2) / exp(K2*R2) and the
// two observed filtered volumes. Signature line missing; the appended index
// lists it as:
//   restorationSigmaCostError(T& error, const std::complex<T>* d_fVol,
//       const std::complex<T>* d_fV1, const std::complex<T>* d_fV2,
//       const T* __restrict__ d_R2, T K1, T K2, size_t fourier_size)
167 template<
typename T >
// Per-coefficient normalization; 1.0/(2*fourier_size) per component, so the
// magnitudes are on the scale of the full real-space volume.
// NOTE(review): the 1.0 literal is double — promotes the division to double
// before the narrowing to T; flag for the original source.
169 const T inv_size = 1.0 / (2 * fourier_size);
175 auto error_func = [=] __device__ (
int n) {
// R2n: squared frequency radius for coefficient n.
176 const T R2n = d_R2[
n];
// H1/H2: frequency responses of the two restoration filters.
178 const T H1 = exp(K1 * R2n);
179 const T H2 = exp(K2 * R2n);
// Residuals (real and imaginary components) of d_fVol*H against each
// observed filtered volume.
181 const T diff1_x = (d_fVol[
n].x*H1 - d_fV1[
n].x) * inv_size;
182 const T diff1_y = (d_fVol[
n].y*H1 - d_fV1[
n].y) * inv_size;
184 const T diff2_x = (d_fVol[
n].x*H2 - d_fV2[
n].x) * inv_size;
185 const T diff2_y = (d_fVol[
n].y*H2 - d_fV2[
n].y) * inv_size;
// Sum of the two residual magnitudes for this coefficient.
187 return sqrt(diff1_x*diff1_x + diff1_y*diff1_y) +
sqrt(diff2_x*diff2_x + diff2_y*diff2_y);
// NOTE(review): this second return implies a guarding conditional (an else
// branch returning 0) whose condition line was dropped by extraction.
190 return static_cast<T
>(0.0);
// Parallel sum of per-coefficient errors into the by-reference `error` out
// parameter.
193 error = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(fourier_size), error_func, static_cast<T>(0), thrust::plus<T>());
// computeDiffAndAverage: per voxel, stores the half-volume difference in d_N
// (noise estimate) and the mean of the two half-volumes in d_S (signal
// estimate). Signature line missing; the appended index lists it as:
//   computeDiffAndAverage(const T* __restrict__ d_V1,
//       const T* __restrict__ d_V2, T* __restrict__ d_S,
//       T* __restrict__ d_N, size_t volume_size)
196 template<
typename T >
198 auto k = [=] __device__ (
int n) {
// d_N[n] = V1 - V2 (difference), d_S[n] = (V1 + V2)/2 (average).
199 d_N[
n] = d_V1[
n] - d_V2[
n];
200 d_S[
n] = (d_V1[
n] + d_V2[
n]) * static_cast<T>(0.5);
203 thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size,
k);
// NOTE(review): closing brace dropped by extraction.
// normAvgStd: finalizes running sums into average/standard deviation using
// var = E[x^2] - E[x]^2 with Bessel's correction (size/(size-1)). Signature
// line missing; the appended index lists it as:
//   normAvgStd(T avg, T std, size_t size) -> std::pair<T, T>
// NOTE(review): lines normalizing `avg`, taking sqrt of the variance, and
// returning the pair were dropped by extraction — confirm against the
// original source.
206 template<
typename T >
211 std = std / size - avg * avg;
212 std *=
static_cast<T
>(size) / (size - 1);
// computeAvgStd: reduces d_N into a sum and a sum of squares on the device,
// which the (missing) tail presumably passes to normAvgStd. Signature line
// missing; the appended index lists it as:
//   computeAvgStd(const T* __restrict__ d_N, size_t volume_size)
//       -> std::pair<T, T>
221 template<
typename T >
// NOTE(review): despite the name, at this point `avg` is the raw sum — the
// division by volume_size presumably happens in the dropped tail / in
// normAvgStd; confirm against the original source.
223 const T avg = thrust::reduce(thrust::device, d_N, d_N + volume_size);
// Unary op body (presumably x*x) was dropped by extraction.
225 auto square_kernel = [=] __device__ (T
x) {
229 const T
std = thrust::transform_reduce(thrust::device, d_N, d_N + volume_size, square_kernel, static_cast<T>(0), thrust::plus<T>());
// NOTE(review): the return statement was dropped by extraction.
// computeAvgStdWithMask: masked variant of computeAvgStd — sums d_N (and
// d_N^2) only over voxels selected by d_mask, via transform_reduce on a
// counting iterator. Signature line missing; the appended index lists it as:
//   computeAvgStdWithMask(const T* __restrict__ d_N,
//       const int* __restrict__ d_mask, size_t mask_size,
//       size_t volume_size) -> std::pair<T, T>
234 template<
typename T >
// Masked value: the `if (d_mask[n]) return d_N[n];` branch was dropped by
// extraction; only the else-return 0 survives.
236 auto masked_k = [=] __device__ (
int n) {
240 return static_cast<T
>(0);
244 const T avg = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(volume_size), masked_k,
245 static_cast<T>(0), thrust::plus<T>());
// Masked square: again the mask-check line was dropped; the surviving body
// returns d_N[n]^2 for masked voxels, 0 otherwise.
247 auto masked_square_k = [=] __device__ (
int n) {
249 return d_N[
n] * d_N[
n];
251 return static_cast<T
>(0);
255 const T
std = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(volume_size), masked_square_k,
256 static_cast<T>(0), thrust::plus<T>());
// NOTE(review): normalization (by mask_size) and the return statement were
// dropped by extraction.
// computeDifference: per voxel, shrinks d_V1 and d_V2 toward a reference
// value s with a Gaussian-style weight w = exp(k * Nn^2). Signature line
// missing; the appended index lists it as:
//   computeDifference(T* __restrict__ d_V1, T* __restrict__ d_V2,
//       const T* __restrict__ d_S, const T* __restrict__ d_N, T k,
//       size_t volume_size)
261 template<
typename T >
263 auto ker = [=] __device__ (
int n) {
// NOTE(review): the lines defining Nn and s were dropped by extraction —
// presumably Nn = d_N[n] and s = d_S[n], matching the signature; confirm
// against the original source.
265 const T
w = exp(k * Nn * Nn);
// Interpolate each half-volume toward s by weight w (w near 1 keeps the
// original value; w near 0 collapses to s).
267 d_V1[
n] = s + (d_V1[
n] - s) * w;
268 d_V2[
n] = s + (d_V2[
n] - s) * w;
271 thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size, ker);
// NOTE(review): closing brace dropped by extraction.
// computeMaskSize: returns the sum of the int mask entries — for a 0/1 mask
// this is the number of selected voxels. Signature line missing; the
// appended index lists it as:
//   computeMaskSize(const int* __restrict__ d_mask, size_t volume_size)
//       -> size_t
274 template<
typename T >
276 return thrust::reduce(thrust::device, d_mask, d_mask + volume_size);
// multiplyByConstant: in-place scales every element of d_array by the
// captured constant c, via a device lambda over a counting iterator.
// Signature line missing; the appended index lists it as:
//   multiplyByConstant(T* __restrict__ d_array, T c, size_t volume_size)
279 template<
typename T >
281 auto k = [=] __device__ (
int n) {
282 d_array[
n] = d_array[
n] *
c;
285 thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size,
k);
// NOTE(review): closing brace dropped by extraction.
static void deconvolveRestored(std::complex< T > *d_fVol, std::complex< T > *d_fV1, std::complex< T > *d_fV2, const T *d_R2, T K1, T K2, T lambda, size_t volume_size, size_t fourier_size)
void sqrt(Image< double > &op)
static std::pair< T, T > computeAvgStdWithMask(const T *__restrict__ d_N, const int *__restrict__ d_mask, size_t mask_size, size_t volume_size)
static void maskWithNoiseProbability(T *d_V, const Gpu::CDF< T > &cdf_S, const Gpu::CDF< T > &cdf_N, size_t volume_size)
void abs(Image< double > &op)
static void normalizeForFFT(T *d_V1, T *d_V2, size_t volume_size)
static void computeAveragePositivity(const T *d_V1, const T *d_V2, T *d_S, size_t volume_size)
static void restorationSigmaCostError(T &error, const std::complex< T > *d_fVol, const std::complex< T > *d_fV1, const std::complex< T > *d_fV2, const T *__restrict__ d_R2, T K1, T K2, size_t fourier_size)
static size_t computeMaskSize(const int *__restrict__ d_mask, size_t volume_size)
static void filterS(const T *d_R2, std::complex< T > *d_fVol, size_t volume_size)
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
static void computeDifference(T *__restrict__ d_V1, T *__restrict__ d_V2, const T *__restrict__ d_S, const T *__restrict__ d_N, T k, size_t volume_size)
std::pair< T, T > normAvgStd(T avg, T std, size_t size)
static void maskForCDF(T *__restrict__ d_aux, const T *__restrict__ d_S, const int *__restrict__ d_mask, size_t volume_size)
static void multiplyByConstant(T *__restrict__ d_array, T c, size_t volume_size)
typename Vec2< T >::type vec2_type
static void convolveFourierVolume(std::complex< T > *d_fVol, const T *d_R2, T K, size_t volume_size)
static void computeDiffAndAverage(const T *__restrict__ d_V1, const T *__restrict__ d_V2, T *__restrict__ d_S, T *__restrict__ d_N, size_t volume_size)
static void filterFourierVolume(const T *d_R2, const std::complex< T > *d_fV, std::complex< T > *d_buffer, size_t volume_size, T w2, T w2Step)
static std::pair< T, T > computeAvgStd(const T *__restrict__ d_N, size_t volume_size)
static void computeWeights(const T *d_Vfiltered1, const T *d_Vfiltered2, T *d_V1r, T *d_V2r, T *d_S, size_t volume_size, const Gpu::CDF< T > &cdf_mN, T weightPower, int weightFun)