Xmipp  v3.23.11-Nereus
cuda_volume_restoration_kernels.cpp
/***************************************************************************
 *
 * Authors: Martin Horacek (horacek1martin@gmail.com)
 *
 * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 *
 * All comments concerning this program package may be sent to the
 * e-mail address 'xmipp@cnb.csic.es'
 ***************************************************************************/

#include <iostream>
#include <cmath>

#include <cuda_runtime_api.h>

#include <thrust/transform_reduce.h>
#include <thrust/for_each.h>
#include <thrust/remove.h>
#include <thrust/copy.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/execution_policy.h>

#include "cuda_asserts.h"
#include "cuda_vec2.h"

// The kernel definitions are included as a source file so that the template
// kernels are compiled together with these wrappers in a single translation unit.
#include "cuda_volume_restoration_kernels.cu"

namespace Gpu {

// Host wrapper for computeWeightsKernel: one thread per voxel, with the weighting
// controlled by the noise CDF cdf_mN and the weightPower/weightFun parameters.
template< typename T >
void VolumeRestorationKernels<T>::computeWeights(const T* d_Vfiltered1, const T* d_Vfiltered2, T* d_V1r, T* d_V2r, T* d_S, size_t volume_size, const Gpu::CDF<T>& cdf_mN,
        T weightPower, int weightFun) {

    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    computeWeightsKernel<<<dimGrid, dimBlock>>>(d_Vfiltered1, d_Vfiltered2, d_V1r, d_V2r, d_S, cdf_mN.d_x,
            cdf_mN.d_probXLessThanx, cdf_mN.d_V, volume_size, cdf_mN.Nsteps, weightPower, weightFun);
    gpuErrchk(cudaPeekAtLastError());
}
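
/* Every wrapper in this file repeats the same launch configuration: a fixed
 * 256-thread block and as many blocks as are needed to cover the whole array.
 * A minimal sketch of that idiom, factored into a hypothetical helper that is
 * not part of the original API, would be:
 *
 *     static dim3 gridFor(size_t n, unsigned block_size = 256) {
 *         // integer ceil-division: enough blocks so that block_size * gridDim.x >= n
 *         return dim3{ static_cast<unsigned>((n + block_size - 1) / block_size) };
 *     }
 *
 * with launches then reading kernel<<<gridFor(volume_size), dim3{256}>>>(...).
 */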

template< typename T >
void VolumeRestorationKernels<T>::filterFourierVolume(const T* d_R2, const std::complex<T>* d_fV, std::complex<T>* d_buffer, size_t volume_size, T w2, T w2Step) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    filterFourierVolumeKernel<<<dimGrid, dimBlock>>>(d_R2, (vec2_type<T>*)d_fV, (vec2_type<T>*)d_buffer, volume_size, w2, w2Step);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::computeAveragePositivity(const T* d_V1, const T* d_V2, T* d_S, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    computeAveragePositivityKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, d_S, volume_size, static_cast<T>(1.0) / volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::computeAveragePositivity(const T* d_V1, const T* d_V2, T* d_S, const int* d_mask, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    computeAveragePositivityKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, d_S, d_mask, volume_size, static_cast<T>(1.0) / volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::filterS(const T* d_R2, std::complex<T>* d_fVol, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    filterSKernel<<<dimGrid, dimBlock>>>(d_R2, (vec2_type<T>*)d_fVol, volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::maskForCDF(T* __restrict__ d_aux, const T* __restrict__ d_S, const int* __restrict__ d_mask, size_t volume_size) {
    // d_mask is the stencil for thrust::copy_if and the identity lambda is the
    // predicate: values of d_S whose mask entry is non-zero are compacted into d_aux.
    auto k = [] __device__ (int x) {
        return x;
    };

    thrust::copy_if(thrust::device, d_S, d_S + volume_size, d_mask, d_aux, k);
}

template< typename T >
void VolumeRestorationKernels<T>::maskWithNoiseProbability(T* d_V, const Gpu::CDF<T>& cdf_S, const Gpu::CDF<T>& cdf_N, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    maskWithNoiseProbabilityKernel<<<dimGrid, dimBlock>>>(d_V, cdf_S.d_x,
            cdf_S.d_probXLessThanx, cdf_S.d_V, cdf_S.Nsteps, cdf_S.volume_size, cdf_N.d_x,
            cdf_N.d_probXLessThanx, cdf_N.d_V, cdf_N.Nsteps, cdf_N.volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::deconvolveRestored(std::complex<T>* d_fVol, std::complex<T>* d_fV1, std::complex<T>* d_fV2, const T* d_R2, T K1, T K2, T lambda, size_t volume_size, size_t fourier_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(fourier_size) / dimBlock.x) ) };

    deconvolveRestoredKernel<<<dimGrid, dimBlock>>>((vec2_type<T>*)d_fVol, (vec2_type<T>*)d_fV1, (vec2_type<T>*)d_fV2, d_R2, K1, K2, lambda, fourier_size, static_cast<T>(1.0) / volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::convolveFourierVolume(std::complex<T>* d_fVol, const T* d_R2, T K, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    convolveFourierVolumeKernel<<<dimGrid, dimBlock>>>((vec2_type<T>*)d_fVol, d_R2, K, volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

// Both overloads pass 1/volume_size to normalizeForFFTKernel, the scaling
// conventionally applied after an unnormalised inverse FFT (e.g. cuFFT).
template< typename T >
void VolumeRestorationKernels<T>::normalizeForFFT(T* d_V1, T* d_V2, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    normalizeForFFTKernel<<<dimGrid, dimBlock>>>(d_V1, d_V2, volume_size, static_cast<T>(1.0) / volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::normalizeForFFT(T* d_V1, size_t volume_size) {
    unsigned block_size = 256;

    dim3 dimBlock{ block_size };
    dim3 dimGrid{ static_cast<unsigned>( ceil( static_cast<double>(volume_size) / dimBlock.x) ) };

    normalizeForFFTKernel<<<dimGrid, dimBlock>>>(d_V1, volume_size, static_cast<T>(1.0) / volume_size);
    gpuErrchk(cudaPeekAtLastError());
}

template< typename T >
void VolumeRestorationKernels<T>::restorationSigmaCostError(T& error, const std::complex<T>* _d_fVol, const std::complex<T>* _d_fV1, const std::complex<T>* _d_fV2, const T* __restrict__ d_R2, T K1, T K2, size_t fourier_size) {
    const T inv_size = 1.0 / (2 * fourier_size);

    const vec2_type<T>* __restrict__ d_fVol = (vec2_type<T>*)_d_fVol;
    const vec2_type<T>* __restrict__ d_fV1 = (vec2_type<T>*)_d_fV1;
    const vec2_type<T>* __restrict__ d_fV2 = (vec2_type<T>*)_d_fV2;

    auto error_func = [=] __device__ (int n) {
        const T R2n = d_R2[n];
        if (R2n <= 0.25) {
            const T H1 = exp(K1 * R2n);
            const T H2 = exp(K2 * R2n);

            const T diff1_x = (d_fVol[n].x*H1 - d_fV1[n].x) * inv_size;
            const T diff1_y = (d_fVol[n].y*H1 - d_fV1[n].y) * inv_size;

            const T diff2_x = (d_fVol[n].x*H2 - d_fV2[n].x) * inv_size;
            const T diff2_y = (d_fVol[n].y*H2 - d_fV2[n].y) * inv_size;

            return sqrt(diff1_x*diff1_x + diff1_y*diff1_y) + sqrt(diff2_x*diff2_x + diff2_y*diff2_y);
        }

        return static_cast<T>(0.0);
    };

    error = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(fourier_size), error_func, static_cast<T>(0), thrust::plus<T>());
}
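
/* In formula form, the cost accumulated above is
 *
 *     error = sum over n with R2[n] <= 0.25 of
 *             ( |fVol[n] * exp(K1 * R2[n]) - fV1[n]| + |fVol[n] * exp(K2 * R2[n]) - fV2[n]| ) / (2 * fourier_size)
 *
 * i.e. the modulus of the misfit between fVol, filtered with the exponential
 * factors exp(K1*R2) and exp(K2*R2), and the two half-map transforms fV1 and fV2,
 * summed over the frequencies whose squared digital frequency R2 is at most
 * 0.25 (the 0.5 Nyquist limit squared).
 */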

// One fused pass over the volume: d_N receives the difference of the two inputs
// and d_S their average.
template< typename T >
void VolumeRestorationKernels<T>::computeDiffAndAverage(const T* __restrict__ d_V1, const T* __restrict__ d_V2, T* __restrict__ d_S, T* __restrict__ d_N, size_t volume_size) {
    auto k = [=] __device__ (int n) {
        d_N[n] = d_V1[n] - d_V2[n];
        d_S[n] = (d_V1[n] + d_V2[n]) * static_cast<T>(0.5);
    };

    thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size, k);
}

template< typename T >
std::pair<T, T> normAvgStd(T avg, T std, size_t size) {
    avg /= size;

    if (size > 1) {
        std = std / size - avg * avg;
        std *= static_cast<T>(size) / (size - 1);
        std = sqrt(abs(std));
    } else {
        std = 0;
    }

    return { avg, std };
}
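
/* normAvgStd converts the raw device-side sums into the mean and the
 * Bessel-corrected sample standard deviation:
 *
 *     avg = sum(x) / N
 *     std = sqrt( N/(N-1) * ( sum(x^2)/N - avg^2 ) )
 *
 * The abs() guards against a slightly negative argument caused by floating-point
 * cancellation before the square root is taken.
 */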

template< typename T >
std::pair<T, T> VolumeRestorationKernels<T>::computeAvgStd(const T* __restrict__ d_N, size_t volume_size) {
    const T avg = thrust::reduce(thrust::device, d_N, d_N + volume_size);

    auto square_kernel = [=] __device__ (T x) {
        return x * x;
    };

    const T std = thrust::transform_reduce(thrust::device, d_N, d_N + volume_size, square_kernel, static_cast<T>(0), thrust::plus<T>());

    return normAvgStd(avg, std, volume_size);
}

template< typename T >
std::pair<T, T> VolumeRestorationKernels<T>::computeAvgStdWithMask(const T* __restrict__ d_N, const int* __restrict__ d_mask, size_t mask_size, size_t volume_size) {
    auto masked_k = [=] __device__ (int n) {
        if (d_mask[n]) {
            return d_N[n];
        } else {
            return static_cast<T>(0);
        }
    };

    const T avg = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(volume_size), masked_k,
            static_cast<T>(0), thrust::plus<T>());

    auto masked_square_k = [=] __device__ (int n) {
        if (d_mask[n]) {
            return d_N[n] * d_N[n];
        } else {
            return static_cast<T>(0);
        }
    };

    const T std = thrust::transform_reduce(thrust::device, thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(volume_size), masked_square_k,
            static_cast<T>(0), thrust::plus<T>());

    return normAvgStd(avg, std, mask_size);
}

// Scales the deviation of each volume from d_S by the voxel-wise weight
// w = exp(k * d_N[n]^2), i.e. d_V[n] = s + (d_V[n] - s) * w.
template< typename T >
void VolumeRestorationKernels<T>::computeDifference(T* __restrict__ d_V1, T* __restrict__ d_V2, const T* __restrict__ d_S, const T* __restrict__ d_N, T k, size_t volume_size) {
    auto ker = [=] __device__ (int n) {
        const T Nn = d_N[n];
        const T w = exp(k * Nn * Nn);
        const T s = d_S[n];
        d_V1[n] = s + (d_V1[n] - s) * w;
        d_V2[n] = s + (d_V2[n] - s) * w;
    };

    thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size, ker);
}

template< typename T >
size_t VolumeRestorationKernels<T>::computeMaskSize(const int* __restrict__ d_mask, size_t volume_size) {
    return thrust::reduce(thrust::device, d_mask, d_mask + volume_size);
}

template< typename T >
void VolumeRestorationKernels<T>::multiplyByConstant(T* __restrict__ d_array, T c, size_t volume_size) {
    auto k = [=] __device__ (int n) {
        d_array[n] = d_array[n] * c;
    };

    thrust::for_each_n(thrust::device, thrust::counting_iterator<int>(0), volume_size, k);
}

template class VolumeRestorationKernels<double>;
template class VolumeRestorationKernels<float>;

} // namespace Gpu
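
/* A minimal usage sketch (not part of this file; buffer and dimension names are
 * hypothetical). The members are static, so a caller only needs device buffers
 * of the right size:
 *
 *     const size_t volume_size = xdim * ydim * zdim;
 *     float* d_N = nullptr;
 *     gpuErrchk(cudaMalloc((void**)&d_N, volume_size * sizeof(float)));
 *     // ... fill d_N, e.g. with the difference of two half-maps ...
 *
 *     const auto stats = Gpu::VolumeRestorationKernels<float>::computeAvgStd(d_N, volume_size);
 *     Gpu::VolumeRestorationKernels<float>::multiplyByConstant(d_N, 1.0f / stats.second, volume_size);
 *
 *     gpuErrchk(cudaFree(d_N));
 */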