Xmipp  v3.23.11-Nereus
cuda_gpu_geo_shift_transformer.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: David Strelak (davidstrelak@gmail.com)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include <cuda_runtime_api.h>
28 #include "cuda_xmipp_utils.cpp"
29 #include "cuda_gpu_geo_shift_transformer.cu"
30 
31 template class GeoShiftTransformer<float> ;
32 
33 template<typename T>
35  delete imgs;
36  imgs = nullptr;
37  delete ffts;
38  ffts = nullptr;
39 
40  fftHandle.clear();
41  ifftHandle.clear();
42 
43  // do not release stream, it has been passed by pointer, so we don't own it
44  stream = nullptr;
45  device = -1;
46 
47  m_gpu = nullptr;
48 
49  isReady = false;
50 }
51 
52 template<typename T>
53 void GeoShiftTransformer<T>::init(const GPU &gpu, size_t x, size_t y, size_t n, int device,
54  myStreamHandle* stream) {
55  release();
56 
57  this->device = device;
58  this->stream = stream;
59 
60  m_gpu = &gpu; // FIXME DS implement set() check
61  this->imgs = new GpuMultidimArrayAtGpu<T>(x, y, 1, n);
62  this->ffts = new GpuMultidimArrayAtGpu<std::complex<T> >(x / 2 + 1, y, 1, n);
63 
64  fftHandle.ptr = new cufftHandle;
65  ifftHandle.ptr = new cufftHandle;
66  if (this->stream) {
67  createPlanFFTStream(x, y, n, 1, true, (cufftHandle*)fftHandle.ptr, *stream);
68  createPlanFFTStream(x, y, n, 1, false, (cufftHandle*)ifftHandle.ptr, *stream);
69  } else {
70  createPlanFFT(x, y, n, 1, true, (cufftHandle*)fftHandle.ptr);
71  createPlanFFT(x, y, n, 1, false, (cufftHandle*)ifftHandle.ptr);
72  }
73 
74  isReady = true;
75 }
76 
77 template<typename T>
78 void GeoShiftTransformer<T>::initLazy(const GPU &gpu, size_t x, size_t y, size_t n, int device,
79  myStreamHandle* stream) {
80  if (!isReady) {
81  init(gpu, x, y, n, device, stream);
82  }
83 }
84 
85 template<typename T>
86 template<typename T_IN>
88  const MultidimArray<T_IN> &input, T shiftX, T shiftY) {
89  checkRestrictions(output, input);
90  if (output.xdim == 0) {
91  output.resizeNoCopy(input.ndim, input.zdim, input.ydim, input.xdim);
92  }
93  if (shiftX == 0 && shiftY == 0) {
94  typeCast(input, output);
95  return;
96  }
97 
99 
100  MultidimArray<T> tmp;
101  typeCast(input, tmp);
102  imgs->copyToGpu(tmp.data);
103  if(stream) {
104  imgs->fftStream(*ffts, fftHandle, *stream, false, mask);
105  } else {
106  imgs->fft(*ffts, fftHandle);
107  }
108 
109  dim3 dimBlock(BLOCK_DIM_X, BLOCK_DIM_X);
110  dim3 dimGrid(ceil(ffts->Xdim / (T) dimBlock.x), ceil(ffts->Ydim / (T) dimBlock.y));
111 
112  if (stream) {
113  shiftFFT2D<true><<<dimGrid, dimBlock, 0, *(cudaStream_t*)stream->ptr>>>(
114  (float2*)ffts->d_data, ffts->Ndim, ffts->Xdim, imgs->Xdim,
115  imgs->Ydim, shiftX, shiftY);
116  } else {
117  shiftFFT2D<true><<<dimGrid, dimBlock>>>((float2*)ffts->d_data,
118  ffts->Ndim, ffts->Xdim, imgs->Xdim, imgs->Ydim, shiftX, shiftY);
119  }
120  gpuErrchk(cudaPeekAtLastError());
121 
122  if (stream) {
123  ffts->ifftStream(*imgs, ifftHandle, *stream, false, mask);
124  } else {
125  ffts->ifft(*imgs, ifftHandle);
126  }
127  imgs->copyToCpu(output.data);
128 }
129 
130 template<typename T>
132  int offsetX = 5;
133  int offsetY = -7;
134  size_t xSize = 2048;
135  size_t ySize = 3072;
136  MultidimArray<float> resGpu(ySize, xSize);
137  MultidimArray<float> expected(ySize, xSize);
138  MultidimArray<float> input(ySize, xSize);
139  for (int y = 10; y < 15; ++y) {
140  for (int x = 10; x < 15; ++x) {
141  int indexIn = (y * input.xdim) + x;
142  int indexExp = ((y + offsetY) * input.xdim) + (x + offsetX);
143  input.data[indexIn] = 10;
144  expected.data[indexExp] = 10;
145  }
146  }
147 
149  auto gpu = GPU();
150  tr.initLazy(gpu, input.xdim, input.ydim, 1, 0);
151  tr.applyShift(resGpu, input, offsetX, offsetY);
152 
153  bool error = false;
154  for (int y = 0; y < expected.ydim; ++y) {
155  for (int x = 0; x < expected.xdim; ++x) {
156  int index = (y * input.xdim) + x;
157  float gpu = resGpu.data[index];
158  float cpu = expected.data[index];
159  float threshold = std::max(gpu, cpu) / 1000.f;
160  float diff = std::abs(cpu - gpu);
161  if (diff > threshold && diff > 0.001) {
162  error = true;
163  printf("%d gpu %.4f cpu %.4f (%f > %f)\n", index, gpu, cpu, diff, threshold);
164  }
165  }
166  }
167 #ifdef DEBUG
168  Image<float> img(expected.xdim, expected.ydim);
169  img.data = expected;
170  img.write("expected.vol");
171  img.data = resGpu;
172  img.write("resGpu.vol");
173 #endif
174  printf("\n SHIFT %s\n", error ? "FAILED" : "OK");
175 }
176 
177 template<typename T>
178 template<typename T_IN>
180  const MultidimArray<T_IN> &input) {
181  if (!isReady)
182  throw std::logic_error("Shift transformer: Not initialized");
183 
184  if (input.nzyxdim == 0)
185  throw std::invalid_argument("Shift transformer: Input is empty");
186 
187  if ((imgs->Xdim != input.xdim) || (imgs->Ydim != input.ydim)
188  || (imgs->Zdim != input.zdim) || (imgs->Ndim != input.ndim))
189  throw std::logic_error(
190  "Shift transformer: Initialized for different sizes");
191 
192  if (((output.xdim != input.xdim) || (output.ydim != input.ydim)
193  || (output.zdim != input.zdim) || (output.ndim != input.ndim))
194  && (output.nzyxdim != 0))
195  throw std::logic_error(
196  "Shift transformer: Input/output dimensions do not match");
197 
198  if (&input == (MultidimArray<T_IN>*) &output)
199  throw std::invalid_argument(
200  "Shift transformer: The input array cannot be the same as the output array");
201 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
void applyShift(MultidimArray< T > &output, const MultidimArray< T_IN > &input, T shiftX, T shiftY)
void resizeNoCopy(const MultidimArray< T1 > &v)
static double * y
void write(const FileName &name="", size_t select_img=ALL_IMAGES, bool isStack=false, int mode=WRITE_OVERWRITE, CastWriteMode castMode=CW_CAST, int _swapWrite=0)
void abs(Image< double > &op)
void createPlanFFTStream(int Xdim, int Ydim, int Ndim, int Zdim, bool forward, cufftHandle *plan, myStreamHandle &myStream)
#define BLOCK_DIM_X
doublereal * x
int cufftHandle
Definition: cuda_fft.h:41
void threshold(double *phi, unsigned long nvox, double limit)
Definition: lib_vwk.cpp:524
MultidimArray< T > data
Definition: xmipp_image.h:55
viol index
void max(Image< double > &op1, const Image< double > &op2)
void init(const GPU &gpu, size_t x, size_t y, size_t n, int device, myStreamHandle *stream)
void typeCast(const Matrix1D< T1 > &v1, Matrix1D< T2 > &v2)
Definition: matrix1d.h:1227
void createPlanFFT(int Xdim, int Ydim, int Ndim, int Zdim, bool forward, cufftHandle *plan)
Definition: gpu.h:36
int * n
void initLazy(const GPU &gpu, size_t x, size_t y, size_t n, int device, myStreamHandle *stream=NULL)