Xmipp  v3.23.11-Nereus
fftwT.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: David Strelak (davidstrelak@gmail.com)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include "../data/fftwT.h"
27 
28 #include "core/xmipp_error.h"
29 #include <array>
30 #include <mutex>
31 
32 // Make sure that the class is initialized
34 
35 static inline auto fftwtMutex = std::mutex();
36 
37 template<typename T>
38 bool FFTwT<T>::needsAuxArray(const FFTSettings<T> &settings) {
39  return (settings.isInPlace() && (0 != settings.sDim().n() % settings.batch()))
40  || !settings.isForward(); // for inverse transforms to preserve input;
41 }
42 
43 template<typename T>
44 void FFTwT<T>::init(const HW &cpu, const FFTSettings<T> &settings, bool reuse) {
45  bool canReuse = m_isInit
46  && reuse
47  // we can reuse if the helper arrays are bigger, or non-existent
48  && (m_settings->sBytesBatch() >= settings.sBytesBatch())
49  && (m_settings->fBytesBatch() >= settings.fBytesBatch())
50  && (needsAuxArray(*m_settings) == needsAuxArray(settings));
51  bool mustAllocate = !canReuse;
52  if (mustAllocate) {
53  release();
54  }
55  // previous plan has to be released, otherwise we will get CPU memory leak
56  release(m_plan);
57  delete m_settings;
58 
59  m_settings = new FFTSettings<T>(settings);
60  try {
61  m_cpu = &dynamic_cast<const CPU&>(cpu);
62  } catch (std::bad_cast&) {
63  REPORT_ERROR(ERR_ARG_INCORRECT, "Instance of CPU expected");
64  }
65 
66  check();
67 
68  m_plan = createPlan(*m_cpu, *m_settings, true);
69  if (mustAllocate) {
70  allocate();
71  }
72 
73  m_isInit = true;
74 }
75 
76 template<typename T>
78  // FIXME DS measure if this is true
79  // assuming that the plan will need 'batch' size
80  return settings.maxBytesBatch();
81 }
82 
83 template<>
84 void* FFTwT<float>::allocateAligned(size_t bytes) {
85  return fftwf_malloc(bytes);
86 }
87 
88 template<>
89 void* FFTwT<double>::allocateAligned(size_t bytes) {
90  return fftw_malloc(bytes);
91 }
92 
93 template<>
94 template<>
95 void FFTwT<double>::release(double *alignedData) {
96  fftw_free(alignedData);
97 }
98 
99 template<>
100 template<>
101 void FFTwT<float>::release(float *alignedData) {
102  fftwf_free(alignedData);
103 }
104 
105 template<>
106 template<>
107 void FFTwT<double>::release(std::complex<double> *alignedData) {
108  fftw_free(alignedData);
109 }
110 
111 template<>
112 template<>
113 void FFTwT<float>::release(std::complex<float> *alignedData) {
114  fftwf_free(alignedData);
115 }
116 
117 template<>
118 void FFTwT<float>::allocate() {
119  if (needsAuxArray(*m_settings)) {
120  m_SD = (float*)allocateAligned(m_settings->sBytesBatch());
121  if (m_settings->isInPlace()) {
122  // input data holds also the output
123  m_FD = (std::complex<float>*)m_SD;
124  } else {
125  // allocate also the output buffer
126  m_FD = (std::complex<float>*)allocateAligned(m_settings->fBytesBatch());
127  }
128  }
129 }
130 
131 template<>
133  if (needsAuxArray(*m_settings)) {
134  m_SD = (double*)allocateAligned(m_settings->sBytesBatch());
135  if (m_settings->isInPlace()) {
136  // input data holds also the output
137  m_FD = (std::complex<double>*)m_SD;
138  } else {
139  // allocate also the output buffer
140  m_FD = (std::complex<double>*)allocateAligned(m_settings->fBytesBatch());
141  }
142  }
143 }
144 
145 template<typename T>
146 void FFTwT<T>::release(void *plan) {
147  if (nullptr != plan) {
148  FFTwT<T>::release(cast(plan));
149  }
150 }
151 
152 template<typename T>
153 void FFTwT<T>::check() {
154  if (m_settings->sDim().x() < 1) {
155  REPORT_ERROR(ERR_LOGIC_ERROR, "X dim must be at least 1 (one)");
156  }
157  if ((m_settings->sDim().y() > 1)
158  && (m_settings->sDim().x() < 2)) {
159  REPORT_ERROR(ERR_LOGIC_ERROR, "X dim must be at least 2 (two) for 2D/3D transformations");
160  }
161  if ((m_settings->sDim().z() > 1)
162  && (m_settings->sDim().y() < 2)) {
163  REPORT_ERROR(ERR_LOGIC_ERROR, "Y dim must be at least 2 (two) for 3D transformations");
164  }
165  if (m_settings->batch() > m_settings->sDim().n()) {
166  REPORT_ERROR(ERR_LOGIC_ERROR, "Batch size must be smaller or equal to number of signals");
167  }
168 }
169 
170 template<typename T>
171 void FFTwT<T>::setDefault() {
172  m_isInit = false;
173  m_settings = nullptr;
174  m_SD = nullptr;
175  m_FD = nullptr;
176  m_plan = nullptr;
177 }
178 
179 template<typename T>
181  release(m_SD, m_FD);
182  release(m_plan);
183  delete m_settings;
184  setDefault();
185 }
186 
187 template<>
188 void FFTwT<double>::release(double *SD, std::complex<double> *FD) {
189  fftw_free(m_SD);
190  if ((void*)m_FD != (void*)m_SD) {
191  fftw_free(m_FD);
192  }
193 }
194 
195 template<>
196 void FFTwT<float>::release(float *SD, std::complex<float> *FD) {
197  fftwf_free(m_SD);
198  if ((void*)m_FD != (void*)m_SD) {
199  fftwf_free(m_FD);
200  }
201 }
202 
203 template<typename T>
204 std::complex<T>* FFTwT<T>::fft(T *inOut) {
205  return fft(inOut, (std::complex<T>*) inOut);
206 }
207 
208 template<typename T>
209 std::complex<T>* FFTwT<T>::fft(const T *in,
210  std::complex<T> *out) {
211  auto isReady = m_isInit && m_settings->isForward();
212  if ( ! isReady) {
213  REPORT_ERROR(ERR_LOGIC_ERROR, "Not ready to perform Fourier Transform. "
214  "Call init() function first");
215  }
216 
217  // process signals in batches
218  for (size_t offset = 0; offset < m_settings->sDim().n(); offset += m_settings->batch()) {
219  size_t signalsToProcess = std::min(m_settings->batch(), m_settings->sDim().n() - offset);
220  size_t signalsToSkip = m_settings->batch() - signalsToProcess;
221  bool useAux = (m_settings->batch() != signalsToProcess) && needsAuxArray(*m_settings);
222 
223  // set in / out pointers for normal case
224  const T *batchSrc = in
225  + (offset - signalsToSkip) * m_settings->sDim().xyzPadded();
226  std::complex<T> *batchDest = out
227  + (offset - signalsToSkip) * m_settings->fDim().xyzPadded();
228 
229  if (useAux) {
230  // copy data to aux array
231  memcpy(m_SD,
232  in + offset * m_settings->sDim().xyzPadded(),
233  signalsToProcess * m_settings->sBytesSingle());
234  // set pointers (we should be here only for in-place transforms)
235  batchSrc = (const T*)m_SD;
236  batchDest = (std::complex<T>*)m_SD;
237  }
238 
239  fft(cast(m_plan), batchSrc, batchDest);
240 
241  if (useAux) {
242  // copy data from aux array (skip already processed data)
243  memcpy(out + offset * m_settings->fDim().xyzPadded(),
244  batchDest,
245  signalsToProcess * m_settings->fBytesSingle());
246  }
247  }
248  return out;
249 }
250 
251 template<typename T>
252 T* FFTwT<T>::ifft(std::complex<T> *inOut) {
253  return ifft(inOut, (T*) inOut);
254 }
255 
256 template<typename T>
257 T* FFTwT<T>::ifft(const std::complex<T> *in,
258  T *out) {
259  auto isReady = m_isInit && ( ! m_settings->isForward());
260  if ( ! isReady) {
261  REPORT_ERROR(ERR_LOGIC_ERROR, "Not ready to perform Inverse Fourier Transform. "
262  "Call init() function first");
263  }
264 
265  // process signals in batches
266  for (size_t offset = 0; offset < m_settings->fDim().n(); offset += m_settings->batch()) {
267  size_t signalsToProcess = std::min(m_settings->batch(), m_settings->fDim().n() - offset);
268  size_t signalsToSkip = m_settings->batch() - signalsToProcess;
269  bool useAux = (m_settings->batch() != signalsToProcess) || needsAuxArray(*m_settings);
270 
271  // copy data
272  memcpy(m_FD,
273  in + offset * m_settings->fDim().xyzPadded(),
274  signalsToProcess * m_settings->fBytesSingle());
275 
276  // set the destination
277  T *batchDest = useAux
278  ? m_SD
279  : out + (offset - signalsToSkip) * m_settings->sDim().xyzPadded();
280 
281  ifft(cast(m_plan), m_FD, batchDest);
282 
283  if (useAux) {
284  // copy data from aux array
285  memcpy(out + offset * m_settings->sDim().xyzPadded(),
286  batchDest,
287  signalsToProcess * m_settings->sBytesSingle());
288  }
289  }
290  return out;
291 }
292 
293 template<>
294 const fftwf_plan FFTwT<float>::createPlan(const CPU &cpu,
295  const FFTSettings<float> &settings,
296  bool isDataAligned) {
297  auto f = [&] (int rank, const int *n, int howmany,
298  void *in, const int *inembed,
299  int istride, int idist,
300  void *out, const int *onembed,
301  int ostride, int odist,
302  unsigned flags) {
303  if (settings.isForward()) {
304  return fftwf_plan_many_dft_r2c(rank, n, howmany,
305  (float *)in, nullptr,
306  1, idist,
307  (fftwf_complex *)out, nullptr,
308  1, odist,
309  flags);
310  } else {
311  return fftwf_plan_many_dft_c2r(rank, n, howmany,
312  (fftwf_complex *)in, nullptr,
313  1, idist,
314  (float *)out, nullptr,
315  1, odist,
316  flags);
317  }
318  };
319  return planHelper<const fftwf_plan>(settings, f, cpu.noOfParallUnits(), isDataAligned);
320 }
321 
322 template<>
323 const fftw_plan FFTwT<double>::createPlan(const CPU &cpu,
324  const FFTSettings<double> &settings,
325  bool isDataAligned) {
326  auto f = [&] (int rank, const int *n, int howmany,
327  void *in, const int *inembed,
328  int istride, int idist,
329  void *out, const int *onembed,
330  int ostride, int odist,
331  unsigned flags) {
332  if (settings.isForward()) {
333  return fftw_plan_many_dft_r2c(rank, n, howmany,
334  (double *)in, nullptr,
335  1, idist,
336  (fftw_complex *)out, nullptr,
337  1, odist,
338  flags);
339  } else {
340  return fftw_plan_many_dft_c2r(rank, n, howmany,
341  (fftw_complex *)in, nullptr,
342  1, idist,
343  (double *)out, nullptr,
344  1, odist,
345  flags);
346  }
347  };
348  auto result = planHelper<const fftw_plan>(settings, f, cpu.noOfParallUnits(), isDataAligned);
349  return result;
350 }
351 
352 template<typename T>
353 template<typename U, typename F>
354 U FFTwT<T>::planHelper(const FFTSettings<T> &settings, F function,
355  int threads,
356  bool isDataAligned) {
357  auto n = std::array<int, 3>{(int)settings.sDim().z(), (int)settings.sDim().y(), (int)settings.sDim().x()};
358  int rank = 3;
359  if (settings.sDim().z() == 1) rank--;
360  if ((2 == rank) && (settings.sDim().y() == 1)) rank--;
361  int offset = 3 - rank;
362 
363  void *in = nullptr;
364  void *out = settings.isInPlace() ? in : &m_mockOut;
365 
366  // no input-preserving algorithms are implemented for multi-dimensional c2r transforms
367  // see http://www.fftw.org/fftw3_doc/Planner-Flags.html#Planner-Flags
368  auto flags = FFTW_ESTIMATE
369  | (settings.isForward() ? FFTW_PRESERVE_INPUT : FFTW_DESTROY_INPUT);
370  if ( ! isDataAligned) {
371  flags = flags | FFTW_UNALIGNED;
372  }
373 
374  int idist;
375  int odist;
376  if (settings.isForward()) {
377  idist = settings.sDim().xyzPadded();
378  odist = settings.fDim().xyzPadded();
379  } else {
380  idist = settings.fDim().xyzPadded();
381  odist = settings.sDim().xyzPadded();
382  }
383 
384  // planning is not thread safe -> lock it
385  std::lock_guard lck(fftwtMutex);
386 
387  // set threads
388  fftw_plan_with_nthreads(threads);
389  fftwf_plan_with_nthreads(threads);
390 
391  auto tmp = function(rank, &n[offset], settings.batch(),
392  in, nullptr,
393  1, idist,
394  out, nullptr,
395  1, odist,
396  flags);
397  return tmp;
398 }
399 
400 template<typename T>
401 void* FFTwT<T>::m_mockOut = {};
402 
403 template<>
404 template<>
405 void FFTwT<float>::release(fftwf_plan plan) {
406  std::lock_guard lck(fftwtMutex);
407  fftwf_destroy_plan(plan);
408  plan = nullptr;
409 }
410 
411 template<>
412 template<>
413 void FFTwT<double>::release(fftw_plan plan) {
414  std::lock_guard lck(fftwtMutex);
415  fftw_destroy_plan(plan);
416  plan = nullptr;
417 }
418 
419 template<>
420 template<>
421 std::complex<float>* FFTwT<float>::fft(const fftwf_plan plan, const float *in, std::complex<float> *out) {
422  // we can remove the const cast, as our plans do not touch input array
423  fftwf_execute_dft_r2c(plan, (float*)in, (fftwf_complex*) out);
424  return out;
425 }
426 
427 template<>
428 template<>
429 std::complex<double>* FFTwT<double>::fft(const fftw_plan plan, const double *in, std::complex<double> *out) {
430  // we can remove the const cast, as our plans do not touch input array
431  fftw_execute_dft_r2c(plan, (double*)in, (fftw_complex*) out);
432  return out;
433 }
434 
435 template<typename T>
436 template<typename P>
437 std::complex<T>* FFTwT<T>::fft(const P plan, T *inOut) {
438  return fft(plan, inOut, (std::complex<T>*)inOut);
439 }
440 
441 template<>
442 template<>
443 float* FFTwT<float>::ifft(const fftwf_plan plan, std::complex<float> *in, float *out) {
444  // we can remove the const cast, as our plans do not touch input array
445  fftwf_execute_dft_c2r(plan, (fftwf_complex*)in, (float*) out);
446  return out;
447 }
448 
449 template<>
450 template<>
451 double* FFTwT<double>::ifft(const fftw_plan plan, std::complex<double> *in, double *out) {
452  // we can remove the const cast, as our plans do not touch input array
453  fftw_execute_dft_c2r(plan, (fftw_complex*)in, (double*) out);
454  return out;
455 }
456 
457 template<typename T>
458 template<typename P>
459 T* FFTwT<T>::ifft(const P plan, std::complex<T> *inOut) {
460  return ifft(plan, inOut, (T*)inOut);
461 }
462 
463 // explicit instantiation
464 template class FFTwT<float>;
465 template class FFTwT<double>;
466 
Mutex mutex
constexpr size_t xyzPadded() const
Definition: dimensions.h:95
void min(Image< double > &op1, const Image< double > &op2)
size_t estimatePlanBytes(const FFTSettings< T > &settings)
Definition: fftwT.cpp:77
constexpr bool isInPlace() const
Definition: fft_settings.h:126
void init(const HW &cpu, const FFTSettings< T > &settings, bool reuse=true)
Definition: fftwT.cpp:44
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
CUDA_HD constexpr size_t z() const
Definition: dimensions.h:69
constexpr size_t fBytesBatch() const
Definition: fft_settings.h:98
static const fftw_plan createPlan(const CPU &cpu, const FFTSettings< double > &settings, bool isDataAligned=false)
void release()
Definition: fftwT.cpp:180
constexpr Dimensions fDim() const
Definition: fft_settings.h:82
Definition: hw.h:35
std::complex< T > * fft(T *inOut)
Definition: fftwT.cpp:204
unsigned noOfParallUnits() const
Definition: hw.h:44
constexpr size_t batch() const
Definition: fft_settings.h:86
CUDA_HD constexpr size_t x() const
Definition: dimensions.h:51
int in
double * f
Incorrect argument received.
Definition: xmipp_error.h:113
static void * allocateAligned(size_t bytes)
FFTwT_Startup fftwt_startup
Definition: fftwT.cpp:33
CUDA_HD constexpr size_t y() const
Definition: dimensions.h:60
constexpr size_t maxBytesBatch() const
Definition: fft_settings.h:130
CUDA_HD constexpr size_t n() const
Definition: dimensions.h:78
constexpr Dimensions sDim() const
Definition: fft_settings.h:78
T * ifft(std::complex< T > *inOut)
Definition: fftwT.cpp:252
Definition: cpu.h:37
constexpr bool isForward() const
Definition: fft_settings.h:122
int * n
constexpr size_t sBytesBatch() const
Definition: fft_settings.h:114
Definition: fftwT.h:60
check(nparam, nf, nfsr, &Linfty, nineq, nineqn, neq, neqn, ncsrl, ncsrn, mode, &modem, eps, bgbnd, param)
Some logical error in the pipeline.
Definition: xmipp_error.h:147