Xmipp  v3.23.11-Nereus
cuda_bspline_geo_transformer.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: David Strelak (davidstrelak@gmail.com)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include <cuda_runtime_api.h>
29 #include "cuda_geo_linear_interpolator.cu"
30 
31 template<typename T>
32 void CudaBSplineGeoTransformer<T>::setSrc(const T *h_data) {
33  const auto& s = this->getSettings();
34  gpuErrchk(cudaMemcpy(
35  m_d_src,
36  h_data,
37  this->getSettings().dims.size() * sizeof(T),
38  cudaMemcpyHostToDevice));
39  this->setIsSrcSet(true);
40 }
41 
42 template<typename T>
43 void CudaBSplineGeoTransformer<T>::initialize(bool doAllocation) {
44  if (doAllocation) {
45  release();
46  }
47  const auto &s = this->getSettings();
48  m_stream = dynamic_cast<GPU*>(s.hw.at(0));
49  if (nullptr == m_stream) {
50  REPORT_ERROR(ERR_LOGIC_ERROR, "Instance of GPU is expected");
51  }
52  m_stream->set();
53 
54 
55  if (doAllocation) {
56  allocate();
57  }
58 }
59 
60 template<typename T>
62  const auto &s = this->getSettings();
63  if (InterpolationDegree::Linear != s.degree) {
64  REPORT_ERROR(ERR_NOT_IMPLEMENTED, "Only linear interpolation is available");
65  }
66  if (s.doWrap) {
67  REPORT_ERROR(ERR_NOT_IMPLEMENTED, "Wrapping is not yet implemented");
68  }
69  if (1 != s.hw.size()) {
70  REPORT_ERROR(ERR_LOGIC_ERROR, "Single GPU (stream) expected");
71  }
72  if ( ! s.keepSrcCopy) {
73  REPORT_ERROR(ERR_LOGIC_ERROR, "'keepSrcCopy' must be set to true");
74  }
75 }
76 
77 template<typename T>
79  bool isReady = this->isInitialized()
80  && this->isSrcSet();
81  if ( ! isReady) {
82  REPORT_ERROR(ERR_LOGIC_ERROR, "Instance is either not initialized or the 'src' has not been set.");
83  }
84  gpuErrchk(cudaMemcpy(
85  m_d_dest,
86  m_d_src,
87  this->getSettings().dims.size() * sizeof(T),
88  cudaMemcpyDeviceToDevice)); // FIXME DS use proper stream
89 }
90 
91 template<typename T>
93  m_d_src = nullptr;
94  m_d_dest = nullptr;
95  m_stream = nullptr;
96  m_d_matrices = nullptr;
97 }
98 
99 template<typename T>
101  gpuErrchk(cudaFree(m_d_src));
102  gpuErrchk(cudaFree(m_d_dest));
103  gpuErrchk(cudaFree(m_d_matrices));
104 
105  setDefault();
106 }
107 
108 template<typename T>
110  const auto& s = this->getSettings();
111  const Dimensions dims = s.dims;
112  size_t bytes = dims.sizePadded() * sizeof(T);
113  gpuErrchk(cudaMalloc(&m_d_src, bytes));
114  gpuErrchk(cudaMalloc(&m_d_dest, bytes));
115 
116  bytes = dims.n() * 9 * sizeof(float);
117  gpuErrchk(cudaMalloc(&m_d_matrices, bytes));
118 }
119 
120 template<typename T>
121 T *CudaBSplineGeoTransformer<T>::interpolate(const std::vector<float> &matrices) {
122  const auto& dims = this->getSettings().dims;
123  size_t bytes = dims.n() * 9 * sizeof(float);
124  auto stream = *(cudaStream_t*)m_stream->stream();
125  gpuErrchk(cudaMemcpyAsync(
126  m_d_matrices,
127  matrices.data(),
128  bytes,
129  cudaMemcpyHostToDevice, stream));
130  dim3 dimBlock(64, 1, 1);
131  dim3 dimGrid(
132  std::ceil(dims.x() / (float)dimBlock.x),
133  std::ceil(dims.y() / (float)dimBlock.y),
134  dims.n());
135 
136  interpolateKernel<<<dimGrid, dimBlock, 0, stream>>> (
137  m_d_src, m_d_dest, m_d_matrices,
138  dims.x(), dims.y());
139  m_stream->synch();
140 
141  return m_d_dest;
142 }
143 
144 template<typename T>
146  const std::vector<float> &weights, size_t firstN,
147  T norm) {
148  const auto& dims = this->getSettings().dims.copyForN(firstN);
149  assert(weights.size() == dims.n());
150  assert(0 < dims.n());
151  dim3 dimBlock(32, 32, 1);
152  dim3 dimGrid(
153  std::ceil(dims.x() / (float)dimBlock.x),
154  std::ceil(dims.y() / (float)dimBlock.y),
155  1);
156  auto stream = *(cudaStream_t*)m_stream->stream();
157 
158  // copy weights
159  T *d_weights;
160  size_t bytesWeight = weights.size() * sizeof(float);
161  gpuErrchk(cudaMalloc(&d_weights, bytesWeight));
162  gpuErrchk(cudaMemcpyAsync(
163  d_weights,
164  weights.data(),
165  bytesWeight,
166  cudaMemcpyHostToDevice, stream));
167 
168  sumKernel<<<dimGrid, dimBlock, 0, stream>>> (
169  m_d_dest, m_d_src, d_weights,// FIXME DS this will damage src data
170  dims.x(), dims.y(), dims.n(), norm);
171 
172  size_t bytes = dims.sizeSingle() * sizeof(T);
173  gpuErrchk(cudaMemcpyAsync(
174  dest,
175  m_d_src,
176  bytes,
177  cudaMemcpyDeviceToHost, stream));
178 
179  gpuErrchk(cudaFree(d_weights));
180  m_stream->synch();
181 
182 }
183 
184 // explicit instantiation
185 template class CudaBSplineGeoTransformer<float>;
186 template class CudaBSplineGeoTransformer<double>;
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
T * interpolate(const std::vector< float > &matrices) override
Case or algorithm not implemented yet.
Definition: xmipp_error.h:177
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
T norm(const std::vector< T > &v)
Definition: vector_ops.h:399
constexpr size_t sizePadded() const
Definition: dimensions.h:108
CUDA_HD constexpr size_t n() const
Definition: dimensions.h:78
void setSrc(const T *data) override
Definition: gpu.h:36
void sum(T *dest, const std::vector< float > &weights, size_t firstN, T norm) override
Some logical error in the pipeline.
Definition: xmipp_error.h:147