mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
psdFilterCuda.hpp
Go to the documentation of this file.
1 /** \file psdFilterCuda.hpp
2  * \brief Declares and defines a class for filtering with PSDs on CUDA GPUs
3  * \ingroup signal_processing_files
4  * \author Jared R. Males (jaredmales@gmail.com)
5  *
6  */
7 
8 //***********************************************************************//
9 // Copyright 2018 Jared R. Males (jaredmales@gmail.com)
10 //
11 // This file is part of mxlib.
12 //
13 // mxlib is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
17 //
18 // mxlib is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
22 //
23 // You should have received a copy of the GNU General Public License
24 // along with mxlib. If not, see <http://www.gnu.org/licenses/>.
25 //***********************************************************************//
26 
27 #ifndef psdFilterCuda_hpp
28 #define psdFilterCuda_hpp
29 
30 #include "../cuda/templateCudaPtr.hpp"
31 #include "../cuda/templateCuda.hpp"
32 
33 #include "psdFilter.hpp"
34 
35 #include <cufft.h>
36 #include <cufftXt.h>
37 //#include <helper_functions.h>
38 #include <helper_cuda.h>
39 
40 
41 namespace mx
42 {
43 namespace sigproc
44 {
45 
46 
47 /// A class for filtering noise with PSDs using CUDA cuFFT
48 /** The square-root of the PSD is maintained by this class, either as a pointer to an external array or using internally allocated memory (which will be
49  * de-allocated on destruction.
50  *
51  * PSD Normalization: the PSD used for this needs to be normalized properly to produce filtered noise with the correct statistics. Given an
52  * array of size N which contains the PSD as "power per unit frequency" vs frequency (i.e. the PSD) on some frequency scale with uniform spacing \f$ \Delta f \f$,
53  * the normalization is
54  \f[
55  PSD_{norm} = PSD * N * \Delta f
56  \f]
57  * for a 1-dimensional PSD and
58  \f[
59  PSD_{norm} = PSD * N_0 * \Delta f_0 * N_1 * \Delta f_1
60  \f]
61  * for a 2-dimensional PSD. Remember that these are applied before taking the square root.
62  *
63  *
64  * \ingroup psd_filter
65  *
66  * \todo once fftT has a plan interface with pointers for working memory, use it.
67  */
68 template<typename _realT>
69 class psdFilter<_realT, 1>
70 {
71 public:
72 
73  typedef _realT realT; ///< Real floating point type
74  typedef std::complex<_realT> complexT; ///< Complex floating point type.
75  typedef Eigen::Array<realT, Eigen::Dynamic, Eigen::Dynamic> realArrayT; ///< Eigen array type with Scalar==realT
76  typedef Eigen::Array<complexT, Eigen::Dynamic, Eigen::Dynamic> complexArrayT; ///< Eigen array type with Scalar==complexT
77 
78  typedef realT deviceRealPtrT;
79  typedef complexT deviceComplexPtrT;
80 
81 protected:
82 
83  int m_rows {0}; ///< The number of rows in the filter, and the required number of rows in the noise array.
84  int m_cols {0}; ///< The number of columns in the filter, and the required number of columns in the noise array.
85 
86  mx::cuda::cudaPtr<complexT> m_psdSqrt; ///< Pointer to the real array containing the square root of the PSD.
87 
88 
89  mx::cuda::cudaPtr<complexT> m_scale; ///< the scale factor.
90 
91  ///Cuda FFT plan. We only need one since the forward/inverse is part of execution.
92  cufftHandle m_fftPlan {0};
93 
94 public:
95 
96  ///C'tor.
98 
99  ///Destructor
101 
102 protected:
103 
104  ///Set the size of the filter.
105  /** Handles allocation of the _ftWork array and fftw planning.
106  *
107  * Requires _psdSqrt to be set first. This is called by the psdSqrt() and psd() methods.
108  *
109  */
110  int setSize();
111 
112 public:
113 
114  ///Get the number of rows in the filter
115  /**
116  * \returns the current value of m_rows.
117  */
118  int rows();
119 
120  ///Get the number of columns in the filter
121  /**
122  * \returns the current value of m_cols.
123  */
124  int cols();
125 
126  ///Set the sqaure-root of the PSD.
127  /** This allocates _npsdSqrt and fills it with th evalues in the array.
128  *
129  * See the discussion of PSD normalization above.
130  *
131  * \returns 0 on success
132  * \returns -1 on error
133  */
134  int psdSqrt( const realArrayT & npsdSqrt /**< [in] an array containing the square root of the PSD.*/ );
135 
136  int psdSqrt( const cuda::cudaPtr<std::complex<realT>> & npsdSqrt, /**< [in] an array containing the square root of the PSD.*/
137  size_t rows,
138  size_t cols
139  );
140 
141  ///De-allocate all working memory and reset to initial state.
142  void clear();
143 
144  ///Apply the filter.
145  /**
146  *
147  * \returns 0 on success
148  * \returns -1 on error
149  */
150  int filter( deviceComplexPtrT * noise ///< [in.out] the noise field of size rows() X cols(), which is filtered in-place.
151  );
152 
153  ///Apply the filter.
154  /**
155  * \returns 0 on success
156  * \returns -1 on error
157  */
158  int operator()( realArrayT & noise /**< [in.out] the noise field of size rows() X cols(), which is filtered in-place. */ );
159 
160  ///Apply the filter.
161  /**
162  * \returns 0 on success
163  * \returns -1 on error
164  */
165  int operator()( realArrayT & noise, ///< [in.out] the noise field of size rows() X cols(), which is filtered in-place.
166  realArrayT & noiseIm ///< [out] [optional] an array to fill with the imaginary output of the filter, allowing 2-for-1 calculation.
167  );
168 };
169 
170 template<typename realT>
171 psdFilter<realT,1>::psdFilter()
172 {
173 }
174 
175 template<typename realT>
176 psdFilter<realT,1>::~psdFilter()
177 {
178  if( m_fftPlan )
179  {
180  checkCudaErrors(cufftDestroy(m_fftPlan));
181  }
182 }
183 
184 template<typename realT>
185 int psdFilter<realT,1>::setSize()
186 {
187 
188  m_scale.resize(1);
189  std::complex<realT> scale(1./(m_rows*m_cols),0);
190  m_scale.upload(&scale,1);
191 
192  checkCudaErrors(cufftPlan2d(&m_fftPlan, m_rows, m_cols, CUFFT_C2C));
193 
194  return 0;
195 }
196 
197 
198 
199 template<typename realT>
200 int psdFilter<realT,1>::rows()
201 {
202  return m_rows;
203 }
204 
205 template<typename realT>
206 int psdFilter<realT,1>::cols()
207 {
208  return m_cols;
209 }
210 
211 // template<typename realT>
212 // int psdFilter<realT,1>::psdSqrt( realArrayT * npsdSqrt )
213 // {
214 // if(_psdSqrt && _owner)
215 // {
216 // delete _psdSqrt;
217 // }
218 //
219 // _psdSqrt = npsdSqrt;
220 // _owner = false;
221 //
222 // setSize();
223 //
224 // return 0;
225 // }
226 
227 template<typename realT>
228 int psdFilter<realT,1>::psdSqrt( const realArrayT & npsdSqrt )
229 {
230  m_rows = npsdSqrt.rows();
231  m_cols = npsdSqrt.cols();
232 
233 
234  complexArrayT tmp( m_rows, m_cols);
235  tmp.setZero();
236  tmp.real() = npsdSqrt;
237 
238  m_psdSqrt.upload(tmp.data(), m_rows*m_cols);
239 
240  setSize();
241 
242  return 0;
243 }
244 
245 template<typename realT>
246 int psdFilter<realT,1>::psdSqrt( const cuda::cudaPtr<std::complex<realT>> & npsdSqrt,
247  size_t rows,
248  size_t cols
249  )
250 {
251  m_rows = rows;
252  m_cols = cols;
253 
254  m_psdSqrt.resize(m_rows*m_cols);
255 
256  ///\todo move this into cudaPtr
257  cudaMemcpy( m_psdSqrt.m_devicePtr, npsdSqrt.m_devicePtr, m_rows*m_cols*sizeof(std::complex<realT>), cudaMemcpyDeviceToDevice);
258 
259  setSize();
260 
261  return 0;
262 }
263 
264 template<typename realT>
265 void psdFilter<realT,1>::clear()
266 {
267  m_scale.resize(0);
268  m_rows = 0;
269  m_cols = 0;
270 
271  m_psdSqrt.resize(0);
272 }
273 
274 template<typename realT>
275 int psdFilter<realT,1>::filter( deviceComplexPtrT * noise )
276 {
277 
278  //Transform complex noise to Fourier domain.
279 
280  cufftExecC2C(m_fftPlan, (cuComplex *) noise, (cuComplex *) noise, CUFFT_FORWARD);
281 
282  //Apply the filter.
283  mx::cuda::pointwiseMul<cuComplex><<<32, 256>>>((cuComplex *) noise, (cufftComplex *) m_psdSqrt.m_devicePtr, m_rows*m_cols);
284 
285  cufftExecC2C(m_fftPlan, (cuComplex *)noise, (cuComplex *)noise, CUFFT_INVERSE);
286 
287 
288  mx::cuda::scalarMul<cuComplex><<<32, 256>>>((cuComplex *) noise, (cuComplex *) m_scale.m_devicePtr, m_rows*m_cols);
289  //Now take the real part, and normalize.
290 
291  //noise = noise/(noise.rows()*noise.cols());
292 
293 
294  return 0;
295 }
296 
297 template<typename realT>
298 int psdFilter<realT,1>::operator()( realArrayT & noise )
299 {
300  return filter(noise);
301 }
302 
303 template<typename realT>
304 int psdFilter<realT,1>::operator()( realArrayT & noise,
305  realArrayT & noiseIm
306  )
307 {
308  return filter(noise, &noiseIm);
309 }
310 
311 } //namespace sigproc
312 } //namespace mx
313 
314 #endif //psdFilterCuda_hpp
int psdSqrt(const realArrayT &npsdSqrt)
Set the sqaure-root of the PSD.
_realT realT
Real floating point type.
int filter(deviceComplexPtrT *noise)
Apply the filter.
int cols()
Get the number of columns in the filter.
std::complex< _realT > complexT
Complex floating point type.
Eigen::Array< realT, Eigen::Dynamic, Eigen::Dynamic > realArrayT
Eigen array type with Scalar==realT.
int operator()(realArrayT &noise, realArrayT &noiseIm)
Apply the filter.
mx::cuda::cudaPtr< complexT > m_scale
the scale factor.
mx::cuda::cudaPtr< complexT > m_psdSqrt
Pointer to the real array containing the square root of the PSD.
int setSize()
Set the size of the filter.
int psdSqrt(const cuda::cudaPtr< std::complex< realT >> &npsdSqrt, size_t rows, size_t cols)
Eigen::Array< complexT, Eigen::Dynamic, Eigen::Dynamic > complexArrayT
Eigen array type with Scalar==complexT.
int operator()(realArrayT &noise)
Apply the filter.
void clear()
De-allocate all working memory and reset to initial state.
int rows()
Get the number of rows in the filter.
The mxlib c++ namespace.
Definition: mxError.hpp:107
Declares and defines a class for filtering with PSDs.