mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
cudaPtr.hpp
Go to the documentation of this file.
1 /** \file cudaPtr.hpp
2  * \author Jared R. Males
3  * \brief A wrapper for cuda device pointers
4  * \ingroup cuda_files
5  *
6  */
7 
8 //***********************************************************************//
9 // Copyright 2019,2020 Jared R. Males (jaredmales@gmail.com)
10 //
11 // This file is part of mxlib.
12 //
13 // mxlib is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
17 //
18 // mxlib is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
22 //
23 // You should have received a copy of the GNU General Public License
24 // along with mxlib. If not, see <http://www.gnu.org/licenses/>.
25 //***********************************************************************//
26 
27 #ifndef math_cudaPtr_hpp
28 #define math_cudaPtr_hpp
29 
30 #include <iostream>
31 
32 #include <cuda_runtime.h>
33 
34 #include "templateCuda.hpp"
35 
36 namespace mx
37 {
38 namespace cuda
39 {
40 
41 /// A smart-pointer wrapper for cuda device pointers.
42 /**
43  * \ingroup cuda
44  */
45 template<typename T>
46 struct cudaPtr
47 {
48  ///The host data type.
49  //typedef typename cudaType<T>::hostType hostPtrT;
50  typedef T hostPtrT;
51 
52  ///The device data type
53  //typedef typename cudaType<T>::deviceType devicePtrT;
54  typedef T devicePtrT;
55 
56  ///The device pointer
57  devicePtrT * m_devicePtr {nullptr};
58 
59  ///The allocated size
60  size_t m_size {0};
61 
62  ///Destructor, frees memory if allocated.
64 
65  size_t size()
66  {
67  return m_size;
68  }
69 
70  ///Resize the memory allocation, in 1D
71  /** If no size change, this is a no-op.
72  *
73  * \returns 0 on success.
74  * \returns a cuda error code otherwise.
75  *
76  * \test Scenario: scaling a vector with cublas \ref test_math_templateCublas_scal "[test doc]"
77  * \test Scenario: scaling and accumulating a vector with cublas \ref test_math_templateCublas_axpy "[test doc]"
78  * \test Scenario: multiplying two vectors element by element \ref test_math_templateCublas_elementwiseXxY "[test doc]"
79  */
80  int resize( size_t sz /**< [in] the new size */);
81 
82  ///Resize the memory allocation, in 2D
83  /** If no size change, this is a no-op.
84  *
85  * \returns 0 on success.
86  * \returns a cuda error code otherwise.
87  *
88  */
89  int resize( size_t x_sz, ///< [in] the new x size,
90  size_t y_sz ///< [in] the new y size
91  );
92 
93  ///Resize the memory allocation, in 3D
94  /** If no size change, this is a no-op.
95  *
96  * \returns 0 on success.
97  * \returns a cuda error code otherwise.
98  *
99  */
100  int resize( size_t x_sz, ///< [in] the new x size,
101  size_t y_sz, ///< [in] the new y size,
102  size_t z_sz ///< [in] the new z size
103  );
104 
105  /// Initialize the array bytes to 0.
106  /** Just a wrapper to cudaMemset.
107  *
108  */
109  cudaError_t initialize();
110 
111  ///Free the memory allocation
112  /**
113  * \returns 0 on success.
114  * \returns a cuda error code otherwise.
115  *
116  */
117  int free();
118 
119  ///Copy from the host to the device, after allocation.
120  /**
121  * The device pointer must be allocated.
122  *
123  * \returns 0 on success.
124  * \returns a cuda error code otherwise.
125  *
126  * \test Scenario: multiplying two vectors element by element \ref test_math_templateCublas_elementwiseXxY "[test doc]"
127  */
128  int upload( const hostPtrT * src /**< [in] The host location */);
129 
130  ///Copy from the host to the device with allocation.
131  /**
132  * The device pointer will be re-allocated as needed.
133  *
134  * \returns 0 on success.
135  * \returns a cuda error code otherwise.
136  *
137  * \test Scenario: scaling a vector with cublas \ref test_math_templateCublas_scal "[test doc]"
138  * \test Scenario: scaling and accumulating a vector with cublas \ref test_math_templateCublas_axpy "[test doc]"
139  * \test Scenario: multiplying two vectors element by element \ref test_math_templateCublas_elementwiseXxY "[test doc]"
140  */
141  int upload( const hostPtrT * src, ///< [in] The host location
142  size_t sz ///< [in] The size of the array
143  );
144 
145  ///Copy from the device to the host.
146  /**
147  *
148  * \test Scenario: scaling a vector with cublas \ref test_math_templateCublas_scal "[test doc]"
149  * \test Scenario: scaling and accumulating a vector with cublas \ref test_math_templateCublas_axpy "[test doc]"
150  * \test Scenario: multiplying two vectors element by element \ref test_math_templateCublas_elementwiseXxY "[test doc]"
151  */
152  int download( hostPtrT * dest /**< [in] The host location, allocated.*/ );
153 
154  ///Conversion operator, accesses the device pointer for use in Cuda functions.
155  /**
156  * \test Scenario: scaling and accumulating a vector with cublas \ref test_math_templateCublas_axpy "[test doc]"
157  * \test Scenario: multiplying two vectors element by element \ref test_math_templateCublas_elementwiseXxY "[test doc]"
158  */
159  typename cpp2cudaType<devicePtrT>::cudaType* operator()()
160  {
161  return (typename cpp2cudaType<devicePtrT>::cudaType*) m_devicePtr;
162  }
163 
164  /// Conversion operator, accesses the device pointer for use in Cuda functions.
165  /**
166  *
167  * \test Scenario: scaling a vector with cublas \ref test_math_templateCublas_scal "[test doc]"
168  */
169  const typename cpp2cudaType<devicePtrT>::cudaType* operator()() const
170  {
171  return (typename cpp2cudaType<devicePtrT>::cudaType*) m_devicePtr;
172  }
173 
174 };
175 
176 template<typename T>
178 {
179  free();
180 }
181 
182 template<typename T>
183 int cudaPtr<T>::resize( size_t sz )
184 {
185  if( m_size == sz ) return 0;
186 
187  m_size = sz;
188 
189  cudaError_t rv = cudaMalloc((void **)&m_devicePtr, sz*sizeof(devicePtrT));
190 
191  if(rv != cudaSuccess)
192  {
193  std::cerr << "Error from cudaMalloc: ";
194  printf("[%s] %s\n", cudaGetErrorName(rv), cudaGetErrorString(rv));
195  }
196 
197  return 0;
198 
199 }
200 
201 template<typename T>
202 int cudaPtr<T>::resize( size_t x_sz,
203  size_t y_sz
204  )
205 {
206  return resize(x_sz*y_sz);
207 }
208 
209 template<typename T>
210 int cudaPtr<T>::resize( size_t x_sz,
211  size_t y_sz,
212  size_t z_sz
213  )
214 {
215  return resize(x_sz*y_sz*z_sz);
216 }
217 
218 template<typename T>
220 {
221  return ::cudaMemset(m_devicePtr, 0, m_size*sizeof(devicePtrT));
222 }
223 
224 template<typename T>
226 {
227  if(m_devicePtr)
228  {
229  int rv = cudaFree(m_devicePtr);
230 
231  if(rv != cudaSuccess)
232  {
233  std::cerr << "Cuda Free Error \n";
234  return rv;
235  }
236  }
237 
238  m_devicePtr = 0;
239  m_size = 0;
240 
241  return 0;
242 }
243 
244 template<typename T>
245 int cudaPtr<T>::upload( const hostPtrT * src )
246 {
247  // Copy host memory to device
248  int rv = cudaMemcpy( m_devicePtr, src, m_size*sizeof(devicePtrT), cudaMemcpyHostToDevice);
249 
250  if(rv != cudaSuccess)
251  {
252  std::cerr << "Cuda Memcpy error \n";
253  return rv;
254  }
255 
256  return 0;
257 }
258 
259 template<typename T>
260 int cudaPtr<T>::upload( const hostPtrT * src,
261  size_t sz
262  )
263 {
264  int rv;
265 
266  rv = resize(sz);
267 
268  if(rv) return rv;
269 
270  return upload(src);
271 }
272 
273 template<typename T>
275 {
276  // Copy device memory to host
277  int rv = cudaMemcpy( dest, m_devicePtr, m_size*sizeof(devicePtrT), cudaMemcpyDeviceToHost);
278 
279  if(rv != cudaSuccess)
280  {
281  std::cerr << "Cuda Memcpy error \n";
282  return rv;
283  }
284 
285  return 0;
286 }
287 
288 }//namespace cuda
289 }//namespace mx
290 #endif // math_cudaPtr_hpp
The mxlib c++ namespace.
Definition: mxError.hpp:107
A smart-pointer wrapper for cuda device pointers.
Definition: cudaPtr.hpp:47
int resize(size_t x_sz, size_t y_sz)
Resize the memory allocation, in 2D.
Definition: cudaPtr.hpp:202
cpp2cudaType< devicePtrT >::cudaType * operator()()
Conversion operator, accesses the device pointer for use in Cuda functions.
Definition: cudaPtr.hpp:159
cudaError_t initialize()
Initialize the array bytes to 0.
Definition: cudaPtr.hpp:219
T devicePtrT
The device data type.
Definition: cudaPtr.hpp:54
const cpp2cudaType< devicePtrT >::cudaType * operator()() const
Conversion operator, accesses the device pointer for use in Cuda functions.
Definition: cudaPtr.hpp:169
size_t m_size
The allocated size.
Definition: cudaPtr.hpp:60
int download(hostPtrT *dest)
Copy from the device to the host.
Definition: cudaPtr.hpp:274
int upload(const hostPtrT *src)
Copy from the host to the device, after allocation.
Definition: cudaPtr.hpp:245
devicePtrT * m_devicePtr
The device pointer.
Definition: cudaPtr.hpp:57
int free()
Free the memory allocation.
Definition: cudaPtr.hpp:225
T hostPtrT
The host data type.
Definition: cudaPtr.hpp:50
int resize(size_t x_sz, size_t y_sz, size_t z_sz)
Resize the memory allocation, in 3D.
Definition: cudaPtr.hpp:210
int upload(const hostPtrT *src, size_t sz)
Copy from the host to the device with allocation.
Definition: cudaPtr.hpp:260
int resize(size_t sz)
Resize the memory allocation, in 1D.
Definition: cudaPtr.hpp:183
~cudaPtr()
Destructor, frees memory if allocated.
Definition: cudaPtr.hpp:177
Utilities for a template interface to cuda.