mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
Loading...
Searching...
No Matches
cudaPtr.hpp
Go to the documentation of this file.
1/** \file cudaPtr.hpp
2 * \author Jared R. Males
3 * \brief A wrapper for cuda device pointers
4 * \ingroup cuda_files
5 *
6 */
7
8//***********************************************************************//
9// Copyright 2019,2020 Jared R. Males (jaredmales@gmail.com)
10//
11// This file is part of mxlib.
12//
13// mxlib is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// mxlib is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
25//***********************************************************************//
26
27#ifndef math_cudaPtr_hpp
28#define math_cudaPtr_hpp
29
30#ifdef MXLIB_CUDA
31
32#include <iostream>
33#include <cstdint>
34
35#include <cuda_runtime.h>
36
37#include "../../mxlib.hpp"
38
39#include "templateCuda.hpp"
40
41namespace mx
42{
43namespace cuda
44{
45
46/// A smart-pointer wrapper for cuda device pointers.
47/**
48 * \ingroup cuda
49 */
50template <typename T, class verboseT = verbose::d>
51struct cudaPtr
52{
53
54 /// The host data type.
55 // typedef typename cudaType<T>::hostType hostPtrT;
56 typedef T hostPtrT;
57
58 /// The device data type
59 // typedef typename cudaType<T>::deviceType devicePtrT;
60 typedef T devicePtrT;
61
62 /// The device pointer
63 devicePtrT *m_devicePtr{ nullptr };
64
65 /// The allocated size
66 size_t m_size{ 0 };
67
68 /// The number of rows set on allocation
69 uint32_t m_rows {0};
70
71 /// The number of columns set on allocation
72 uint32_t m_cols {0};
73
74 /// The number of planes set on allocation
75 uint32_t m_planes {0};
76
77 /// Destructor, frees memory if allocated.
78 ~cudaPtr();
79
80 size_t size()
81 {
82 return m_size;
83 }
84
85 uint32_t rows()
86 {
87 return m_rows;
88 }
89
90 uint32_t cols()
91 {
92 return m_cols;
93 }
94
95 uint32_t planes()
96 {
97 return m_planes;
98 }
99
100 private:
101 /// Resize the memory allocation, in 1D
102 /** If no size change, this is a no-op.
103 *
104 * \returns 0 on success.
105 * \returns a cuda error code otherwise.
106 *
107 */
108 error_t resizeImpl( size_t sz /**< [in] the new size */ );
109
110 public:
111 /// Resize the memory allocation, in 1D
112 /** If no size change, this is a no-op.
113 *
114 * \returns 0 on success.
115 * \returns a cuda error code otherwise.
116 *
117 */
118 error_t resize( size_t sz /**< [in] the new size */ );
119
120 /// Resize the memory allocation, in 2D
121 /** If no size change, this is a no-op.
122 *
123 * \returns 0 on success.
124 * \returns a cuda error code otherwise.
125 *
126 */
127 error_t resize( uint32_t x_sz, ///< [in] the new x size,
128 uint32_t y_sz ///< [in] the new y size
129 );
130
131 /// Resize the memory allocation, in 3D
132 /** If no size change, this is a no-op.
133 *
134 * \returns 0 on success.
135 * \returns a cuda error code otherwise.
136 *
137 */
138 error_t resize( uint32_t x_sz, ///< [in] the new x size,
139 uint32_t y_sz, ///< [in] the new y size,
140 uint32_t z_sz ///< [in] the new z size
141 );
142
143 /// Initialize the array bytes to 0.
144 /** Same as setZero, just a wrapper to cudaMemset.
145 *
146 */
147 error_t initialize();
148
149 /// Initialize the array bytes to 0.
150 /** Same as initialize, just a wrapper to cudaMemset.
151 *
152 */
153 error_t setZero();
154
155 /// Free the memory allocation
156 /**
157 * \returns 0 on success.
158 * \returns a cuda error code otherwise.
159 *
160 */
161 error_t free();
162
163 /// Copy from the host to the device, after allocation.
164 /**
165 * The device pointer must be allocated.
166 *
167 * \returns 0 on success.
168 * \returns a cuda error code otherwise.
169 *
170 */
171 error_t upload( const hostPtrT *src /**< [in] The host location */ );
172
173 /// Copy from the host to the device with 1D allocation.
174 /**
175 * The device pointer will be re-allocated as needed.
176 *
177 * \returns 0 on success.
178 * \returns a cuda error code otherwise.
179 *
180 */
181 error_t upload( const hostPtrT *src, ///< [in] The host location
182 size_t x_sz ///< [in] The x size of the array
183 );
184
185 /// Copy from the host to the device with 2D allocation.
186 /**
187 * The device pointer will be re-allocated as needed.
188 *
189 * \returns 0 on success.
190 * \returns a cuda error code otherwise.
191 *
192 */
193 error_t upload( const hostPtrT *src, ///< [in] The host location
194 uint32_t x_sz, ///< [in] The x size of the array
195 uint32_t y_sz ///< [in] The x size of the array
196 );
197
198 /// Copy from the host to the device with #D allocation.
199 /**
200 * The device pointer will be re-allocated as needed.
201 *
202 * \returns 0 on success.
203 * \returns a cuda error code otherwise.
204 *
205 */
206 error_t upload( const hostPtrT *src, ///< [in] The host location
207 uint32_t x_sz, ///< [in] The x size of the array
208 uint32_t y_sz, ///< [in] The x size of the array
209 uint32_t z_sz ///< [in] The x size of the array
210 );
211
212 /// Copy from the device to the host.
213 /**
214 *
215 */
216 error_t download( hostPtrT *dest /**< [in] The host location, allocated.*/ );
217
218 /// Accesses the device pointer for use in Cuda functions.
219 /**
220 */
221 typename cpp2cudaType<devicePtrT>::cudaType *data()
222 {
223 return reinterpret_cast<typename cpp2cudaType<devicePtrT>::cudaType *>(m_devicePtr);
224 }
225
226 /// Conversion operator, accesses the device pointer for use in Cuda functions.
227 /**
228 */
229 typename cpp2cudaType<devicePtrT>::cudaType *operator()()
230 {
231 return reinterpret_cast<typename cpp2cudaType<devicePtrT>::cudaType *>(m_devicePtr);
232 }
233
234 /// Conversion operator, accesses the device pointer for use in Cfuda functions.
235 /**
236 *
237 */
238 const typename cpp2cudaType<devicePtrT>::cudaType *operator()() const
239 {
240 return reinterpret_cast<typename cpp2cudaType<devicePtrT>::cudaType *>(m_devicePtr);
241 }
242};
243
244template <typename T, class verboseT>
245cudaPtr<T, verboseT>::~cudaPtr()
246{
247 free();
248}
249
250template <typename T, class verboseT>
251error_t cudaPtr<T, verboseT>::resizeImpl( size_t sz )
252{
253 if( m_size == sz )
254 {
255 return error_t::noerror;
256 }
257
258 m_size = sz;
259
260 cudaError_t rv = cudaMalloc( (void **)&m_devicePtr, sz * sizeof( devicePtrT ) );
261
262 if( rv != cudaSuccess )
263 {
264 return internal::mxlib_error_report<verboseT>(cudaError2error_t(rv), "cudaMalloc");
265 }
266
267 return error_t::noerror;
268}
269
270
271template <typename T, class verboseT>
272error_t cudaPtr<T, verboseT>::resize( size_t sz )
273{
274 m_rows =sz;
275 m_cols = 1;
276 m_planes = 1;
277 return resizeImpl(sz);
278}
279
280template <typename T, class verboseT>
281error_t cudaPtr<T, verboseT>::resize( uint32_t x_sz, uint32_t y_sz )
282{
283 m_rows = x_sz;
284 m_cols = y_sz;
285 m_planes = 1;
286 return resizeImpl( x_sz * y_sz );
287}
288
289template <typename T, class verboseT>
290error_t cudaPtr<T, verboseT>::resize( uint32_t x_sz, uint32_t y_sz, uint32_t z_sz )
291{
292 m_rows = x_sz;
293 m_cols = y_sz;
294 m_planes = z_sz;
295 return resize( x_sz * y_sz * z_sz );
296}
297
298template <typename T, class verboseT>
299error_t cudaPtr<T, verboseT>::initialize()
300{
301 cudaError_t rv = ::cudaMemset( m_devicePtr, 0, m_size * sizeof( devicePtrT ) );
302
303 if(rv != cudaSuccess)
304 {
305 return internal::mxlib_error_report<verboseT>(cudaError2error_t(rv), "cudaMemset");
306 }
307
308 return error_t::noerror;
309}
310
311template <typename T, class verboseT>
312error_t cudaPtr<T, verboseT>::setZero()
313{
314 return initialize();
315}
316
317template <typename T, class verboseT>
318error_t cudaPtr<T, verboseT>::free()
319{
320 if( m_devicePtr )
321 {
322 cudaError_t rv = cudaFree( m_devicePtr );
323
324 if( rv != cudaSuccess )
325 {
326 return internal::mxlib_error_report<verboseT>(cudaError2error_t(rv), "cudaFree");
327 }
328 return error_t::noerror;
329 }
330
331 m_devicePtr = 0;
332 m_size = 0;
333
334 return error_t::noerror;
335}
336
337template <typename T, class verboseT>
338error_t cudaPtr<T, verboseT>::upload( const hostPtrT *src )
339{
340 // Copy host memory to device
341 cudaError_t rv = cudaMemcpy( m_devicePtr, src, m_size * sizeof( devicePtrT ), cudaMemcpyHostToDevice );
342
343 if( rv != cudaSuccess )
344 {
345 return internal::mxlib_error_report<verboseT>(cudaError2error_t(rv), "cudaMemcpy");
346 }
347
348 return error_t::noerror;
349}
350
351template <typename T, class verboseT>
352error_t cudaPtr<T, verboseT>::upload( const hostPtrT *src, size_t x_sz )
353{
354 error_t rv;
355
356 rv = resize( x_sz );
357
358 if( !!rv )
359 {
360 return internal::mxlib_error_report<verboseT>(rv);
361 }
362
363 return upload( src );
364}
365
366template <typename T, class verboseT>
367error_t cudaPtr<T, verboseT>::upload( const hostPtrT *src, uint32_t x_sz, uint32_t y_sz )
368{
369 error_t rv;
370
371 rv = resize( x_sz, y_sz );
372
373 if( !!rv )
374 {
375 return internal::mxlib_error_report<verboseT>(rv);
376 }
377
378 return upload( src );
379}
380
381template <typename T, class verboseT>
382error_t cudaPtr<T, verboseT>::upload( const hostPtrT *src, uint32_t x_sz, uint32_t y_sz, uint32_t z_sz )
383{
384 error_t rv;
385
386 rv = resize( x_sz, y_sz, z_sz );
387
388 if( !!rv )
389 {
390 return internal::mxlib_error_report<verboseT>(rv);
391 }
392
393 return upload( src );
394}
395
396template <typename T, class verboseT>
397error_t cudaPtr<T, verboseT>::download( hostPtrT *dest )
398{
399 // Copy device memory to host
400 cudaError_t rv = cudaMemcpy( dest, m_devicePtr, m_size * sizeof( devicePtrT ), cudaMemcpyDeviceToHost );
401
402 if( rv != cudaSuccess )
403 {
404 return internal::mxlib_error_report<verboseT>(cudaError2error_t(rv), "cudaMemcpy");
405 }
406
407 return error_t::noerror;
408}
409
410} // namespace cuda
411} // namespace mx
412
413#endif // MXLIB_CUDA
414#endif // math_cudaPtr_hpp
error_t
The mxlib error codes.
Definition error_t.hpp:26
The mxlib c++ namespace.
Definition mxlib.hpp:37
Utilities for a template interface to cuda.