mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
Loading...
Searching...
No Matches
syevdxT.hpp
Go to the documentation of this file.
1/** \file syevdxT.hpp
2 * \author Jared R. Males
3 * \brief An interface to cuSOLVER Xsyevdx
4 * \ingroup cuda_files
5 *
6 */
7
8//***********************************************************************//
9// Copyright 2025 Jared R. Males (jaredmales@gmail.com)
10//
11// This file is part of mxlib.
12//
13// mxlib is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// mxlib is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
25//***********************************************************************//
26
27#ifndef math_syevdxT_hpp
28#define math_syevdxT_hpp
29
30#ifdef MXLIB_CUDA
31
32#include <Eigen/Dense>
33
34#include <cmath>
35#include <functional>
36#include <iostream>
37#include <random>
38#include <stdexcept>
39#include <string>
40
41#include <cuda_runtime.h>
42#include <cusolverDn.h>
43
44// #include <cuComplex.h>
45// #include <cuda_runtime_api.h>
46// #include <cublas_api.h>
47// #include <library_types.h>
48
49// #include "/home/jrmales/Source/CUDALibrarySamples/cuSOLVER/utils/cusolver_utils.h"
50
51
52namespace mx
53{
54namespace cuda
55{
56
57#define MXCUDA_EXCEPTION( ec, explan ) \
58 std::string msg = "Cuda Error: ("; \
59 msg += cudaGetErrorName( ec ); \
60 msg += ") "; \
61 msg += cudaGetErrorString( ec ); \
62 msg += "\nContext: " explan; \
63 msg += ".\nAt line " + std::to_string( __LINE__ ); \
64 msg += " in " __FILE__; \
65 throw std::runtime_error( msg );
66
67#define MXCUSOLVER_EXCEPTION( ec, explan ) \
68 std::string msg = "cusolver Error: ("; \
69 msg += std::to_string( static_cast<int>( ec ) ); \
70 msg += ") "; \
71 msg += "\nContext: " explan; \
72 msg += ".\nAt line " + std::to_string( __LINE__ ); \
73 msg += " in " __FILE__; \
74 throw std::runtime_error( msg );
75
76template <typename floatT>
77struct cusolver_traits;
78
79template <>
80struct cusolver_traits<float>
81{
82 static constexpr cudaDataType cuda_data_type = CUDA_R_32F;
83};
84
85template <>
86struct cusolver_traits<double>
87{
88 static constexpr cudaDataType cuda_data_type = CUDA_R_64F;
89};
90
91template <typename floatT>
92struct syevdxT
93{
94
95 cusolverDnHandle_t m_cusolverH{ nullptr };
96 cusolverDnParams_t m_params{ nullptr };
97 cudaStream_t m_stream{ nullptr };
98
99 int m_allocations{ 0 };
100
101 floatT *m_dev_A{ nullptr };
102 floatT *m_dev_W{ nullptr };
103 int *m_dev_info{ nullptr };
104
105 cusolverEigRange_t m_range;
106 cublasFillMode_t m_uplo;
107 int64_t m_n{ 0 };
108 int64_t m_lda{ 0 };
109
110 floatT m_vu{ 0 };
111 floatT m_vl{ 0 };
112 int64_t m_il;
113 int64_t m_iu;
114
115 void *m_dev_work{ nullptr }; /* device workspace */
116 size_t m_dev_wsBytes{ 0 };
117 void *m_host_work{ nullptr }; /* device workspace */
118 size_t m_host_wsBytes{ 0 };
119
120 syevdxT();
121
122 syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = nullptr );
123
124 ~syevdxT();
125
126 void free( bool null = true );
127
128 void setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = nullptr );
129
130 void allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER );
131
132 int execute( int64_t &numEig, floatT *A );
133
134 template <typename eigenT>
135 int calcEigenVecs(eigenT &evecs, /**< [out] on exit contains the eigen vectors*/
136 eigenT &evals, /**< [out] on exit contains the eigen vectors*/
137 eigenT &cv, /**< [in] a lower-triangle (in the Lapack sense) square
138 covariance matrix.*/
139 bool normalize = false, /**< [in] [opt] flag specifying whether or not to
140 normalize the eigenvectors.*/
141 bool check = false /**< [in] [opt] flag specifying whether or not to
142 check the eigenvalues/vectors for
143 validity. Requires normalize=true.*/ );
144};
145
146template <typename floatT>
147syevdxT<floatT>::syevdxT()
148{
149}
150
151template <typename floatT>
152syevdxT<floatT>::syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
153 : m_cusolverH{ cusolverH }, m_params{ params }, m_stream{ stream }
154{
155}
156
157template <typename floatT>
158syevdxT<floatT>::~syevdxT()
159{
160 free( false );
161}
162
163template <typename floatT>
164void syevdxT<floatT>::free( bool null )
165{
166 if( m_dev_A != nullptr )
167 {
168 cudaError_t ec = cudaFree( m_dev_A );
169 if( ec != cudaSuccess )
170 {
171 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for matrix" );
172 }
173 }
174
175 if( m_dev_W != nullptr )
176 {
177 cudaError_t ec = cudaFree( m_dev_W );
178 if( ec != cudaSuccess )
179 {
180 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for eigen values" );
181 }
182 }
183
184 if( m_dev_info != nullptr )
185 {
186 cudaError_t ec = cudaFree( m_dev_info );
187 if( ec != cudaSuccess )
188 {
189 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for info" );
190 }
191 }
192
193 if( m_dev_work != nullptr )
194 {
195 cudaError_t ec = cudaFree( m_dev_work );
196 if( ec != cudaSuccess )
197 {
198 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for work" );
199 }
200 }
201
202 if( m_host_work != nullptr )
203 {
204 ::free( m_host_work );
205 }
206
207 if( null )
208 {
209 m_dev_A = nullptr;
210 m_dev_W = nullptr;
211 m_dev_info = nullptr;
212 m_dev_work = nullptr;
213 m_host_work = nullptr;
214 }
215}
216
217template <typename floatT>
218void syevdxT<floatT>::setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
219{
220 m_cusolverH = cusolverH;
221 m_params = params;
222 m_stream = stream;
223}
224
225template <typename floatT>
226void syevdxT<floatT>::allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo )
227
228{
229 if( m_cusolverH == nullptr )
230 {
231 throw std::runtime_error( "syevdx::allocate: m_cusolverH is null" );
232 }
233
234 if( m_params == nullptr )
235 {
236 throw std::runtime_error( "syevdx::allocate: m_params is null" );
237 }
238
239 cusolverEigRange_t range;
240 int64_t il, iu;
241 if( nVecs == 0 || nVecs == n )
242 {
243 range = CUSOLVER_EIG_RANGE_ALL;
244 il = 0;
245 iu = 0;
246 }
247 else
248 {
249 range = CUSOLVER_EIG_RANGE_I;
250 if( nVecs > n )
251 {
252 nVecs = n;
253 }
254 il = n - nVecs + 1;
255 iu = n;
256 }
257
258 if( n == m_n && range == m_range && il == m_il && iu == m_iu && uplo == m_uplo && m_dev_A && m_dev_W &&
259 m_dev_info && m_dev_work )
260 {
261 return;
262 }
263
264 m_n = n;
265 m_lda = n;
266
267 m_range = range;
268 m_il = il;
269 m_iu = iu;
270
271 m_uplo = uplo;
272
273 free( false );
274
275 cudaError_t ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_A ), sizeof( floatT ) * ( n * n ) );
276 if( ec != cudaSuccess )
277 {
278 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for matrix" );
279 }
280
281 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_W ), sizeof( floatT ) * n );
282 if( ec != cudaSuccess )
283 {
284 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for eigen values" );
285 }
286
287 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_info ), sizeof( int ) );
288 if( ec != cudaSuccess )
289 {
290 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for info" );
291 }
292
293 int64_t h_meig;
294
295 cusolverStatus_t csec = cusolverDnXsyevdx_bufferSize( m_cusolverH,
296 m_params,
297 CUSOLVER_EIG_MODE_VECTOR,
298 m_range,
299 m_uplo,
300 m_n,
301 cusolver_traits<floatT>::cuda_data_type,
302 m_dev_A,
303 m_lda,
304 &m_vl,
305 &m_vu,
306 m_il,
307 m_iu,
308 &h_meig,
309 cusolver_traits<floatT>::cuda_data_type,
310 m_dev_W,
311 cusolver_traits<floatT>::cuda_data_type,
312 &m_dev_wsBytes,
313 &m_host_wsBytes );
314 if( csec != CUSOLVER_STATUS_SUCCESS )
315 {
316 MXCUSOLVER_EXCEPTION( csec, "syevdxT::allocate: call to cusolverDnXsyevdx_bufferSize" );
317 }
318
319 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_work ), m_dev_wsBytes );
320 if( ec != cudaSuccess )
321 {
322 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for work" );
323 }
324
325 if( m_host_wsBytes > 0 )
326 {
327 m_host_work = reinterpret_cast<void *>( malloc( m_host_wsBytes ) );
328 if( m_host_work == nullptr )
329 {
330 throw std::runtime_error("syevdxT::allocate: allocating host memory for work" );
331 //mxThrowException( err::allocerr, "syevdxT::allocate:", "allocating host memory for work" );
332 }
333 }
334 else
335 {
336 m_host_work = nullptr; // not set by free
337 }
338
339 ++m_allocations;
340}
341
342template <typename floatT>
343int syevdxT<floatT>::execute( int64_t &numEig, floatT *A )
344{
345 if( m_cusolverH == nullptr )
346 {
347 throw std::runtime_error( "syevdx::execute: m_cusolverH is null" );
348 }
349
350 if( m_params == nullptr )
351 {
352 throw std::runtime_error( "syevdx::execute: m_params is null" );
353 }
354
355 cudaError_t ec = cudaMemcpyAsync( m_dev_A, A, sizeof( floatT ) * m_n * m_n, cudaMemcpyHostToDevice, m_stream );
356 if( ec != cudaSuccess )
357 {
358 MXCUDA_EXCEPTION( ec, "syevdxT::execute: copying matrix to device" );
359 }
360
361 cusolverStatus_t csec = cusolverDnXsyevdx( m_cusolverH,
362 m_params,
363 CUSOLVER_EIG_MODE_VECTOR,
364 m_range,
365 m_uplo,
366 m_n,
367 cusolver_traits<floatT>::cuda_data_type,
368 m_dev_A,
369 m_lda,
370 &m_vl,
371 &m_vu,
372 m_il,
373 m_iu,
374 &numEig,
375 cusolver_traits<floatT>::cuda_data_type,
376 m_dev_W,
377 cusolver_traits<floatT>::cuda_data_type,
378 m_dev_work,
379 m_dev_wsBytes,
380 m_host_work,
381 m_host_wsBytes,
382 m_dev_info );
383
384 if( csec != CUSOLVER_STATUS_SUCCESS )
385 {
386 MXCUSOLVER_EXCEPTION( csec, "syevdxT::execute: call to cusolverDnXsyevdx" );
387 }
388
389 int info = 0;
390 ec = cudaMemcpyAsync( &info, m_dev_info, sizeof( int ), cudaMemcpyDeviceToHost, m_stream );
391 if( ec != cudaSuccess )
392 {
393 MXCUDA_EXCEPTION( ec, "syevdxT::execute: copying info from device" );
394 }
395
396 ec = cudaStreamSynchronize( m_stream );
397 if( ec != cudaSuccess )
398 {
399 MXCUDA_EXCEPTION( ec, "syevdxT::execute: synchronizing" );
400 }
401
402 return info;
403}
404
405template <typename floatT>
406template <typename eigenT>
407int syevdxT<floatT>::calcEigenVecs( eigenT &evecs, eigenT &evals, eigenT &cv, bool normalize, bool check )
408{
409 if( m_cusolverH == nullptr )
410 {
411 throw std::runtime_error( "syevdx::calcEigenVecs: m_cusolverH is null" );
412 }
413
414 if( m_params == nullptr )
415 {
416 throw std::runtime_error( "syevdx::calcEigenVecs: m_params is null" );
417 }
418
419 int64_t nVecs;
420
421 int info = execute( nVecs, cv.data() );
422
423 evecs.resize( m_n, nVecs );
424 cudaError_t ec =
425 cudaMemcpyAsync( evecs.data(), m_dev_A, sizeof( floatT ) * m_n * nVecs, cudaMemcpyDeviceToHost, m_stream );
426 if( ec != cudaSuccess )
427 {
428 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: copying eigenvectors from device" );
429 }
430 evals.resize( 1, nVecs );
431 ec = cudaMemcpyAsync( evals.data(), m_dev_W, sizeof( floatT ) * nVecs, cudaMemcpyDeviceToHost, m_stream );
432 if( ec != cudaSuccess )
433 {
434 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: copying eigenvalues from device" );
435 }
436
437 ec = cudaStreamSynchronize( m_stream );
438 if( ec != cudaSuccess )
439 {
440 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: synchronizing" );
441 }
442
443 if( normalize )
444 {
445 // Normalize the eigenvectors
446 if( !check )
447 {
448 for( int i = 0; i < nVecs; ++i )
449 {
450 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
451 }
452 }
453 else // here we check for invalid results and 0 things out
454 {
455 for( int i = 0; i < nVecs; ++i )
456 {
457 if( evals( i ) == 0 )
458 {
459 std::cerr << "got 0 eigenvalue (# " << i << ")\n";
460 evecs.col( i ) *= 0;
461 }
462 else if( evals( i ) < 0 )
463 {
464 std::cerr << "got < 0 eigenvalue (# " << i << ")\n";
465 evecs.col( i ) *= 0;
466 }
467 else if( !std::isfinite( evals( i ) ) )
468 {
469 std::cerr << "got not-normal eigenvalue (# " << i << ")\n";
470 evecs.col( i ) *= 0;
471 }
472 else
473 {
474 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
475 }
476
477 for( int r = 0; r < evecs.rows(); ++r )
478 {
479 if( !std::isfinite( evecs.col( i )( r ) ) )
480 {
481 std::cerr << "got not-normal eigenvector entry (# " << i << "," << r
482 << ") = " << evecs.col( i )( r ) << "\n";
483 evecs.col( i ) *= 0;
484 continue;
485 }
486 }
487 }
488 }
489 }
490
491 return info;
492}
493
494} // namespace cuda
495} // namespace mx
496
497#endif //#MXLIB_CUDA
498
499#endif // math_syevdxT_hpp
MXLAPACK_INT calcEigenVecs(eigenT &evecs, eigenT &evals, eigenT &cv, int nVecs=0, bool normalize=false, bool check=false, syevrMem< _evCalcT > *mem=0, double *t_eigenv=nullptr)
Calculate the eigenvectors and eigenvalues given a triangular matrix.
The mxlib c++ namespace.
Definition mxlib.hpp:37