mxlib
C++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
syevdxT.hpp
1/** \file syevdxT.hpp
2 * \author Jared R. Males
3 * \brief An interface to cuSOLVER Xsyevdx
4 * \ingroup cuda_files
5 *
6 */
7
8//***********************************************************************//
9// Copyright 2025 Jared R. Males (jaredmales@gmail.com)
10//
11// This file is part of mxlib.
12//
13// mxlib is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// mxlib is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
25//***********************************************************************//
26
27#ifndef math_syevdxT_hpp
28#define math_syevdxT_hpp
29
30#include <Eigen/Dense>
31
32#include <cmath>
33#include <functional>
34#include <iostream>
35#include <random>
36#include <stdexcept>
37#include <string>
38
39#include <cuda_runtime.h>
40#include <cusolverDn.h>
41
42// #include <cuComplex.h>
43// #include <cuda_runtime_api.h>
44// #include <cublas_api.h>
45// #include <library_types.h>
46
47// #include "/home/jrmales/Source/CUDALibrarySamples/cuSOLVER/utils/cusolver_utils.h"
48
49#include "../../mxException.hpp"
50
51namespace mx
52{
53namespace cuda
54{
55
56#define MXCUDA_EXCEPTION( ec, explan ) \
57 std::string msg = "Cuda Error: ("; \
58 msg += cudaGetErrorName( ec ); \
59 msg += ") "; \
60 msg += cudaGetErrorString( ec ); \
61 msg += "\nContext: " explan; \
62 msg += ".\nAt line " + std::to_string( __LINE__ ); \
63 msg += " in " __FILE__; \
64 throw std::runtime_error( msg );
65
66#define MXCUSOLVER_EXCEPTION( ec, explan ) \
67 std::string msg = "cusolver Error: ("; \
68 msg += std::to_string( static_cast<int>( ec ) ); \
69 msg += ") "; \
70 msg += "\nContext: " explan; \
71 msg += ".\nAt line " + std::to_string( __LINE__ ); \
72 msg += " in " __FILE__; \
73 throw std::runtime_error( msg );
74
75template <typename floatT>
76struct cusolver_traits;
77
78template <>
79struct cusolver_traits<float>
80{
81 static constexpr cudaDataType cuda_data_type = CUDA_R_32F;
82};
83
84template <>
85struct cusolver_traits<double>
86{
87 static constexpr cudaDataType cuda_data_type = CUDA_R_64F;
88};
89
90template <typename floatT>
91struct syevdxT
92{
93
94 cusolverDnHandle_t m_cusolverH{ nullptr };
95 cusolverDnParams_t m_params{ nullptr };
96 cudaStream_t m_stream{ nullptr };
97
98 int m_allocations{ 0 };
99
100 floatT *m_dev_A{ nullptr };
101 floatT *m_dev_W{ nullptr };
102 int *m_dev_info{ nullptr };
103
104 cusolverEigRange_t m_range{ CUSOLVER_EIG_RANGE_ALL };
105 cublasFillMode_t m_uplo{ CUBLAS_FILL_MODE_LOWER };
106 int64_t m_n{ 0 };
107 int64_t m_lda{ 0 };
108
109 floatT m_vu{ 0 };
110 floatT m_vl{ 0 };
111 int64_t m_il{ 0 };
112 int64_t m_iu{ 0 };
113
114 void *m_dev_work{ nullptr }; /* device workspace */
115 size_t m_dev_wsBytes{ 0 };
116 void *m_host_work{ nullptr }; /* host workspace */
117 size_t m_host_wsBytes{ 0 };
118
119 syevdxT();
120
121 syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = nullptr );
122
123 ~syevdxT();
124
125 void free( bool null = true );
126
127 void setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = nullptr );
128
129 void allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER );
130
131 int execute( int64_t &numEig, floatT *A );
132
133 template <typename eigenT>
134 int calcEigenVecs( eigenT &evecs, /**< [out] on exit contains the eigenvectors*/
135 eigenT &evals, /**< [out] on exit contains the eigenvalues*/
136 eigenT &cv, /**< [in] a lower-triangle (in the Lapack sense) square
137 covariance matrix.*/
138 bool normalize = false, /**< [in] [opt] flag specifying whether or not to
139 normalize the eigenvectors.*/
140 bool check = false /**< [in] [opt] flag specifying whether or not to
141 check the eigenvalues/vectors for
142 validity. Requires normalize=true.*/ );
143};
144
145template <typename floatT>
146syevdxT<floatT>::syevdxT()
147{
148}
149
150template <typename floatT>
151syevdxT<floatT>::syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
152 : m_cusolverH{ cusolverH }, m_params{ params }, m_stream{ stream }
153{
154}
155
156template <typename floatT>
157syevdxT<floatT>::~syevdxT()
158{
159 free( false );
160}
161
162template <typename floatT>
163void syevdxT<floatT>::free( bool null )
164{
165 if( m_dev_A != nullptr )
166 {
167 cudaError_t ec = cudaFree( m_dev_A );
168 if( ec != cudaSuccess )
169 {
170 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for matrix" );
171 }
172 }
173
174 if( m_dev_W != nullptr )
175 {
176 cudaError_t ec = cudaFree( m_dev_W );
177 if( ec != cudaSuccess )
178 {
179 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for eigen values" );
180 }
181 }
182
183 if( m_dev_info != nullptr )
184 {
185 cudaError_t ec = cudaFree( m_dev_info );
186 if( ec != cudaSuccess )
187 {
188 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for info" );
189 }
190 }
191
192 if( m_dev_work != nullptr )
193 {
194 cudaError_t ec = cudaFree( m_dev_work );
195 if( ec != cudaSuccess )
196 {
197 MXCUDA_EXCEPTION( ec, "syevdxT::free: free-ing device memory for work" );
198 }
199 }
200
201 if( m_host_work != nullptr )
202 {
203 ::free( m_host_work );
204 }
205
206 if( null )
207 {
208 m_dev_A = nullptr;
209 m_dev_W = nullptr;
210 m_dev_info = nullptr;
211 m_dev_work = nullptr;
212 m_host_work = nullptr;
213 }
214}
215
216template <typename floatT>
217void syevdxT<floatT>::setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
218{
219 m_cusolverH = cusolverH;
220 m_params = params;
221 m_stream = stream;
222}
223
224template <typename floatT>
225void syevdxT<floatT>::allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo )
226
227{
228 if( m_cusolverH == nullptr )
229 {
230 throw std::runtime_error( "syevdxT::allocate: m_cusolverH is null" );
231 }
232
233 if( m_params == nullptr )
234 {
235 throw std::runtime_error( "syevdxT::allocate: m_params is null" );
236 }
237
238 cusolverEigRange_t range;
239 int64_t il, iu;
240 if( nVecs == 0 || nVecs == n )
241 {
242 range = CUSOLVER_EIG_RANGE_ALL;
243 il = 0;
244 iu = 0;
245 }
246 else
247 {
248 range = CUSOLVER_EIG_RANGE_I;
249 if( nVecs > n )
250 {
251 nVecs = n;
252 }
253 il = n - nVecs + 1; // eigenvalues are returned in ascending order, so [il,iu] selects the nVecs largest
254 iu = n;
255 }
256
257 if( n == m_n && range == m_range && il == m_il && iu == m_iu && uplo == m_uplo && m_dev_A && m_dev_W &&
258 m_dev_info && m_dev_work )
259 {
260 return;
261 }
262
263 m_n = n;
264 m_lda = n;
265
266 m_range = range;
267 m_il = il;
268 m_iu = iu;
269
270 m_uplo = uplo;
271
272 free( true ); // null the stale pointers so a failed re-allocation below cannot cause a double free
273
274 cudaError_t ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_A ), sizeof( floatT ) * ( n * n ) );
275 if( ec != cudaSuccess )
276 {
277 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for matrix" );
278 }
279
280 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_W ), sizeof( floatT ) * n );
281 if( ec != cudaSuccess )
282 {
283 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for eigen values" );
284 }
285
286 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_info ), sizeof( int ) );
287 if( ec != cudaSuccess )
288 {
289 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for info" );
290 }
291
292 int64_t h_meig;
293
294 cusolverStatus_t csec = cusolverDnXsyevdx_bufferSize( m_cusolverH,
295 m_params,
296 CUSOLVER_EIG_MODE_VECTOR,
297 m_range,
298 m_uplo,
299 m_n,
300 cusolver_traits<floatT>::cuda_data_type,
301 m_dev_A,
302 m_lda,
303 &m_vl,
304 &m_vu,
305 m_il,
306 m_iu,
307 &h_meig,
308 cusolver_traits<floatT>::cuda_data_type,
309 m_dev_W,
310 cusolver_traits<floatT>::cuda_data_type,
311 &m_dev_wsBytes,
312 &m_host_wsBytes );
313 if( csec != CUSOLVER_STATUS_SUCCESS )
314 {
315 MXCUSOLVER_EXCEPTION( csec, "syevdxT::allocate: call to cusolverDnXsyevdx_bufferSize" );
316 }
317
318 ec = cudaMalloc( reinterpret_cast<void **>( &m_dev_work ), m_dev_wsBytes );
319 if( ec != cudaSuccess )
320 {
321 MXCUDA_EXCEPTION( ec, "syevdxT::allocate: allocating device memory for work" );
322 }
323
324 if( m_host_wsBytes > 0 )
325 {
326 m_host_work = reinterpret_cast<void *>( malloc( m_host_wsBytes ) );
327 if( m_host_work == nullptr )
328 {
329 mxThrowException( err::allocerr, "syevdxT::allocate:", "allocating host memory for work" );
330 }
331 }
332 else
333 {
334 m_host_work = nullptr; // no host workspace is needed in this case
335 }
336
337 ++m_allocations;
338}
339
340template <typename floatT>
341int syevdxT<floatT>::execute( int64_t &numEig, floatT *A )
342{
343 if( m_cusolverH == nullptr )
344 {
345 throw std::runtime_error( "syevdxT::execute: m_cusolverH is null" );
346 }
347
348 if( m_params == nullptr )
349 {
350 throw std::runtime_error( "syevdxT::execute: m_params is null" );
351 }
352
353 cudaError_t ec = cudaMemcpyAsync( m_dev_A, A, sizeof( floatT ) * m_n * m_n, cudaMemcpyHostToDevice, m_stream );
354 if( ec != cudaSuccess )
355 {
356 MXCUDA_EXCEPTION( ec, "syevdxT::execute: copying matrix to device" );
357 }
358
359 cusolverStatus_t csec = cusolverDnXsyevdx( m_cusolverH,
360 m_params,
361 CUSOLVER_EIG_MODE_VECTOR,
362 m_range,
363 m_uplo,
364 m_n,
365 cusolver_traits<floatT>::cuda_data_type,
366 m_dev_A,
367 m_lda,
368 &m_vl,
369 &m_vu,
370 m_il,
371 m_iu,
372 &numEig,
373 cusolver_traits<floatT>::cuda_data_type,
374 m_dev_W,
375 cusolver_traits<floatT>::cuda_data_type,
376 m_dev_work,
377 m_dev_wsBytes,
378 m_host_work,
379 m_host_wsBytes,
380 m_dev_info );
381
382 if( csec != CUSOLVER_STATUS_SUCCESS )
383 {
384 MXCUSOLVER_EXCEPTION( csec, "syevdxT::execute: call to cusolverDnXsyevdx" );
385 }
386
387 int info = 0;
388 ec = cudaMemcpyAsync( &info, m_dev_info, sizeof( int ), cudaMemcpyDeviceToHost, m_stream );
389 if( ec != cudaSuccess )
390 {
391 MXCUDA_EXCEPTION( ec, "syevdxT::execute: copying info from device" );
392 }
393
394 ec = cudaStreamSynchronize( m_stream );
395 if( ec != cudaSuccess )
396 {
397 MXCUDA_EXCEPTION( ec, "syevdxT::execute: synchronizing" );
398 }
399
400 return info;
401}
402
403template <typename floatT>
404template <typename eigenT>
405int syevdxT<floatT>::calcEigenVecs( eigenT &evecs, eigenT &evals, eigenT &cv, bool normalize, bool check )
406{
407 if( m_cusolverH == nullptr )
408 {
409 throw std::runtime_error( "syevdxT::calcEigenVecs: m_cusolverH is null" );
410 }
411
412 if( m_params == nullptr )
413 {
414 throw std::runtime_error( "syevdxT::calcEigenVecs: m_params is null" );
415 }
416
417 int64_t nVecs;
418
419 int info = execute( nVecs, cv.data() );
420
421 evecs.resize( m_n, nVecs );
422 cudaError_t ec =
423 cudaMemcpyAsync( evecs.data(), m_dev_A, sizeof( floatT ) * m_n * nVecs, cudaMemcpyDeviceToHost, m_stream );
424 if( ec != cudaSuccess )
425 {
426 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: copying eigenvectors from device" );
427 }
428 evals.resize( 1, nVecs );
429 ec = cudaMemcpyAsync( evals.data(), m_dev_W, sizeof( floatT ) * nVecs, cudaMemcpyDeviceToHost, m_stream );
430 if( ec != cudaSuccess )
431 {
432 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: copying eigenvalues from device" );
433 }
434
435 ec = cudaStreamSynchronize( m_stream );
436 if( ec != cudaSuccess )
437 {
438 MXCUDA_EXCEPTION( ec, "syevdxT::calcEigenVecs: synchronizing" );
439 }
440
441 if( normalize )
442 {
443 // Normalize the eigenvectors
444 if( !check )
445 {
446 for( int i = 0; i < nVecs; ++i )
447 {
448 evecs.col( i ) = evecs.col( i ) / std::sqrt( evals( i ) );
449 }
450 }
451 else // here we check for invalid results and 0 things out
452 {
453 for( int i = 0; i < nVecs; ++i )
454 {
455 if( evals( i ) == 0 )
456 {
457 std::cerr << "got 0 eigenvalue (# " << i << ")\n";
458 evecs.col( i ) *= 0;
459 }
460 else if( evals( i ) < 0 )
461 {
462 std::cerr << "got < 0 eigenvalue (# " << i << ")\n";
463 evecs.col( i ) *= 0;
464 }
465 else if( !std::isfinite( evals( i ) ) )
466 {
467 std::cerr << "got non-finite eigenvalue (# " << i << ")\n";
468 evecs.col( i ) *= 0;
469 }
470 else
471 {
472 evecs.col( i ) = evecs.col( i ) / std::sqrt( evals( i ) );
473 }
474
475 for( int r = 0; r < evecs.rows(); ++r )
476 {
477 if( !std::isfinite( evecs.col( i )( r ) ) )
478 {
479 std::cerr << "got non-finite eigenvector entry (# " << i << "," << r
480 << ") = " << evecs.col( i )( r ) << "\n";
481 evecs.col( i ) *= 0;
482 break; // the column has been zeroed, so stop checking its remaining entries
483 }
484 }
485 }
486 }
487 }
488
489 return info;
490}
491
492} // namespace cuda
493} // namespace mx
494#endif // math_syevdxT_hpp
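
For context, a minimal usage sketch follows. It is not part of the header above: it assumes a CUDA-capable device and an mxlib build with cuSOLVER support, the include path and matrix contents are illustrative, and most status checks are omitted for brevity. It computes the three largest eigenpairs of a small symmetric positive-definite matrix.

#include <iostream>

#include <Eigen/Dense>

#include <cuda_runtime.h>
#include <cusolverDn.h>

#include <mx/math/cuda/syevdxT.hpp> // illustrative path; adjust to your mxlib install layout

int main()
{
    // Create the cuSOLVER handle, 64-bit-API params, and stream that syevdxT borrows.
    // Status checks on these setup calls are omitted here for brevity.
    cusolverDnHandle_t handle{ nullptr };
    cusolverDnParams_t params{ nullptr };
    cudaStream_t stream{ nullptr };

    cusolverDnCreate( &handle );
    cusolverDnCreateParams( &params );
    cudaStreamCreateWithFlags( &stream, cudaStreamNonBlocking );
    cusolverDnSetStream( handle, stream );

    // Build a small symmetric positive-definite test matrix C = M^T * M.
    const int64_t n = 5;
    Eigen::MatrixXf M = Eigen::MatrixXf::Random( n, n );
    Eigen::MatrixXf C = M.transpose() * M;

    int rv = 0;
    try
    {
        mx::cuda::syevdxT<float> solver( handle, params, stream );

        // Size the device buffers for the 3 largest eigenpairs of an n x n matrix.
        solver.allocate( n, 3 );

        // normalize=true scales each eigenvector by 1/sqrt(eigenvalue);
        // check=true zeroes columns with zero, negative, or non-finite eigenvalues.
        Eigen::MatrixXf evecs, evals;
        int info = solver.calcEigenVecs( evecs, evals, C, true, true );

        std::cout << "info = " << info << "\n";
        std::cout << "eigenvalues:\n" << evals << "\n";
    }
    catch( const std::exception &e )
    {
        std::cerr << e.what() << "\n";
        rv = 1;
    }

    cudaStreamDestroy( stream );
    cusolverDnDestroyParams( params );
    cusolverDnDestroy( handle );

    return rv;
}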