27#ifndef math_syevdxT_hpp
28#define math_syevdxT_hpp
41#include <cuda_runtime.h>
42#include <cusolverDn.h>
57#define MXCUDA_EXCEPTION( ec, explan ) \
58 std::string msg = "Cuda Error: ("; \
59 msg += cudaGetErrorName( ec ); \
61 msg += cudaGetErrorString( ec ); \
62 msg += "\nContext: " explan; \
63 msg += ".\nAt line " + std::to_string( __LINE__ ); \
64 msg += " in " __FILE__; \
65 throw std::runtime_error( msg );
67#define MXCUSOLVER_EXCEPTION( ec, explan ) \
68 std::string msg = "cusolver Error: ("; \
69 msg += std::to_string( static_cast<int>( ec ) ); \
71 msg += "\nContext: " explan; \
72 msg += ".\nAt line " + std::to_string( __LINE__ ); \
73 msg += " in " __FILE__; \
74 throw std::runtime_error( msg );
76template <
typename floatT>
77struct cusolver_traits;
80struct cusolver_traits<float>
82 static constexpr cudaDataType cuda_data_type = CUDA_R_32F;
86struct cusolver_traits<double>
88 static constexpr cudaDataType cuda_data_type = CUDA_R_64F;
91template <
typename floatT>
95 cusolverDnHandle_t m_cusolverH{
nullptr };
96 cusolverDnParams_t m_params{
nullptr };
97 cudaStream_t m_stream{
nullptr };
99 int m_allocations{ 0 };
101 floatT *m_dev_A{
nullptr };
102 floatT *m_dev_W{
nullptr };
103 int *m_dev_info{
nullptr };
105 cusolverEigRange_t m_range;
106 cublasFillMode_t m_uplo;
115 void *m_dev_work{
nullptr };
116 size_t m_dev_wsBytes{ 0 };
117 void *m_host_work{
nullptr };
118 size_t m_host_wsBytes{ 0 };
122 syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream =
nullptr );
126 void free(
bool null =
true );
128 void setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream =
nullptr );
130 void allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER );
132 int execute( int64_t &numEig, floatT *A );
134 template <
typename eigenT>
139 bool normalize =
false,
146template <
typename floatT>
147syevdxT<floatT>::syevdxT()
151template <
typename floatT>
152syevdxT<floatT>::syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
153 : m_cusolverH{ cusolverH }, m_params{ params }, m_stream{ stream }
157template <
typename floatT>
158syevdxT<floatT>::~syevdxT()
163template <
typename floatT>
164void syevdxT<floatT>::free(
bool null )
166 if( m_dev_A !=
nullptr )
168 cudaError_t ec = cudaFree( m_dev_A );
169 if( ec != cudaSuccess )
171 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for matrix" );
175 if( m_dev_W !=
nullptr )
177 cudaError_t ec = cudaFree( m_dev_W );
178 if( ec != cudaSuccess )
180 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for eigen values" );
184 if( m_dev_info !=
nullptr )
186 cudaError_t ec = cudaFree( m_dev_info );
187 if( ec != cudaSuccess )
189 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for info" );
193 if( m_dev_work !=
nullptr )
195 cudaError_t ec = cudaFree( m_dev_work );
196 if( ec != cudaSuccess )
198 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for work" );
202 if( m_host_work !=
nullptr )
204 ::free( m_host_work );
211 m_dev_info =
nullptr;
212 m_dev_work =
nullptr;
213 m_host_work =
nullptr;
217template <
typename floatT>
218void syevdxT<floatT>::setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
220 m_cusolverH = cusolverH;
225template <
typename floatT>
226void syevdxT<floatT>::allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo )
229 if( m_cusolverH ==
nullptr )
231 throw std::runtime_error(
"syevdx::allocate: m_cusolverH is null" );
234 if( m_params ==
nullptr )
236 throw std::runtime_error(
"syevdx::allocate: m_params is null" );
239 cusolverEigRange_t range;
241 if( nVecs == 0 || nVecs == n )
243 range = CUSOLVER_EIG_RANGE_ALL;
249 range = CUSOLVER_EIG_RANGE_I;
258 if( n == m_n && range == m_range && il == m_il && iu == m_iu && uplo == m_uplo && m_dev_A && m_dev_W &&
259 m_dev_info && m_dev_work )
275 cudaError_t ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_A ),
sizeof( floatT ) * ( n * n ) );
276 if( ec != cudaSuccess )
278 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for matrix" );
281 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_W ),
sizeof( floatT ) * n );
282 if( ec != cudaSuccess )
284 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for eigen values" );
287 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_info ),
sizeof(
int ) );
288 if( ec != cudaSuccess )
290 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for info" );
295 cusolverStatus_t csec = cusolverDnXsyevdx_bufferSize( m_cusolverH,
297 CUSOLVER_EIG_MODE_VECTOR,
301 cusolver_traits<floatT>::cuda_data_type,
309 cusolver_traits<floatT>::cuda_data_type,
311 cusolver_traits<floatT>::cuda_data_type,
314 if( csec != CUSOLVER_STATUS_SUCCESS )
316 MXCUSOLVER_EXCEPTION( csec,
"syevdxT::allocate: call to cusolverDnXsyevdx_bufferSize" );
319 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_work ), m_dev_wsBytes );
320 if( ec != cudaSuccess )
322 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for work" );
325 if( m_host_wsBytes > 0 )
327 m_host_work =
reinterpret_cast<void *
>( malloc( m_host_wsBytes ) );
328 if( m_host_work ==
nullptr )
330 throw std::runtime_error(
"syevdxT::allocate: allocating host memory for work" );
336 m_host_work =
nullptr;
342template <
typename floatT>
343int syevdxT<floatT>::execute( int64_t &numEig, floatT *A )
345 if( m_cusolverH ==
nullptr )
347 throw std::runtime_error(
"syevdx::execute: m_cusolverH is null" );
350 if( m_params ==
nullptr )
352 throw std::runtime_error(
"syevdx::execute: m_params is null" );
355 cudaError_t ec = cudaMemcpyAsync( m_dev_A, A,
sizeof( floatT ) * m_n * m_n, cudaMemcpyHostToDevice, m_stream );
356 if( ec != cudaSuccess )
358 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: copying matrix to device" );
361 cusolverStatus_t csec = cusolverDnXsyevdx( m_cusolverH,
363 CUSOLVER_EIG_MODE_VECTOR,
367 cusolver_traits<floatT>::cuda_data_type,
375 cusolver_traits<floatT>::cuda_data_type,
377 cusolver_traits<floatT>::cuda_data_type,
384 if( csec != CUSOLVER_STATUS_SUCCESS )
386 MXCUSOLVER_EXCEPTION( csec,
"syevdxT::execute: call to cusolverDnXsyevdx" );
390 ec = cudaMemcpyAsync( &info, m_dev_info,
sizeof(
int ), cudaMemcpyDeviceToHost, m_stream );
391 if( ec != cudaSuccess )
393 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: copying info from device" );
396 ec = cudaStreamSynchronize( m_stream );
397 if( ec != cudaSuccess )
399 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: synchronizing" );
405template <
typename floatT>
406template <
typename eigenT>
407int syevdxT<floatT>::calcEigenVecs( eigenT &evecs, eigenT &evals, eigenT &cv,
bool normalize,
bool check )
409 if( m_cusolverH ==
nullptr )
411 throw std::runtime_error(
"syevdx::calcEigenVecs: m_cusolverH is null" );
414 if( m_params ==
nullptr )
416 throw std::runtime_error(
"syevdx::calcEigenVecs: m_params is null" );
421 int info = execute( nVecs, cv.data() );
423 evecs.resize( m_n, nVecs );
425 cudaMemcpyAsync( evecs.data(), m_dev_A,
sizeof( floatT ) * m_n * nVecs, cudaMemcpyDeviceToHost, m_stream );
426 if( ec != cudaSuccess )
428 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: copying eigenvectors from device" );
430 evals.resize( 1, nVecs );
431 ec = cudaMemcpyAsync( evals.data(), m_dev_W,
sizeof( floatT ) * nVecs, cudaMemcpyDeviceToHost, m_stream );
432 if( ec != cudaSuccess )
434 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: copying eigenvalues from device" );
437 ec = cudaStreamSynchronize( m_stream );
438 if( ec != cudaSuccess )
440 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: synchronizing" );
448 for(
int i = 0; i < nVecs; ++i )
450 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
455 for(
int i = 0; i < nVecs; ++i )
457 if( evals( i ) == 0 )
459 std::cerr <<
"got 0 eigenvalue (# " << i <<
")\n";
462 else if( evals( i ) < 0 )
464 std::cerr <<
"got < 0 eigenvalue (# " << i <<
")\n";
467 else if( !std::isfinite( evals( i ) ) )
469 std::cerr <<
"got not-normal eigenvalue (# " << i <<
")\n";
474 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
477 for(
int r = 0; r < evecs.rows(); ++r )
479 if( !std::isfinite( evecs.col( i )( r ) ) )
481 std::cerr <<
"got not-normal eigenvector entry (# " << i <<
"," << r
482 <<
") = " << evecs.col( i )( r ) <<
"\n";
MXLAPACK_INT calcEigenVecs(eigenT &evecs, eigenT &evals, eigenT &cv, int nVecs=0, bool normalize=false, bool check=false, syevrMem< _evCalcT > *mem=0, double *t_eigenv=nullptr)
Calculate the eigenvectors and eigenvalues given a triangular matrix.