27#ifndef math_syevdxT_hpp
28#define math_syevdxT_hpp
39#include <cuda_runtime.h>
40#include <cusolverDn.h>
49#include "../../mxException.hpp"
56#define MXCUDA_EXCEPTION( ec, explan ) \
57 std::string msg = "Cuda Error: ("; \
58 msg += cudaGetErrorName( ec ); \
60 msg += cudaGetErrorString( ec ); \
61 msg += "\nContext: " explan; \
62 msg += ".\nAt line " + std::to_string( __LINE__ ); \
63 msg += " in " __FILE__; \
64 throw std::runtime_error( msg );
66#define MXCUSOLVER_EXCEPTION( ec, explan ) \
67 std::string msg = "cusolver Error: ("; \
68 msg += std::to_string( static_cast<int>( ec ) ); \
70 msg += "\nContext: " explan; \
71 msg += ".\nAt line " + std::to_string( __LINE__ ); \
72 msg += " in " __FILE__; \
73 throw std::runtime_error( msg );
75template <
typename floatT>
76struct cusolver_traits;
79struct cusolver_traits<float>
81 static constexpr cudaDataType cuda_data_type = CUDA_R_32F;
85struct cusolver_traits<double>
87 static constexpr cudaDataType cuda_data_type = CUDA_R_64F;
90template <
typename floatT>
94 cusolverDnHandle_t m_cusolverH{
nullptr };
95 cusolverDnParams_t m_params{
nullptr };
96 cudaStream_t m_stream{
nullptr };
98 int m_allocations{ 0 };
100 floatT *m_dev_A{
nullptr };
101 floatT *m_dev_W{
nullptr };
102 int *m_dev_info{
nullptr };
104 cusolverEigRange_t m_range;
105 cublasFillMode_t m_uplo;
114 void *m_dev_work{
nullptr };
115 size_t m_dev_wsBytes{ 0 };
116 void *m_host_work{
nullptr };
117 size_t m_host_wsBytes{ 0 };
121 syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream =
nullptr );
125 void free(
bool null =
true );
127 void setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream =
nullptr );
129 void allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER );
131 int execute( int64_t &numEig, floatT *A );
133 template <
typename eigenT>
138 bool normalize =
false,
145template <
typename floatT>
146syevdxT<floatT>::syevdxT()
150template <
typename floatT>
151syevdxT<floatT>::syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
152 : m_cusolverH{ cusolverH }, m_params{ params }, m_stream{ stream }
156template <
typename floatT>
157syevdxT<floatT>::~syevdxT()
162template <
typename floatT>
163void syevdxT<floatT>::free(
bool null )
165 if( m_dev_A !=
nullptr )
167 cudaError_t ec = cudaFree( m_dev_A );
168 if( ec != cudaSuccess )
170 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for matrix" );
174 if( m_dev_W !=
nullptr )
176 cudaError_t ec = cudaFree( m_dev_W );
177 if( ec != cudaSuccess )
179 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for eigen values" );
183 if( m_dev_info !=
nullptr )
185 cudaError_t ec = cudaFree( m_dev_info );
186 if( ec != cudaSuccess )
188 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for info" );
192 if( m_dev_work !=
nullptr )
194 cudaError_t ec = cudaFree( m_dev_work );
195 if( ec != cudaSuccess )
197 MXCUDA_EXCEPTION( ec,
"syevdxT::free: free-ing device memory for work" );
201 if( m_host_work !=
nullptr )
203 ::free( m_host_work );
210 m_dev_info =
nullptr;
211 m_dev_work =
nullptr;
212 m_host_work =
nullptr;
216template <
typename floatT>
217void syevdxT<floatT>::setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
219 m_cusolverH = cusolverH;
224template <
typename floatT>
225void syevdxT<floatT>::allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo )
228 if( m_cusolverH ==
nullptr )
230 throw std::runtime_error(
"syevdx::allocate: m_cusolverH is null" );
233 if( m_params ==
nullptr )
235 throw std::runtime_error(
"syevdx::allocate: m_params is null" );
238 cusolverEigRange_t range;
240 if( nVecs == 0 || nVecs == n )
242 range = CUSOLVER_EIG_RANGE_ALL;
248 range = CUSOLVER_EIG_RANGE_I;
257 if( n == m_n && range == m_range && il == m_il && iu == m_iu && uplo == m_uplo && m_dev_A && m_dev_W &&
258 m_dev_info && m_dev_work )
274 cudaError_t ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_A ),
sizeof( floatT ) * ( n * n ) );
275 if( ec != cudaSuccess )
277 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for matrix" );
280 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_W ),
sizeof( floatT ) * n );
281 if( ec != cudaSuccess )
283 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for eigen values" );
286 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_info ),
sizeof(
int ) );
287 if( ec != cudaSuccess )
289 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for info" );
294 cusolverStatus_t csec = cusolverDnXsyevdx_bufferSize( m_cusolverH,
296 CUSOLVER_EIG_MODE_VECTOR,
300 cusolver_traits<floatT>::cuda_data_type,
308 cusolver_traits<floatT>::cuda_data_type,
310 cusolver_traits<floatT>::cuda_data_type,
313 if( csec != CUSOLVER_STATUS_SUCCESS )
315 MXCUSOLVER_EXCEPTION( csec,
"syevdxT::allocate: call to cusolverDnXsyevdx_bufferSize" );
318 ec = cudaMalloc(
reinterpret_cast<void **
>( &m_dev_work ), m_dev_wsBytes );
319 if( ec != cudaSuccess )
321 MXCUDA_EXCEPTION( ec,
"syevdxT::allocate: allocating device memory for work" );
324 if( m_host_wsBytes > 0 )
326 m_host_work =
reinterpret_cast<void *
>( malloc( m_host_wsBytes ) );
327 if( m_host_work ==
nullptr )
329 mxThrowException( err::allocerr,
"syevdxT::allocate:",
"allocating host memory for work" );
334 m_host_work =
nullptr;
340template <
typename floatT>
341int syevdxT<floatT>::execute( int64_t &numEig, floatT *A )
343 if( m_cusolverH ==
nullptr )
345 throw std::runtime_error(
"syevdx::execute: m_cusolverH is null" );
348 if( m_params ==
nullptr )
350 throw std::runtime_error(
"syevdx::execute: m_params is null" );
353 cudaError_t ec = cudaMemcpyAsync( m_dev_A, A,
sizeof( floatT ) * m_n * m_n, cudaMemcpyHostToDevice, m_stream );
354 if( ec != cudaSuccess )
356 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: copying matrix to device" );
359 cusolverStatus_t csec = cusolverDnXsyevdx( m_cusolverH,
361 CUSOLVER_EIG_MODE_VECTOR,
365 cusolver_traits<floatT>::cuda_data_type,
373 cusolver_traits<floatT>::cuda_data_type,
375 cusolver_traits<floatT>::cuda_data_type,
382 if( csec != CUSOLVER_STATUS_SUCCESS )
384 MXCUSOLVER_EXCEPTION( csec,
"syevdxT::execute: call to cusolverDnXsyevdx" );
388 ec = cudaMemcpyAsync( &info, m_dev_info,
sizeof(
int ), cudaMemcpyDeviceToHost, m_stream );
389 if( ec != cudaSuccess )
391 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: copying info from device" );
394 ec = cudaStreamSynchronize( m_stream );
395 if( ec != cudaSuccess )
397 MXCUDA_EXCEPTION( ec,
"syevdxT::execute: synchronizing" );
403template <
typename floatT>
404template <
typename eigenT>
405int syevdxT<floatT>::calcEigenVecs( eigenT &evecs, eigenT &evals, eigenT &cv,
bool normalize,
bool check )
407 if( m_cusolverH ==
nullptr )
409 throw std::runtime_error(
"syevdx::calcEigenVecs: m_cusolverH is null" );
412 if( m_params ==
nullptr )
414 throw std::runtime_error(
"syevdx::calcEigenVecs: m_params is null" );
419 int info = execute( nVecs, cv.data() );
421 evecs.resize( m_n, nVecs );
423 cudaMemcpyAsync( evecs.data(), m_dev_A,
sizeof( floatT ) * m_n * nVecs, cudaMemcpyDeviceToHost, m_stream );
424 if( ec != cudaSuccess )
426 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: copying eigenvectors from device" );
428 evals.resize( 1, nVecs );
429 ec = cudaMemcpyAsync( evals.data(), m_dev_W,
sizeof( floatT ) * nVecs, cudaMemcpyDeviceToHost, m_stream );
430 if( ec != cudaSuccess )
432 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: copying eigenvalues from device" );
435 ec = cudaStreamSynchronize( m_stream );
436 if( ec != cudaSuccess )
438 MXCUDA_EXCEPTION( ec,
"syevdxT::calcEigenVecs: synchronizing" );
446 for(
int i = 0; i < nVecs; ++i )
448 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
453 for(
int i = 0; i < nVecs; ++i )
455 if( evals( i ) == 0 )
457 std::cerr <<
"got 0 eigenvalue (# " << i <<
")\n";
460 else if( evals( i ) < 0 )
462 std::cerr <<
"got < 0 eigenvalue (# " << i <<
")\n";
465 else if( !std::isfinite( evals( i ) ) )
467 std::cerr <<
"got not-normal eigenvalue (# " << i <<
")\n";
472 evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
475 for(
int r = 0; r < evecs.rows(); ++r )
477 if( !std::isfinite( evecs.col( i )( r ) ) )
479 std::cerr <<
"got not-normal eigenvector entry (# " << i <<
"," << r
480 <<
") = " << evecs.col( i )( r ) <<
"\n";
MXLAPACK_INT calcEigenVecs(eigenT &evecs, eigenT &evals, eigenT &cv, int nVecs=0, bool normalize=false, bool check=false, syevrMem< _evCalcT > *mem=0, double *t_eigenv=nullptr)
Calculate the eigenvectors and eigenvalues given a triangular matrix.