27#ifndef math_syevdxT_hpp 
   28#define math_syevdxT_hpp 
   39#include <cuda_runtime.h> 
   40#include <cusolverDn.h> 
   49#include "../../mxException.hpp" 
   56#define MXCUDA_EXCEPTION( ec, explan )                                                                                 \ 
   57    std::string msg = "Cuda Error: (";                                                                                 \ 
   58    msg += cudaGetErrorName( ec );                                                                                     \ 
   60    msg += cudaGetErrorString( ec );                                                                                   \ 
   61    msg += "\nContext: " explan;                                                                                       \ 
   62    msg += ".\nAt line " + std::to_string( __LINE__ );                                                                 \ 
   63    msg += " in " __FILE__;                                                                                            \ 
   64    throw std::runtime_error( msg ); 
   66#define MXCUSOLVER_EXCEPTION( ec, explan )                                                                             \ 
   67    std::string msg = "cusolver Error: (";                                                                             \ 
   68    msg += std::to_string( static_cast<int>( ec ) );                                                                   \ 
   70    msg += "\nContext: " explan;                                                                                       \ 
   71    msg += ".\nAt line " + std::to_string( __LINE__ );                                                                 \ 
   72    msg += " in " __FILE__;                                                                                            \ 
   73    throw std::runtime_error( msg ); 
   75template <
typename floatT>
 
   76struct cusolver_traits;
 
   79struct cusolver_traits<float>
 
   81    static constexpr cudaDataType cuda_data_type = CUDA_R_32F;
 
   85struct cusolver_traits<double>
 
   87    static constexpr cudaDataType cuda_data_type = CUDA_R_64F;
 
   90template <
typename floatT>
 
   94    cusolverDnHandle_t m_cusolverH{ 
nullptr };
 
   95    cusolverDnParams_t m_params{ 
nullptr };
 
   96    cudaStream_t m_stream{ 
nullptr };
 
   98    int m_allocations{ 0 };
 
  100    floatT *m_dev_A{ 
nullptr };
 
  101    floatT *m_dev_W{ 
nullptr };
 
  102    int *m_dev_info{ 
nullptr };
 
  104    cusolverEigRange_t m_range;
 
  105    cublasFillMode_t m_uplo;
 
  114    void *m_dev_work{ 
nullptr };  
 
  115    size_t m_dev_wsBytes{ 0 };
 
  116    void *m_host_work{ 
nullptr }; 
 
  117    size_t m_host_wsBytes{ 0 };
 
  121    syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = 
nullptr );
 
  125    void free( 
bool null = 
true );
 
  127    void setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream = 
nullptr );
 
  129    void allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER );
 
  131    int execute( int64_t &numEig, floatT *A );
 
  133    template <
typename eigenT>
 
  138                      bool normalize = 
false, 
 
  145template <
typename floatT>
 
  146syevdxT<floatT>::syevdxT()
 
  150template <
typename floatT>
 
  151syevdxT<floatT>::syevdxT( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
 
  152    : m_cusolverH{ cusolverH }, m_params{ params }, m_stream{ stream }
 
  156template <
typename floatT>
 
  157syevdxT<floatT>::~syevdxT()
 
  162template <
typename floatT>
 
  163void syevdxT<floatT>::free( 
bool null )
 
  165    if( m_dev_A != 
nullptr )
 
  167        cudaError_t ec = cudaFree( m_dev_A );
 
  168        if( ec != cudaSuccess )
 
  170            MXCUDA_EXCEPTION( ec, 
"syevdxT::free: free-ing device memory for matrix" );
 
  174    if( m_dev_W != 
nullptr )
 
  176        cudaError_t ec = cudaFree( m_dev_W );
 
  177        if( ec != cudaSuccess )
 
  179            MXCUDA_EXCEPTION( ec, 
"syevdxT::free: free-ing device memory for eigen values" );
 
  183    if( m_dev_info != 
nullptr )
 
  185        cudaError_t ec = cudaFree( m_dev_info );
 
  186        if( ec != cudaSuccess )
 
  188            MXCUDA_EXCEPTION( ec, 
"syevdxT::free: free-ing device memory for info" );
 
  192    if( m_dev_work != 
nullptr )
 
  194        cudaError_t ec = cudaFree( m_dev_work );
 
  195        if( ec != cudaSuccess )
 
  197            MXCUDA_EXCEPTION( ec, 
"syevdxT::free: free-ing device memory for work" );
 
  201    if( m_host_work != 
nullptr )
 
  203        ::free( m_host_work );
 
  210        m_dev_info = 
nullptr;
 
  211        m_dev_work = 
nullptr;
 
  212        m_host_work = 
nullptr;
 
  216template <
typename floatT>
 
  217void syevdxT<floatT>::setup( cusolverDnHandle_t cusolverH, cusolverDnParams_t params, cudaStream_t stream )
 
  219    m_cusolverH = cusolverH;
 
  224template <
typename floatT>
 
  225void syevdxT<floatT>::allocate( int64_t n, int64_t nVecs, cublasFillMode_t uplo )
 
  228    if( m_cusolverH == 
nullptr )
 
  230        throw std::runtime_error( 
"syevdx::allocate: m_cusolverH is null" );
 
  233    if( m_params == 
nullptr )
 
  235        throw std::runtime_error( 
"syevdx::allocate: m_params is null" );
 
  238    cusolverEigRange_t range;
 
  240    if( nVecs == 0 || nVecs == n )
 
  242        range = CUSOLVER_EIG_RANGE_ALL;
 
  248        range = CUSOLVER_EIG_RANGE_I;
 
  257    if( n == m_n && range == m_range && il == m_il && iu == m_iu && uplo == m_uplo && m_dev_A && m_dev_W &&
 
  258        m_dev_info && m_dev_work )
 
  274    cudaError_t ec = cudaMalloc( 
reinterpret_cast<void **
>( &m_dev_A ), 
sizeof( floatT ) * ( n * n ) );
 
  275    if( ec != cudaSuccess )
 
  277        MXCUDA_EXCEPTION( ec, 
"syevdxT::allocate: allocating device memory for matrix" );
 
  280    ec = cudaMalloc( 
reinterpret_cast<void **
>( &m_dev_W ), 
sizeof( floatT ) * n );
 
  281    if( ec != cudaSuccess )
 
  283        MXCUDA_EXCEPTION( ec, 
"syevdxT::allocate: allocating device memory for eigen values" );
 
  286    ec = cudaMalloc( 
reinterpret_cast<void **
>( &m_dev_info ), 
sizeof( 
int ) );
 
  287    if( ec != cudaSuccess )
 
  289        MXCUDA_EXCEPTION( ec, 
"syevdxT::allocate: allocating device memory for info" );
 
  294    cusolverStatus_t csec = cusolverDnXsyevdx_bufferSize( m_cusolverH,
 
  296                                                          CUSOLVER_EIG_MODE_VECTOR,
 
  300                                                          cusolver_traits<floatT>::cuda_data_type,
 
  308                                                          cusolver_traits<floatT>::cuda_data_type,
 
  310                                                          cusolver_traits<floatT>::cuda_data_type,
 
  313    if( csec != CUSOLVER_STATUS_SUCCESS )
 
  315        MXCUSOLVER_EXCEPTION( csec, 
"syevdxT::allocate: call to cusolverDnXsyevdx_bufferSize" );
 
  318    ec = cudaMalloc( 
reinterpret_cast<void **
>( &m_dev_work ), m_dev_wsBytes );
 
  319    if( ec != cudaSuccess )
 
  321        MXCUDA_EXCEPTION( ec, 
"syevdxT::allocate: allocating device memory for work" );
 
  324    if( m_host_wsBytes > 0 )
 
  326        m_host_work = 
reinterpret_cast<void *
>( malloc( m_host_wsBytes ) );
 
  327        if( m_host_work == 
nullptr )
 
  329            mxThrowException( err::allocerr, 
"syevdxT::allocate:", 
"allocating host memory for work" );
 
  334        m_host_work = 
nullptr; 
 
  340template <
typename floatT>
 
  341int syevdxT<floatT>::execute( int64_t &numEig, floatT *A )
 
  343    if( m_cusolverH == 
nullptr )
 
  345        throw std::runtime_error( 
"syevdx::execute: m_cusolverH is null" );
 
  348    if( m_params == 
nullptr )
 
  350        throw std::runtime_error( 
"syevdx::execute: m_params is null" );
 
  353    cudaError_t ec = cudaMemcpyAsync( m_dev_A, A, 
sizeof( floatT ) * m_n * m_n, cudaMemcpyHostToDevice, m_stream );
 
  354    if( ec != cudaSuccess )
 
  356        MXCUDA_EXCEPTION( ec, 
"syevdxT::execute: copying matrix to device" );
 
  359    cusolverStatus_t csec = cusolverDnXsyevdx( m_cusolverH,
 
  361                                               CUSOLVER_EIG_MODE_VECTOR,
 
  365                                               cusolver_traits<floatT>::cuda_data_type,
 
  373                                               cusolver_traits<floatT>::cuda_data_type,
 
  375                                               cusolver_traits<floatT>::cuda_data_type,
 
  382    if( csec != CUSOLVER_STATUS_SUCCESS )
 
  384        MXCUSOLVER_EXCEPTION( csec, 
"syevdxT::execute: call to cusolverDnXsyevdx" );
 
  388    ec = cudaMemcpyAsync( &info, m_dev_info, 
sizeof( 
int ), cudaMemcpyDeviceToHost, m_stream );
 
  389    if( ec != cudaSuccess )
 
  391        MXCUDA_EXCEPTION( ec, 
"syevdxT::execute: copying info from device" );
 
  394    ec = cudaStreamSynchronize( m_stream );
 
  395    if( ec != cudaSuccess )
 
  397        MXCUDA_EXCEPTION( ec, 
"syevdxT::execute: synchronizing" );
 
  403template <
typename floatT>
 
  404template <
typename eigenT>
 
  405int syevdxT<floatT>::calcEigenVecs( eigenT &evecs, eigenT &evals, eigenT &cv, 
bool normalize, 
bool check )
 
  407    if( m_cusolverH == 
nullptr )
 
  409        throw std::runtime_error( 
"syevdx::calcEigenVecs: m_cusolverH is null" );
 
  412    if( m_params == 
nullptr )
 
  414        throw std::runtime_error( 
"syevdx::calcEigenVecs: m_params is null" );
 
  419    int info = execute( nVecs, cv.data() );
 
  421    evecs.resize( m_n, nVecs );
 
  423        cudaMemcpyAsync( evecs.data(), m_dev_A, 
sizeof( floatT ) * m_n * nVecs, cudaMemcpyDeviceToHost, m_stream );
 
  424    if( ec != cudaSuccess )
 
  426        MXCUDA_EXCEPTION( ec, 
"syevdxT::calcEigenVecs: copying eigenvectors from device" );
 
  428    evals.resize( 1, nVecs );
 
  429    ec = cudaMemcpyAsync( evals.data(), m_dev_W, 
sizeof( floatT ) * nVecs, cudaMemcpyDeviceToHost, m_stream );
 
  430    if( ec != cudaSuccess )
 
  432        MXCUDA_EXCEPTION( ec, 
"syevdxT::calcEigenVecs: copying eigenvalues from device" );
 
  435    ec = cudaStreamSynchronize( m_stream );
 
  436    if( ec != cudaSuccess )
 
  438        MXCUDA_EXCEPTION( ec, 
"syevdxT::calcEigenVecs: synchronizing" );
 
  446            for( 
int i = 0; i < nVecs; ++i )
 
  448                evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
 
  453            for( 
int i = 0; i < nVecs; ++i )
 
  455                if( evals( i ) == 0 )
 
  457                    std::cerr << 
"got 0 eigenvalue (# " << i << 
")\n";
 
  460                else if( evals( i ) < 0 )
 
  462                    std::cerr << 
"got < 0 eigenvalue (# " << i << 
")\n";
 
  465                else if( !std::isfinite( evals( i ) ) )
 
  467                    std::cerr << 
"got not-normal eigenvalue (# " << i << 
")\n";
 
  472                    evecs.col( i ) = evecs.col( i ) / sqrt( evals( i ) );
 
  475                for( 
int r = 0; r < evecs.rows(); ++r )
 
  477                    if( !std::isfinite( evecs.col( i )( r ) ) )
 
  479                        std::cerr << 
"got not-normal eigenvector entry (# " << i << 
"," << r
 
  480                                  << 
") = " << evecs.col( i )( r ) << 
"\n";
 
MXLAPACK_INT calcEigenVecs(eigenT &evecs, eigenT &evals, eigenT &cv, int nVecs=0, bool normalize=false, bool check=false, syevrMem< _evCalcT > *mem=0, double *t_eigenv=nullptr)
Calculate the eigenvectors and eigenvalues given a triangular matrix.
#define mxThrowException(extype, src, expl)
Throw an exception. This macro takes care of the file and line.