27 #ifndef math_templateCublas_hpp
28 #define math_templateCublas_hpp
30 #include <cuda_runtime.h>
31 #include <cublas_v2.h>
52 template<
typename floatT>
74 template<
typename floatT>
107 template<
typename dataT1,
typename dataT2>
130 template<
typename floatT>
132 cublasOperation_t trans,
160 template<
typename floatT>
162 cublasOperation_t trans,
173 cublasStatus_t cublasTgemv<float>( cublasHandle_t handle,
174 cublasOperation_t trans,
188 cublasStatus_t cublasTgemv<double>( cublasHandle_t handle,
189 cublasOperation_t trans,
203 cublasStatus_t cublasTgemv<float>( cublasHandle_t handle,
204 cublasOperation_t trans,
215 cublasStatus_t cublasTgemv<double>( cublasHandle_t handle,
216 cublasOperation_t trans,
cublasStatus_t cublasTgemv(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const floatT *alpha, const floatT *A, const floatT *x, const floatT *beta, floatT *y)
Perform a matrix-vector multiplication for stride-less arrays.
cudaError_t elementwiseXxY(dataT1 *x, dataT2 *y, int size)
Calculates the element-wise product of two vectors, storing the result in the first.
cublasStatus_t cublasTaxpy(cublasHandle_t handle, int n, const floatT *alpha, const floatT *x, int incx, floatT *y, int incy)
Multiplies a vector by a scalar, adding it to a second vector which is overwritten by the result.
cublasStatus_t cublasTscal(cublasHandle_t handle, int n, const floatT *alpha, floatT *x, int incx)
Multiplies a vector by a scalar, overwriting the vector with the result.