3 #include "../../../catch2/catch.hpp"
6 #include "../../../../include/math/cuda/cudaPtr.hpp"
7 #include "../../../../include/math/cuda/templateCublas.hpp"
14 SCENARIO(
"scaling a vector with cublas",
"[math::cuda::templateCublas]" )
18 WHEN(
"type is single precision real")
20 std::vector<float> hx;
25 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
27 dx.
upload(hx.data(), hx.size());
28 REQUIRE(dx.size() == hx.size());
30 cublasHandle_t handle;
32 stat = cublasCreate(&handle);
33 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
36 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
46 stat = cublasDestroy(handle);
48 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
51 WHEN(
"type is double precision real")
53 std::vector<double> hx;
58 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
60 dx.
upload(hx.data(), hx.size());
61 REQUIRE(dx.size() == hx.size());
63 cublasHandle_t handle;
65 stat = cublasCreate(&handle);
66 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
69 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
79 stat = cublasDestroy(handle);
81 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
84 WHEN(
"type is single precision complex")
86 std::vector<std::complex<float>> hx;
88 std::complex<float> alpha = 2;
91 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
93 dx.
upload(hx.data(), hx.size());
94 REQUIRE(dx.size() == hx.size());
96 cublasHandle_t handle;
98 stat = cublasCreate(&handle);
99 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
102 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
106 REQUIRE(hx[0] == std::complex<float>(0,0));
107 REQUIRE(hx[1] == std::complex<float>(2,2));
108 REQUIRE(hx[2] == std::complex<float>(4,4));
109 REQUIRE(hx[3] == std::complex<float>(6,6));
110 REQUIRE(hx[4] == std::complex<float>(8,8));
112 stat = cublasDestroy(handle);
114 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
117 WHEN(
"type is double precision complex")
119 std::vector<std::complex<double>> hx;
121 std::complex<double> alpha = 2;
124 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
126 dx.
upload(hx.data(), hx.size());
127 REQUIRE(dx.size() == hx.size());
129 cublasHandle_t handle;
131 stat = cublasCreate(&handle);
132 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
135 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
139 REQUIRE(hx[0] == std::complex<double>(0,0));
140 REQUIRE(hx[1] == std::complex<double>(2,2));
141 REQUIRE(hx[2] == std::complex<double>(4,4));
142 REQUIRE(hx[3] == std::complex<double>(6,6));
143 REQUIRE(hx[4] == std::complex<double>(8,8));
145 stat = cublasDestroy(handle);
147 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
157 SCENARIO(
"scaling and accumulating a vector with cublas",
"[math::cuda::templateCublas]" )
161 WHEN(
"type is single precision real")
163 std::vector<float> hx, hy;
168 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
171 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 1;
173 dx.
upload(hx.data(), hx.size());
174 REQUIRE(dx.size() == hx.size());
176 dy.
upload(hy.data(), hy.size());
177 REQUIRE(dy.size() == hy.size());
179 cublasHandle_t handle;
181 stat = cublasCreate(&handle);
182 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
185 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
195 stat = cublasDestroy(handle);
197 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
200 WHEN(
"type is double precision real")
202 std::vector<double> hx, hy;
207 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
210 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 1;
212 dx.
upload(hx.data(), hx.size());
213 REQUIRE(dx.size() == hx.size());
215 dy.
upload(hy.data(), hy.size());
216 REQUIRE(dy.size() == hy.size());
218 cublasHandle_t handle;
220 stat = cublasCreate(&handle);
221 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
224 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
234 stat = cublasDestroy(handle);
236 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
239 WHEN(
"type is single precision complex")
241 std::vector<std::complex<float>> hx, hy;
243 std::complex<float> alpha = 2;
246 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
249 for(
size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<float>(1,1);
251 dx.
upload(hx.data(), hx.size());
252 REQUIRE(dx.size() == hx.size());
254 dy.
upload(hy.data(), hy.size());
255 REQUIRE(dy.size() == hy.size());
257 cublasHandle_t handle;
259 stat = cublasCreate(&handle);
260 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
263 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
267 REQUIRE(hy[0] == std::complex<float>(1,1));
268 REQUIRE(hy[1] == std::complex<float>(3,3));
269 REQUIRE(hy[2] == std::complex<float>(5,5));
270 REQUIRE(hy[3] == std::complex<float>(7,7));
271 REQUIRE(hy[4] == std::complex<float>(9,9));
273 stat = cublasDestroy(handle);
275 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
278 WHEN(
"type is double precision complex")
280 std::vector<std::complex<double>> hx, hy;
282 std::complex<double> alpha = 2;
285 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
288 for(
size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<double>(1,1);
290 dx.
upload(hx.data(), hx.size());
291 REQUIRE(dx.size() == hx.size());
293 dy.
upload(hy.data(), hy.size());
294 REQUIRE(dy.size() == hy.size());
296 cublasHandle_t handle;
298 stat = cublasCreate(&handle);
299 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
302 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
306 REQUIRE(hy[0] == std::complex<double>(1,1));
307 REQUIRE(hy[1] == std::complex<double>(3,3));
308 REQUIRE(hy[2] == std::complex<double>(5,5));
309 REQUIRE(hy[3] == std::complex<double>(7,7));
310 REQUIRE(hy[4] == std::complex<double>(9,9));
312 stat = cublasDestroy(handle);
314 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
324 SCENARIO(
"multiplying two vectors element by element",
"[math::cuda::templateCublas]" )
328 WHEN(
"both types are single precision real")
330 std::vector<float> hx, hy;
334 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
337 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
339 dx.
upload(hx.data(), hx.size());
340 REQUIRE(dx.size() == hx.size());
344 REQUIRE(dy.size() == hy.size());
347 REQUIRE(rv == cudaSuccess);
354 REQUIRE(hx[3] == 18);
355 REQUIRE(hx[4] == 32);
359 WHEN(
"type1 is complex-float, and type2 is float")
361 std::vector<std::complex<float>> hx;
364 std::vector<float> hy;
368 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
371 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
373 dx.
upload(hx.data(), hx.size());
374 REQUIRE(dx.size() == hx.size());
378 REQUIRE(dy.size() == hy.size());
381 REQUIRE(rv == cudaSuccess);
385 REQUIRE(hx[0] == std::complex<float>(0,0));
386 REQUIRE(hx[1] == std::complex<float>(2,2));
387 REQUIRE(hx[2] == std::complex<float>(8,8));
388 REQUIRE(hx[3] == std::complex<float>(18,18));
389 REQUIRE(hx[4] == std::complex<float>(32,32));
393 WHEN(
"type1 is complex-float, and type2 is complex-float")
395 std::vector<std::complex<float>> hx;
398 std::vector<std::complex<float>> hy;
402 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
405 for(
size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<float>(0,2*n);
407 dx.
upload(hx.data(), hx.size());
408 REQUIRE(dx.size() == hx.size());
412 REQUIRE(dy.size() == hy.size());
415 REQUIRE(rv == cudaSuccess);
419 REQUIRE(hx[0] == std::complex<float>(0,0));
420 REQUIRE(hx[1] == std::complex<float>(-2,2));
421 REQUIRE(hx[2] == std::complex<float>(-8,8));
422 REQUIRE(hx[3] == std::complex<float>(-18,18));
423 REQUIRE(hx[4] == std::complex<float>(-32,32));
427 WHEN(
"both types are double precision real")
429 std::vector<double> hx, hy;
433 for(
size_t n=0; n < hx.size(); ++n) hx[n] = n;
436 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
438 dx.
upload(hx.data(), hx.size());
439 REQUIRE(dx.size() == hx.size());
443 REQUIRE(dy.size() == hy.size());
446 REQUIRE(rv == cudaSuccess);
453 REQUIRE(hx[3] == 18);
454 REQUIRE(hx[4] == 32);
458 WHEN(
"type1 is complex-double, and type2 is double")
460 std::vector<std::complex<double>> hx;
463 std::vector<double> hy;
467 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
470 for(
size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
472 dx.
upload(hx.data(), hx.size());
473 REQUIRE(dx.size() == hx.size());
477 REQUIRE(dy.size() == hy.size());
480 REQUIRE(rv == cudaSuccess);
484 REQUIRE(hx[0] == std::complex<double>(0,0));
485 REQUIRE(hx[1] == std::complex<double>(2,2));
486 REQUIRE(hx[2] == std::complex<double>(8,8));
487 REQUIRE(hx[3] == std::complex<double>(18,18));
488 REQUIRE(hx[4] == std::complex<double>(32,32));
492 WHEN(
"type1 is complex-double, and type2 is complex-double")
494 std::vector<std::complex<double>> hx;
497 std::vector<std::complex<double>> hy;
501 for(
size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
504 for(
size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<double>(1,2*n);
506 dx.
upload(hx.data(), hx.size());
507 REQUIRE(dx.size() == hx.size());
511 REQUIRE(dy.size() == hy.size());
514 REQUIRE(rv == cudaSuccess);
519 REQUIRE(hx[0] == std::complex<double>(0,0));
520 REQUIRE(hx[1] == std::complex<double>(-1,3));
521 REQUIRE(hx[2] == std::complex<double>(-6,10));
522 REQUIRE(hx[3] == std::complex<double>(-15,21));
523 REQUIRE(hx[4] == std::complex<double>(-28,36));
534 SCENARIO(
"multiplying a vector by a matrix giving increments",
"[math::cuda::templateCublas]" )
536 GIVEN(
"a 2x2 matrix, float")
538 WHEN(
"float precision, beta is 0")
540 std::vector<float> hA;
543 std::vector<float> hx;
546 std::vector<float> hy;
566 dx.
upload(hx.data(), hx.size());
576 cublasHandle_t handle;
578 stat = cublasCreate(&handle);
579 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
581 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
582 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
587 REQUIRE(hy[1] == 10);
589 stat = cublasDestroy(handle);
590 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
593 WHEN(
"float precision, beta is 1, but y is all 0")
595 std::vector<float> hA;
598 std::vector<float> hx;
601 std::vector<float> hy;
621 dx.
upload(hx.data(), hx.size());
631 cublasHandle_t handle;
633 stat = cublasCreate(&handle);
634 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
636 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
637 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
642 REQUIRE(hy[1] == 10);
644 stat = cublasDestroy(handle);
645 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
647 WHEN(
"float precision, beta is 1, y is [1,2]")
649 std::vector<float> hA;
652 std::vector<float> hx;
655 std::vector<float> hy;
675 dx.
upload(hx.data(), hx.size());
687 cublasHandle_t handle;
689 stat = cublasCreate(&handle);
690 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
692 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
693 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
698 REQUIRE(hy[1] == 12);
700 stat = cublasDestroy(handle);
701 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
705 GIVEN(
"a 2x2 matrix, double")
707 WHEN(
"double precision, beta is 0")
709 std::vector<double> hA;
712 std::vector<double> hx;
715 std::vector<double> hy;
735 dx.
upload(hx.data(), hx.size());
745 cublasHandle_t handle;
747 stat = cublasCreate(&handle);
748 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
750 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
751 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
756 REQUIRE(hy[1] == 10);
758 stat = cublasDestroy(handle);
759 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
762 WHEN(
"double precision, beta is 1, but y is all 0")
764 std::vector<double> hA;
767 std::vector<double> hx;
770 std::vector<double> hy;
790 dx.
upload(hx.data(), hx.size());
800 cublasHandle_t handle;
802 stat = cublasCreate(&handle);
803 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
805 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
806 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
811 REQUIRE(hy[1] == 10);
813 stat = cublasDestroy(handle);
814 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
816 WHEN(
"double precision, beta is 1, y is [1,2]")
818 std::vector<double> hA;
821 std::vector<double> hx;
824 std::vector<double> hy;
844 dx.
upload(hx.data(), hx.size());
856 cublasHandle_t handle;
858 stat = cublasCreate(&handle);
859 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
861 stat =
mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
862 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
867 REQUIRE(hy[1] == 12);
869 stat = cublasDestroy(handle);
870 REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
cudaError_t elementwiseXxY(dataT1 *x, dataT2 *y, int size)
Calculates the element-wise product of two vectors, storing the result in the first.
cublasStatus_t cublasTaxpy(cublasHandle_t handle, int n, const floatT *alpha, const floatT *x, int incx, floatT *y, int incy)
Multiplies a vector by a scalar, adding it to a second vector which is overwritten by the result.
cublasStatus_t cublasTscal(cublasHandle_t handle, int n, const floatT *alpha, floatT *x, int incx)
Multiplies a vector by a scalar, overwriting the vector with the result.
cublasStatus_t cublasTgemv(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const floatT *alpha, const floatT *A, int lda, const floatT *x, int incx, const floatT *beta, floatT *y, int incy)
Perform a matrix-vector multiplication.
A smart-pointer wrapper for cuda device pointers.
cudaError_t initialize()
Initialize the array bytes to 0.
int download(hostPtrT *dest)
Copy from the device to the host.
int upload(const hostPtrT *src)
Copy from the host to the device, after allocation.
int resize(size_t sz)
Resize the memory allocation, in 1D.
SCENARIO("scaling a vector with cublas", "[math::cuda::templateCublas]")