mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
templateCublas_test.cpp
Go to the documentation of this file.
1 /** \file templateCublas_test.cpp
2  */
3 #include "../../../catch2/catch.hpp"
4 
5 
6 #include "../../../../include/math/cuda/cudaPtr.hpp"
7 #include "../../../../include/math/cuda/templateCublas.hpp"
8 
9 /** Scenario: scaling a vector with cublas
10  * Tests cublasTscal, as well as basic cudaPtr operations.
11  *
12  * \anchor test_math_templateCublas_scal
13  */
14 SCENARIO( "scaling a vector with cublas", "[math::cuda::templateCublas]" )
15 {
16  GIVEN("a vector")
17  {
18  WHEN("type is single precision real")
19  {
20  std::vector<float> hx;
22  float alpha = 2;
23 
24  hx.resize(5);
25  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
26 
27  dx.upload(hx.data(), hx.size());
28  REQUIRE(dx.size() == hx.size());
29 
30  cublasHandle_t handle;
31  cublasStatus_t stat;
32  stat = cublasCreate(&handle);
33  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
34 
35  stat = mx::cuda::cublasTscal(handle, dx.size(), &alpha, dx(), 1);
36  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
37 
38  dx.download(hx.data());
39 
40  REQUIRE(hx[0] == 0);
41  REQUIRE(hx[1] == 2);
42  REQUIRE(hx[2] == 4);
43  REQUIRE(hx[3] == 6);
44  REQUIRE(hx[4] == 8);
45 
46  stat = cublasDestroy(handle);
47 
48  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
49  }
50 
51  WHEN("type is double precision real")
52  {
53  std::vector<double> hx;
55  double alpha = 2;
56 
57  hx.resize(5);
58  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
59 
60  dx.upload(hx.data(), hx.size());
61  REQUIRE(dx.size() == hx.size());
62 
63  cublasHandle_t handle;
64  cublasStatus_t stat;
65  stat = cublasCreate(&handle);
66  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
67 
68  stat = mx::cuda::cublasTscal(handle, dx.size(), &alpha, dx(), 1);
69  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
70 
71  dx.download(hx.data());
72 
73  REQUIRE(hx[0] == 0);
74  REQUIRE(hx[1] == 2);
75  REQUIRE(hx[2] == 4);
76  REQUIRE(hx[3] == 6);
77  REQUIRE(hx[4] == 8);
78 
79  stat = cublasDestroy(handle);
80 
81  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
82  }
83 
84  WHEN("type is single precision complex")
85  {
86  std::vector<std::complex<float>> hx;
88  std::complex<float> alpha = 2;
89 
90  hx.resize(5);
91  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
92 
93  dx.upload(hx.data(), hx.size());
94  REQUIRE(dx.size() == hx.size());
95 
96  cublasHandle_t handle;
97  cublasStatus_t stat;
98  stat = cublasCreate(&handle);
99  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
100 
101  stat = mx::cuda::cublasTscal(handle, dx.size(), (cuComplex *) &alpha, dx(), 1);
102  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
103 
104  dx.download(hx.data());
105 
106  REQUIRE(hx[0] == std::complex<float>(0,0));
107  REQUIRE(hx[1] == std::complex<float>(2,2));
108  REQUIRE(hx[2] == std::complex<float>(4,4));
109  REQUIRE(hx[3] == std::complex<float>(6,6));
110  REQUIRE(hx[4] == std::complex<float>(8,8));
111 
112  stat = cublasDestroy(handle);
113 
114  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
115  }
116 
117  WHEN("type is double precision complex")
118  {
119  std::vector<std::complex<double>> hx;
121  std::complex<double> alpha = 2;
122 
123  hx.resize(5);
124  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
125 
126  dx.upload(hx.data(), hx.size());
127  REQUIRE(dx.size() == hx.size());
128 
129  cublasHandle_t handle;
130  cublasStatus_t stat;
131  stat = cublasCreate(&handle);
132  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
133 
134  stat = mx::cuda::cublasTscal(handle, dx.size(), (cuDoubleComplex *) &alpha, dx(), 1);
135  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
136 
137  dx.download(hx.data());
138 
139  REQUIRE(hx[0] == std::complex<double>(0,0));
140  REQUIRE(hx[1] == std::complex<double>(2,2));
141  REQUIRE(hx[2] == std::complex<double>(4,4));
142  REQUIRE(hx[3] == std::complex<double>(6,6));
143  REQUIRE(hx[4] == std::complex<double>(8,8));
144 
145  stat = cublasDestroy(handle);
146 
147  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
148  }
149  }
150 }
151 
152 /** Scenario: scaling and accumulating a vector with cublas
153  * Tests cublasTaxpy, as well as basic cudaPtr operations.
154  *
155  * \anchor test_math_templateCublas_axpy
156  */
157 SCENARIO( "scaling and accumulating a vector with cublas", "[math::cuda::templateCublas]" )
158 {
159  GIVEN("a vector")
160  {
161  WHEN("type is single precision real")
162  {
163  std::vector<float> hx, hy;
165  float alpha = 2;
166 
167  hx.resize(5);
168  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
169 
170  hy.resize(5);
171  for(size_t n=0; n < hy.size(); ++n) hy[n] = 1;
172 
173  dx.upload(hx.data(), hx.size());
174  REQUIRE(dx.size() == hx.size());
175 
176  dy.upload(hy.data(), hy.size());
177  REQUIRE(dy.size() == hy.size());
178 
179  cublasHandle_t handle;
180  cublasStatus_t stat;
181  stat = cublasCreate(&handle);
182  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
183 
184  stat = mx::cuda::cublasTaxpy(handle, dx.size(), &alpha, dx(), 1, dy(), 1);
185  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
186 
187  dy.download(hy.data());
188 
189  REQUIRE(hy[0] == 1);
190  REQUIRE(hy[1] == 3);
191  REQUIRE(hy[2] == 5);
192  REQUIRE(hy[3] == 7);
193  REQUIRE(hy[4] == 9);
194 
195  stat = cublasDestroy(handle);
196 
197  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
198  }
199 
200  WHEN("type is double precision real")
201  {
202  std::vector<double> hx, hy;
204  double alpha = 2;
205 
206  hx.resize(5);
207  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
208 
209  hy.resize(5);
210  for(size_t n=0; n < hy.size(); ++n) hy[n] = 1;
211 
212  dx.upload(hx.data(), hx.size());
213  REQUIRE(dx.size() == hx.size());
214 
215  dy.upload(hy.data(), hy.size());
216  REQUIRE(dy.size() == hy.size());
217 
218  cublasHandle_t handle;
219  cublasStatus_t stat;
220  stat = cublasCreate(&handle);
221  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
222 
223  stat = mx::cuda::cublasTaxpy(handle, dx.size(), &alpha, dx(), 1, dy(), 1);
224  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
225 
226  dy.download(hy.data());
227 
228  REQUIRE(hy[0] == 1);
229  REQUIRE(hy[1] == 3);
230  REQUIRE(hy[2] == 5);
231  REQUIRE(hy[3] == 7);
232  REQUIRE(hy[4] == 9);
233 
234  stat = cublasDestroy(handle);
235 
236  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
237  }
238 
239  WHEN("type is single precision complex")
240  {
241  std::vector<std::complex<float>> hx, hy;
243  std::complex<float> alpha = 2;
244 
245  hx.resize(5);
246  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
247 
248  hy.resize(5);
249  for(size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<float>(1,1);
250 
251  dx.upload(hx.data(), hx.size());
252  REQUIRE(dx.size() == hx.size());
253 
254  dy.upload(hy.data(), hy.size());
255  REQUIRE(dy.size() == hy.size());
256 
257  cublasHandle_t handle;
258  cublasStatus_t stat;
259  stat = cublasCreate(&handle);
260  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
261 
262  stat = mx::cuda::cublasTaxpy(handle, dx.size(), (cuComplex *) &alpha, dx(), 1, dy(), 1);
263  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
264 
265  dy.download(hy.data());
266 
267  REQUIRE(hy[0] == std::complex<float>(1,1));
268  REQUIRE(hy[1] == std::complex<float>(3,3));
269  REQUIRE(hy[2] == std::complex<float>(5,5));
270  REQUIRE(hy[3] == std::complex<float>(7,7));
271  REQUIRE(hy[4] == std::complex<float>(9,9));
272 
273  stat = cublasDestroy(handle);
274 
275  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
276  }
277 
278  WHEN("type is double precision complex")
279  {
280  std::vector<std::complex<double>> hx, hy;
282  std::complex<double> alpha = 2;
283 
284  hx.resize(5);
285  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
286 
287  hy.resize(5);
288  for(size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<double>(1,1);
289 
290  dx.upload(hx.data(), hx.size());
291  REQUIRE(dx.size() == hx.size());
292 
293  dy.upload(hy.data(), hy.size());
294  REQUIRE(dy.size() == hy.size());
295 
296  cublasHandle_t handle;
297  cublasStatus_t stat;
298  stat = cublasCreate(&handle);
299  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
300 
301  stat = mx::cuda::cublasTaxpy(handle, dx.size(), (cuDoubleComplex *) &alpha, dx(), 1, dy(), 1);
302  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
303 
304  dy.download(hy.data());
305 
306  REQUIRE(hy[0] == std::complex<double>(1,1));
307  REQUIRE(hy[1] == std::complex<double>(3,3));
308  REQUIRE(hy[2] == std::complex<double>(5,5));
309  REQUIRE(hy[3] == std::complex<double>(7,7));
310  REQUIRE(hy[4] == std::complex<double>(9,9));
311 
312  stat = cublasDestroy(handle);
313 
314  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
315  }
316  }
317 }
318 
319 /** Scenario: multiplying two vectors element by element
320  * Tests mx::cuda::elementwiseXxY, as well as basic cudaPtr operations.
321  *
322  * \anchor test_math_templateCublas_elementwiseXxY
323  */
324 SCENARIO( "multiplying two vectors element by element", "[math::cuda::templateCublas]" )
325 {
326  GIVEN("a vector")
327  {
328  WHEN("both types are single precision real")
329  {
330  std::vector<float> hx, hy;
332 
333  hx.resize(5);
334  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
335 
336  hy.resize(5);
337  for(size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
338 
339  dx.upload(hx.data(), hx.size());
340  REQUIRE(dx.size() == hx.size());
341 
342  dy.resize(hy.size());
343  dy.upload(hy.data());
344  REQUIRE(dy.size() == hy.size());
345 
346  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
347  REQUIRE(rv == cudaSuccess);
348 
349  dx.download(hx.data());
350 
351  REQUIRE(hx[0] == 0);
352  REQUIRE(hx[1] == 2);
353  REQUIRE(hx[2] == 8);
354  REQUIRE(hx[3] == 18);
355  REQUIRE(hx[4] == 32);
356 
357  }
358 
359  WHEN("type1 is complex-float, and type2 is float")
360  {
361  std::vector<std::complex<float>> hx;
363 
364  std::vector<float> hy;
366 
367  hx.resize(5);
368  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
369 
370  hy.resize(5);
371  for(size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
372 
373  dx.upload(hx.data(), hx.size());
374  REQUIRE(dx.size() == hx.size());
375 
376  dy.resize(hy.size());
377  dy.upload(hy.data());
378  REQUIRE(dy.size() == hy.size());
379 
380  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
381  REQUIRE(rv == cudaSuccess);
382 
383  dx.download(hx.data());
384 
385  REQUIRE(hx[0] == std::complex<float>(0,0));
386  REQUIRE(hx[1] == std::complex<float>(2,2));
387  REQUIRE(hx[2] == std::complex<float>(8,8));
388  REQUIRE(hx[3] == std::complex<float>(18,18));
389  REQUIRE(hx[4] == std::complex<float>(32,32));
390 
391  }
392 
393  WHEN("type1 is complex-float, and type2 is complex-float")
394  {
395  std::vector<std::complex<float>> hx;
397 
398  std::vector<std::complex<float>> hy;
400 
401  hx.resize(5);
402  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<float>(n,n);
403 
404  hy.resize(5);
405  for(size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<float>(0,2*n);
406 
407  dx.upload(hx.data(), hx.size());
408  REQUIRE(dx.size() == hx.size());
409 
410  dy.resize(hy.size());
411  dy.upload(hy.data());
412  REQUIRE(dy.size() == hy.size());
413 
414  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
415  REQUIRE(rv == cudaSuccess);
416 
417  dx.download(hx.data());
418 
419  REQUIRE(hx[0] == std::complex<float>(0,0));
420  REQUIRE(hx[1] == std::complex<float>(-2,2)); //(1,1) * (0,2) = (0 + 2i + 0i -2) = (-2,2)
421  REQUIRE(hx[2] == std::complex<float>(-8,8));
422  REQUIRE(hx[3] == std::complex<float>(-18,18));
423  REQUIRE(hx[4] == std::complex<float>(-32,32));
424 
425  }
426 
427  WHEN("both types are double precision real")
428  {
429  std::vector<double> hx, hy;
431 
432  hx.resize(5);
433  for(size_t n=0; n < hx.size(); ++n) hx[n] = n;
434 
435  hy.resize(5);
436  for(size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
437 
438  dx.upload(hx.data(), hx.size());
439  REQUIRE(dx.size() == hx.size());
440 
441  dy.resize(hy.size());
442  dy.upload(hy.data());
443  REQUIRE(dy.size() == hy.size());
444 
445  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
446  REQUIRE(rv == cudaSuccess);
447 
448  dx.download(hx.data());
449 
450  REQUIRE(hx[0] == 0);
451  REQUIRE(hx[1] == 2);
452  REQUIRE(hx[2] == 8);
453  REQUIRE(hx[3] == 18);
454  REQUIRE(hx[4] == 32);
455 
456  }
457 
458  WHEN("type1 is complex-double, and type2 is double")
459  {
460  std::vector<std::complex<double>> hx;
462 
463  std::vector<double> hy;
465 
466  hx.resize(5);
467  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
468 
469  hy.resize(5);
470  for(size_t n=0; n < hy.size(); ++n) hy[n] = 2*n;
471 
472  dx.upload(hx.data(), hx.size());
473  REQUIRE(dx.size() == hx.size());
474 
475  dy.resize(hy.size());
476  dy.upload(hy.data());
477  REQUIRE(dy.size() == hy.size());
478 
479  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
480  REQUIRE(rv == cudaSuccess);
481 
482  dx.download(hx.data());
483 
484  REQUIRE(hx[0] == std::complex<double>(0,0));
485  REQUIRE(hx[1] == std::complex<double>(2,2));
486  REQUIRE(hx[2] == std::complex<double>(8,8));
487  REQUIRE(hx[3] == std::complex<double>(18,18));
488  REQUIRE(hx[4] == std::complex<double>(32,32));
489 
490  }
491 
492  WHEN("type1 is complex-double, and type2 is complex-double")
493  {
494  std::vector<std::complex<double>> hx;
496 
497  std::vector<std::complex<double>> hy;
499 
500  hx.resize(5);
501  for(size_t n=0; n < hx.size(); ++n) hx[n] = std::complex<double>(n,n);
502 
503  hy.resize(5);
504  for(size_t n=0; n < hy.size(); ++n) hy[n] = std::complex<double>(1,2*n);
505 
506  dx.upload(hx.data(), hx.size());
507  REQUIRE(dx.size() == hx.size());
508 
509  dy.resize(hy.size());
510  dy.upload(hy.data());
511  REQUIRE(dy.size() == hy.size());
512 
513  cudaError_t rv = mx::cuda::elementwiseXxY(dx(), dy(), dx.size());
514  REQUIRE(rv == cudaSuccess);
515 
516  dx.download(hx.data());
517 
518 
519  REQUIRE(hx[0] == std::complex<double>(0,0)); //(0,0) * (0,0) = (0,0)
520  REQUIRE(hx[1] == std::complex<double>(-1,3)); //(1,1) * (1,2) = (1 + 2i + i -2) = (-1,3)
521  REQUIRE(hx[2] == std::complex<double>(-6,10));
522  REQUIRE(hx[3] == std::complex<double>(-15,21));
523  REQUIRE(hx[4] == std::complex<double>(-28,36));
524 
525  }
526  }
527 }
528 
529 /** Scenario: multiplying a vector by a matrix
530  * Tests mx::cuda::cublasTgemv, as well as basic cudaPtr operations.
531  *
532  * \anchor test_math_templateCublas_cublasTgemv_inc
533  */
534 SCENARIO( "multiplying a vector by a matrix giving increments", "[math::cuda::templateCublas]" )
535 {
536  GIVEN("a 2x2 matrix, float")
537  {
538  WHEN("float precision, beta is 0")
539  {
540  std::vector<float> hA; //This will actually be a vector
542 
543  std::vector<float> hx; //This will actually be a vector
545 
546  std::vector<float> hy;
548 
549  /* Column major order:
550  1 3
551  2 4
552  */
553  hA.resize(4);
554  hA[0] = 1;
555  hA[1] = 2;
556  hA[2] = 3;
557  hA[3] = 4;
558 
559  dA.resize(2,2);
560  dA.upload(hA.data());
561 
562  hx.resize(2);
563  hx[0] = 1;
564  hx[1] = 2;
565 
566  dx.upload(hx.data(), hx.size());
567 
568  hy.resize(2);
569 
570  dy.resize(2);
571  dy.initialize();
572 
573  float alpha = 1;
574  float beta = 0;
575 
576  cublasHandle_t handle;
577  cublasStatus_t stat;
578  stat = cublasCreate(&handle);
579  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
580 
581  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
582  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
583 
584  dy.download(hy.data());
585 
586  REQUIRE(hy[0] == 7);
587  REQUIRE(hy[1] == 10);
588 
589  stat = cublasDestroy(handle);
590  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
591  }
592 
593  WHEN("float precision, beta is 1, but y is all 0")
594  {
595  std::vector<float> hA; //This will actually be a vector
597 
598  std::vector<float> hx; //This will actually be a vector
600 
601  std::vector<float> hy;
603 
604  /* Column major order:
605  1 3
606  2 4
607  */
608  hA.resize(4);
609  hA[0] = 1;
610  hA[1] = 2;
611  hA[2] = 3;
612  hA[3] = 4;
613 
614  dA.resize(2,2);
615  dA.upload(hA.data());
616 
617  hx.resize(2);
618  hx[0] = 1;
619  hx[1] = 2;
620 
621  dx.upload(hx.data(), hx.size());
622 
623  hy.resize(2);
624 
625  dy.resize(2);
626  dy.initialize();
627 
628  float alpha = 1;
629  float beta = 1;
630 
631  cublasHandle_t handle;
632  cublasStatus_t stat;
633  stat = cublasCreate(&handle);
634  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
635 
636  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
637  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
638 
639  dy.download(hy.data());
640 
641  REQUIRE(hy[0] == 7);
642  REQUIRE(hy[1] == 10);
643 
644  stat = cublasDestroy(handle);
645  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
646  }
647  WHEN("float precision, beta is 1, y is [1,2]")
648  {
649  std::vector<float> hA; //This will actually be a vector
651 
652  std::vector<float> hx; //This will actually be a vector
654 
655  std::vector<float> hy;
657 
658  /* Column major order:
659  1 3
660  2 4
661  */
662  hA.resize(4);
663  hA[0] = 1;
664  hA[1] = 2;
665  hA[2] = 3;
666  hA[3] = 4;
667 
668  dA.resize(2,2);
669  dA.upload(hA.data());
670 
671  hx.resize(2);
672  hx[0] = 1;
673  hx[1] = 2;
674 
675  dx.upload(hx.data(), hx.size());
676 
677  hy.resize(2);
678  hy[0] = 1;
679  hy[1] = 2;
680 
681  dy.resize(2);
682  dy.upload(hx.data());
683 
684  float alpha = 1;
685  float beta = 1;
686 
687  cublasHandle_t handle;
688  cublasStatus_t stat;
689  stat = cublasCreate(&handle);
690  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
691 
692  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
693  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
694 
695  dy.download(hy.data());
696 
697  REQUIRE(hy[0] == 8);
698  REQUIRE(hy[1] == 12);
699 
700  stat = cublasDestroy(handle);
701  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
702  }
703  }
704 
705  GIVEN("a 2x2 matrix, double")
706  {
707  WHEN("double precision, beta is 0")
708  {
709  std::vector<double> hA; //This will actually be a vector
711 
712  std::vector<double> hx; //This will actually be a vector
714 
715  std::vector<double> hy;
717 
718  /* Column major order:
719  1 3
720  2 4
721  */
722  hA.resize(4);
723  hA[0] = 1;
724  hA[1] = 2;
725  hA[2] = 3;
726  hA[3] = 4;
727 
728  dA.resize(2,2);
729  dA.upload(hA.data());
730 
731  hx.resize(2);
732  hx[0] = 1;
733  hx[1] = 2;
734 
735  dx.upload(hx.data(), hx.size());
736 
737  hy.resize(2);
738 
739  dy.resize(2);
740  dy.initialize();
741 
742  double alpha = 1;
743  double beta = 0;
744 
745  cublasHandle_t handle;
746  cublasStatus_t stat;
747  stat = cublasCreate(&handle);
748  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
749 
750  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
751  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
752 
753  dy.download(hy.data());
754 
755  REQUIRE(hy[0] == 7);
756  REQUIRE(hy[1] == 10);
757 
758  stat = cublasDestroy(handle);
759  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
760  }
761 
762  WHEN("double precision, beta is 1, but y is all 0")
763  {
764  std::vector<double> hA; //This will actually be a vector
766 
767  std::vector<double> hx; //This will actually be a vector
769 
770  std::vector<double> hy;
772 
773  /* Column major order:
774  1 3
775  2 4
776  */
777  hA.resize(4);
778  hA[0] = 1;
779  hA[1] = 2;
780  hA[2] = 3;
781  hA[3] = 4;
782 
783  dA.resize(2,2);
784  dA.upload(hA.data());
785 
786  hx.resize(2);
787  hx[0] = 1;
788  hx[1] = 2;
789 
790  dx.upload(hx.data(), hx.size());
791 
792  hy.resize(2);
793 
794  dy.resize(2);
795  dy.initialize();
796 
797  double alpha = 1;
798  double beta = 1;
799 
800  cublasHandle_t handle;
801  cublasStatus_t stat;
802  stat = cublasCreate(&handle);
803  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
804 
805  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
806  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
807 
808  dy.download(hy.data());
809 
810  REQUIRE(hy[0] == 7);
811  REQUIRE(hy[1] == 10);
812 
813  stat = cublasDestroy(handle);
814  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
815  }
816  WHEN("double precision, beta is 1, y is [1,2]")
817  {
818  std::vector<double> hA; //This will actually be a vector
820 
821  std::vector<double> hx; //This will actually be a vector
823 
824  std::vector<double> hy;
826 
827  /* Column major order:
828  1 3
829  2 4
830  */
831  hA.resize(4);
832  hA[0] = 1;
833  hA[1] = 2;
834  hA[2] = 3;
835  hA[3] = 4;
836 
837  dA.resize(2,2);
838  dA.upload(hA.data());
839 
840  hx.resize(2);
841  hx[0] = 1;
842  hx[1] = 2;
843 
844  dx.upload(hx.data(), hx.size());
845 
846  hy.resize(2);
847  hy[0] = 1;
848  hy[1] = 2;
849 
850  dy.resize(2);
851  dy.upload(hx.data());
852 
853  double alpha = 1;
854  double beta = 1;
855 
856  cublasHandle_t handle;
857  cublasStatus_t stat;
858  stat = cublasCreate(&handle);
859  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
860 
861  stat = mx::cuda::cublasTgemv(handle, CUBLAS_OP_N, 2, 2, &alpha, dA(), 2, dx(), 1, &beta, dy(),1);
862  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
863 
864  dy.download(hy.data());
865 
866  REQUIRE(hy[0] == 8);
867  REQUIRE(hy[1] == 12);
868 
869  stat = cublasDestroy(handle);
870  REQUIRE(stat == CUBLAS_STATUS_SUCCESS);
871  }
872  }
873 }
874 
cudaError_t elementwiseXxY(dataT1 *x, dataT2 *y, int size)
Calculates the element-wise product of two vectors, storing the result in the first.
cublasStatus_t cublasTaxpy(cublasHandle_t handle, int n, const floatT *alpha, const floatT *x, int incx, floatT *y, int incy)
Multiplies a vector by a scalar, adding it to a second vector which is overwritten by the result.
cublasStatus_t cublasTscal(cublasHandle_t handle, int n, const floatT *alpha, floatT *x, int incx)
Multiplies a vector by a scalar, overwriting the vector with the result.
cublasStatus_t cublasTgemv(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const floatT *alpha, const floatT *A, int lda, const floatT *x, int incx, const floatT *beta, floatT *y, int incy)
Perform a matrix-vector multiplication.
A smart-pointer wrapper for cuda device pointers.
Definition: cudaPtr.hpp:47
cudaError_t initialize()
Initialize the array bytes to 0.
Definition: cudaPtr.hpp:219
int download(hostPtrT *dest)
Copy from the device to the host.
Definition: cudaPtr.hpp:274
int upload(const hostPtrT *src)
Copy from the host to the device, after allocation.
Definition: cudaPtr.hpp:245
int resize(size_t sz)
Resize the memory allocation, in 1D.
Definition: cudaPtr.hpp:183
SCENARIO("scaling a vector with cublas", "[math::cuda::templateCublas]")