Tpetra parallel linear algebra Version of the Day
Tpetra_BlockView.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_BLOCKVIEW_HPP
43#define TPETRA_BLOCKVIEW_HPP
44
52
53#include "TpetraCore_config.h"
54#include "Kokkos_ArithTraits.hpp"
55#include "Kokkos_Complex.hpp"
56
57namespace Tpetra {
58
63
64namespace Impl {
65
72template<class ViewType1,
73 class ViewType2,
74 const int rank1 = ViewType1::rank>
75struct AbsMax {
76 static KOKKOS_INLINE_FUNCTION void
77 run (const ViewType2& Y, const ViewType1& X);
78};
79
84template<class ViewType1,
85 class ViewType2>
86struct AbsMax<ViewType1, ViewType2, 2> {
89 static KOKKOS_INLINE_FUNCTION void
90 run (const ViewType2& Y, const ViewType1& X)
91 {
92 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
93 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
95 static_assert(! std::is_const<STY>::value,
96 "AbsMax: The type of each entry of Y must be nonconst.");
97 typedef typename std::decay<decltype (X(0,0)) >::type STX;
98 static_assert( std::is_same<STX, STY>::value,
99 "AbsMax: The type of each entry of X and Y must be the same.");
100 typedef Kokkos::Details::ArithTraits<STY> KAT;
101
102 const int numCols = Y.extent (1);
103 const int numRows = Y.extent (0);
104 for (int j = 0; j < numCols; ++j) {
105 for (int i = 0; i < numRows; ++i) {
106 STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
107 const STX X_ij = X(i,j);
108 // NOTE: no std::max (not a CUDA __device__ function); must
109 // cast back up to complex.
110 const auto Y_ij_abs = KAT::abs (Y_ij);
111 const auto X_ij_abs = KAT::abs (X_ij);
112 Y_ij = (Y_ij_abs >= X_ij_abs) ?
113 static_cast<STY> (Y_ij_abs) :
114 static_cast<STY> (X_ij_abs);
115 }
116 }
117 }
118};
119
124template<class ViewType1,
125 class ViewType2>
126struct AbsMax<ViewType1, ViewType2, 1> {
129 static KOKKOS_INLINE_FUNCTION void
130 run (const ViewType2& Y, const ViewType1& X)
131 {
132 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
133 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
134
135 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
136 static_assert(! std::is_const<STY>::value,
137 "AbsMax: The type of each entry of Y must be nonconst.");
138 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
139 static_assert( std::is_same<STX, STY>::value,
140 "AbsMax: The type of each entry of X and Y must be the same.");
141 typedef Kokkos::Details::ArithTraits<STY> KAT;
142
143 const int numRows = Y.extent (0);
144 for (int i = 0; i < numRows; ++i) {
145 STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
146 const STX X_i = X(i);
147 // NOTE: no std::max (not a CUDA __device__ function); must
148 // cast back up to complex.
149 const auto Y_i_abs = KAT::abs (Y_i);
150 const auto X_i_abs = KAT::abs (X_i);
151 Y_i = (Y_i_abs >= X_i_abs) ?
152 static_cast<STY> (Y_i_abs) :
153 static_cast<STY> (X_i_abs);
154 }
155 }
156};
157
164template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
165KOKKOS_INLINE_FUNCTION void
166absMax (const ViewType2& Y, const ViewType1& X)
167{
168 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
169 "absMax: ViewType1 and ViewType2 must have the same rank.");
171}
172
177template<class ViewType,
178 class CoefficientType,
179 class LayoutType = typename ViewType::array_layout,
180 class IndexType = int,
181 const int rank = ViewType::rank>
182struct SCAL {
183 static KOKKOS_INLINE_FUNCTION void
184 run (const CoefficientType& alpha, const ViewType& x);
185};
186
189template<class ViewType,
190 class CoefficientType,
191 class LayoutType,
192 class IndexType>
193struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
195 static KOKKOS_INLINE_FUNCTION void
196 run (const CoefficientType& alpha, const ViewType& x)
197 {
198 const IndexType numRows = static_cast<IndexType> (x.extent (0));
199 // BLAS _SCAL doesn't check whether alpha is 0.
200 for (IndexType i = 0; i < numRows; ++i) {
201 x(i) = alpha * x(i);
202 }
203 }
204};
205
208template<class ViewType,
209 class CoefficientType,
210 class LayoutType,
211 class IndexType>
212struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
214 static KOKKOS_INLINE_FUNCTION void
215 run (const CoefficientType& alpha, const ViewType& A)
216 {
217 const IndexType numRows = static_cast<IndexType> (A.extent (0));
218 const IndexType numCols = static_cast<IndexType> (A.extent (1));
219
220 // BLAS _SCAL doesn't check whether alpha is 0.
221 for (IndexType i = 0; i < numRows; ++i) {
222 for (IndexType j = 0; j < numCols; ++j) {
223 A(i,j) = alpha * A(i,j);
224 }
225 }
226 }
227};
228
234template<class ViewType,
235 class CoefficientType,
236 class IndexType>
237struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
239 static KOKKOS_INLINE_FUNCTION void
240 run (const CoefficientType& alpha, const ViewType& A)
241 {
242 const IndexType N = A.size ();
243 typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
244 scalar_type* const A_raw = A.data ();
245
246 for (IndexType i = 0; i < N; ++i) {
247 A_raw[i] = alpha * A_raw[i];
248 }
249 }
250};
251
252
257template<class ViewType,
258 class InputType,
259 class LayoutType = typename ViewType::array_layout,
260 class IndexType = int,
261 const int rank = ViewType::rank>
262struct FILL {
263 static KOKKOS_INLINE_FUNCTION void
264 run (const ViewType& x, const InputType& val);
265};
266
269template<class ViewType,
270 class InputType,
271 class LayoutType,
272 class IndexType>
273struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
274 static KOKKOS_INLINE_FUNCTION void
275 run (const ViewType& x, const InputType& val)
276 {
277 const IndexType numRows = static_cast<IndexType> (x.extent (0));
278 for (IndexType i = 0; i < numRows; ++i) {
279 x(i) = val;
280 }
281 }
282};
283
286template<class ViewType,
287 class InputType,
288 class LayoutType,
289 class IndexType>
290struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
291 static KOKKOS_INLINE_FUNCTION void
292 run (const ViewType& X, const InputType& val)
293 {
294 const IndexType numRows = static_cast<IndexType> (X.extent (0));
295 const IndexType numCols = static_cast<IndexType> (X.extent (1));
296 for (IndexType j = 0; j < numCols; ++j) {
297 for (IndexType i = 0; i < numRows; ++i) {
298 X(i,j) = val;
299 }
300 }
301 }
302};
303
308template<class CoefficientType,
309 class ViewType1,
310 class ViewType2,
311 class LayoutType1 = typename ViewType1::array_layout,
312 class LayoutType2 = typename ViewType2::array_layout,
313 class IndexType = int,
314 const int rank = ViewType1::rank>
315struct AXPY {
316 static KOKKOS_INLINE_FUNCTION void
317 run (const CoefficientType& alpha,
318 const ViewType1& x,
319 const ViewType2& y);
320};
321
324template<class CoefficientType,
325 class ViewType1,
326 class ViewType2,
327 class LayoutType1,
328 class LayoutType2,
329 class IndexType>
330struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
332 static KOKKOS_INLINE_FUNCTION void
333 run (const CoefficientType& alpha,
334 const ViewType1& x,
335 const ViewType2& y)
336 {
337 using Kokkos::Details::ArithTraits;
338 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
339 "AXPY: x and y must have the same rank.");
340
341 const IndexType numRows = static_cast<IndexType> (y.extent (0));
342 if (alpha != ArithTraits<CoefficientType>::zero ()) {
343 for (IndexType i = 0; i < numRows; ++i) {
344 y(i) += alpha * x(i);
345 }
346 }
347 }
348};
349
352template<class CoefficientType,
353 class ViewType1,
354 class ViewType2,
355 class LayoutType1,
356 class LayoutType2,
357 class IndexType>
358struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
360 static KOKKOS_INLINE_FUNCTION void
361 run (const CoefficientType& alpha,
362 const ViewType1& X,
363 const ViewType2& Y)
364 {
365 using Kokkos::Details::ArithTraits;
366 static_assert (ViewType1::rank == ViewType2::rank,
367 "AXPY: X and Y must have the same rank.");
368 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
369 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
370
371 if (alpha != ArithTraits<CoefficientType>::zero ()) {
372 for (IndexType i = 0; i < numRows; ++i) {
373 for (IndexType j = 0; j < numCols; ++j) {
374 Y(i,j) += alpha * X(i,j);
375 }
376 }
377 }
378 }
379};
380
384template<class CoefficientType,
385 class ViewType1,
386 class ViewType2,
387 class IndexType>
388struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
390 static KOKKOS_INLINE_FUNCTION void
391 run (const CoefficientType& alpha,
392 const ViewType1& X,
393 const ViewType2& Y)
394 {
395 using Kokkos::Details::ArithTraits;
396 static_assert (static_cast<int> (ViewType1::rank) ==
397 static_cast<int> (ViewType2::rank),
398 "AXPY: X and Y must have the same rank.");
399 typedef typename std::decay<decltype (X(0,0)) >::type SX;
400 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
401
402 const IndexType N = static_cast<IndexType> (Y.size ());
403 const SX* const X_raw = X.data ();
404 SY* const Y_raw = Y.data ();
405
406 if (alpha != ArithTraits<CoefficientType>::zero ()) {
407 for (IndexType i = 0; i < N; ++i) {
408 Y_raw[i] += alpha * X_raw[i];
409 }
410 }
411 }
412};
413
417template<class CoefficientType,
418 class ViewType1,
419 class ViewType2,
420 class IndexType>
421struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
423 static KOKKOS_INLINE_FUNCTION void
424 run (const CoefficientType& alpha,
425 const ViewType1& X,
426 const ViewType2& Y)
427 {
428 using Kokkos::Details::ArithTraits;
429 static_assert (ViewType1::rank == ViewType2::rank,
430 "AXPY: X and Y must have the same rank.");
431 typedef typename std::decay<decltype (X(0,0)) >::type SX;
432 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
433
434 const IndexType N = static_cast<IndexType> (Y.size ());
435 const SX* const X_raw = X.data ();
436 SY* const Y_raw = Y.data ();
437
438 if (alpha != ArithTraits<CoefficientType>::zero ()) {
439 for (IndexType i = 0; i < N; ++i) {
440 Y_raw[i] += alpha * X_raw[i];
441 }
442 }
443 }
444};
445
450template<class ViewType1,
451 class ViewType2,
452 class LayoutType1 = typename ViewType1::array_layout,
453 class LayoutType2 = typename ViewType2::array_layout,
454 class IndexType = int,
455 const int rank = ViewType1::rank>
456struct COPY {
457 static KOKKOS_INLINE_FUNCTION void
458 run (const ViewType1& x, const ViewType2& y);
459};
460
463template<class ViewType1,
464 class ViewType2,
465 class LayoutType1,
466 class LayoutType2,
467 class IndexType>
468struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
470 static KOKKOS_INLINE_FUNCTION void
471 run (const ViewType1& x, const ViewType2& y)
472 {
473 const IndexType numRows = static_cast<IndexType> (x.extent (0));
474 for (IndexType i = 0; i < numRows; ++i) {
475 y(i) = x(i);
476 }
477 }
478};
479
482template<class ViewType1,
483 class ViewType2,
484 class LayoutType1,
485 class LayoutType2,
486 class IndexType>
487struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
489 static KOKKOS_INLINE_FUNCTION void
490 run (const ViewType1& X, const ViewType2& Y)
491 {
492 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
493 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
494
495 // BLAS _SCAL doesn't check whether alpha is 0.
496 for (IndexType i = 0; i < numRows; ++i) {
497 for (IndexType j = 0; j < numCols; ++j) {
498 Y(i,j) = X(i,j);
499 }
500 }
501 }
502};
503
507template<class ViewType1,
508 class ViewType2,
509 class IndexType>
510struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
512 static KOKKOS_INLINE_FUNCTION void
513 run (const ViewType1& X, const ViewType2& Y)
514 {
515 typedef typename std::decay<decltype (X(0,0)) >::type SX;
516 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
517
518 const IndexType N = static_cast<IndexType> (Y.size ());
519 const SX* const X_raw = X.data ();
520 SY* const Y_raw = Y.data ();
521
522 // BLAS _SCAL doesn't check whether alpha is 0.
523 for (IndexType i = 0; i < N; ++i) {
524 Y_raw[i] = X_raw[i];
525 }
526 }
527};
528
532template<class ViewType1,
533 class ViewType2,
534 class IndexType>
535struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
537 static KOKKOS_INLINE_FUNCTION void
538 run (const ViewType1& X, const ViewType2& Y)
539 {
540 typedef typename std::decay<decltype (X(0,0)) >::type SX;
541 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
542
543 const IndexType N = static_cast<IndexType> (Y.size ());
544 const SX* const X_raw = X.data ();
545 SY* const Y_raw = Y.data ();
546
547 // BLAS _SCAL doesn't check whether alpha is 0.
548 for (IndexType i = 0; i < N; ++i) {
549 Y_raw[i] = X_raw[i];
550 }
551 }
552};
553
554
555template<class VecType1,
556 class BlkType,
557 class VecType2,
558 class CoeffType,
559 class IndexType = int,
560 class VecLayoutType1 = typename VecType1::array_layout,
561 class BlkLayoutType = typename BlkType::array_layout,
562 class VecLayoutType2 = typename VecType2::array_layout>
563struct GEMV {
564 static KOKKOS_INLINE_FUNCTION void
565 run (const CoeffType& alpha,
566 const BlkType& A,
567 const VecType1& x,
568 const VecType2& y)
569 {
570 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
571 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
572 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
573 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
574
575 const IndexType numRows = static_cast<IndexType> (A.extent (0));
576 const IndexType numCols = static_cast<IndexType> (A.extent (1));
577
578 for (IndexType i = 0; i < numRows; ++i) {
579 y_elt_type y_i = y(i);
580 for (IndexType j = 0; j < numCols; ++j) {
581 y_i += alpha * A(i,j) * x(j);
582 }
583 y(i) = y_i;
584 }
585 }
586};
587
588template<class VecType1,
589 class BlkType,
590 class VecType2,
591 class CoeffType,
592 class IndexType>
593struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
594 Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
595{
596 static KOKKOS_INLINE_FUNCTION void
597 run (const CoeffType& alpha,
598 const BlkType& A,
599 const VecType1& x,
600 const VecType2& y)
601 {
602 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
603 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
604 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
605 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
606 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
607
608 const IndexType numRows = static_cast<IndexType> (A.extent (0));
609 const IndexType numCols = static_cast<IndexType> (A.extent (1));
610 const A_elt_type* const A_raw = A.data ();
611
612 for (IndexType i = 0; i < numRows; ++i) {
613 y_elt_type y_i = y(i);
614 const A_elt_type* const A_i = A_raw + i*numCols;
615 for (IndexType j = 0; j < numCols; ++j) {
616 y_i += alpha * A_i[j] * x(j);
617 }
618 y(i) = y_i;
619 }
620 }
621};
622
623template<class VecType1,
624 class BlkType,
625 class VecType2,
626 class CoeffType,
627 class IndexType>
628struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
629 Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
630{
631 static KOKKOS_INLINE_FUNCTION void
632 run (const CoeffType& alpha,
633 const BlkType& A,
634 const VecType1& x,
635 const VecType2& y)
636 {
637 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
638 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
639 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
640 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
641
642 const A_elt_type* const A_raw = A.data ();
643 const IndexType numRows = static_cast<IndexType> (A.extent (0));
644 const IndexType numCols = static_cast<IndexType> (A.extent (1));
645 for (IndexType j = 0; j < numCols; ++j) {
646 const A_elt_type* const A_j = A_raw + j*numRows;
647 for (IndexType i = 0; i < numRows; ++i) {
648 y(i) += alpha * A_j[i] * x(i);
649 }
650 }
651 }
652};
653
654} // namespace Impl
655
658template<class ViewType,
659 class CoefficientType,
660 class LayoutType = typename ViewType::array_layout,
661 class IndexType = int,
662 const int rank = ViewType::rank>
663KOKKOS_INLINE_FUNCTION void
664SCAL (const CoefficientType& alpha, const ViewType& x)
665{
667}
668
670template<class ViewType,
671 class InputType,
672 class LayoutType = typename ViewType::array_layout,
673 class IndexType = int,
674 const int rank = ViewType::rank>
675KOKKOS_INLINE_FUNCTION void
676FILL (const ViewType& x, const InputType& val)
677{
679}
680
686template<class CoefficientType,
687 class ViewType1,
688 class ViewType2,
689 class LayoutType1 = typename ViewType1::array_layout,
690 class LayoutType2 = typename ViewType2::array_layout,
691 class IndexType = int,
692 const int rank = ViewType1::rank>
693KOKKOS_INLINE_FUNCTION void
694AXPY (const CoefficientType& alpha,
695 const ViewType1& x,
696 const ViewType2& y)
697{
698 static_assert (static_cast<int> (ViewType1::rank) ==
699 static_cast<int> (ViewType2::rank),
700 "AXPY: x and y must have the same rank.");
701 Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
702 IndexType, rank>::run (alpha, x, y);
703}
704
713template<class ViewType1,
714 class ViewType2,
715 class LayoutType1 = typename ViewType1::array_layout,
716 class LayoutType2 = typename ViewType2::array_layout,
717 class IndexType = int,
718 const int rank = ViewType1::rank>
719KOKKOS_INLINE_FUNCTION void
720COPY (const ViewType1& x, const ViewType2& y)
721{
722 static_assert (static_cast<int> (ViewType1::rank) ==
723 static_cast<int> (ViewType2::rank),
724 "COPY: x and y must have the same rank.");
725 Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
726 rank>::run (x, y);
727}
728
740template<class VecType1,
741 class BlkType,
742 class VecType2,
743 class CoeffType,
744 class IndexType = int>
745KOKKOS_INLINE_FUNCTION void
746GEMV (const CoeffType& alpha,
747 const BlkType& A,
748 const VecType1& x,
749 const VecType2& y)
750{
751 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
752 IndexType>::run (alpha, A, x, y);
753}
754
762template<class ViewType1,
763 class ViewType2,
764 class ViewType3,
765 class CoefficientType,
766 class IndexType = int>
767KOKKOS_INLINE_FUNCTION void
768GEMM (const char transA[],
769 const char transB[],
770 const CoefficientType& alpha,
771 const ViewType1& A,
772 const ViewType2& B,
773 const CoefficientType& beta,
774 const ViewType3& C)
775{
776 // Assert that A, B, and C are in fact matrices
777 static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
778 static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
779 static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
780
781 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
782 typedef Kokkos::Details::ArithTraits<Scalar> STS;
783 const Scalar ZERO = STS::zero();
784 const Scalar ONE = STS::one();
785
786 // Get the dimensions
787 IndexType m, n, k;
788 if(transA[0] == 'N' || transA[0] == 'n') {
789 m = static_cast<IndexType> (A.extent (0));
790 n = static_cast<IndexType> (A.extent (1));
791 }
792 else {
793 m = static_cast<IndexType> (A.extent (1));
794 n = static_cast<IndexType> (A.extent (0));
795 }
796 k = static_cast<IndexType> (C.extent (1));
797
798 // quick return if possible
799 if(alpha == ZERO && beta == ONE)
800 return;
801
802 // And if alpha equals zero...
803 if(alpha == ZERO) {
804 if(beta == ZERO) {
805 for(IndexType i=0; i<m; i++) {
806 for(IndexType j=0; j<k; j++) {
807 C(i,j) = ZERO;
808 }
809 }
810 }
811 else {
812 for(IndexType i=0; i<m; i++) {
813 for(IndexType j=0; j<k; j++) {
814 C(i,j) = beta*C(i,j);
815 }
816 }
817 }
818 }
819
820 // Start the operations
821 if(transB[0] == 'n' || transB[0] == 'N') {
822 if(transA[0] == 'n' || transA[0] == 'N') {
823 // Form C = alpha*A*B + beta*C
824 for(IndexType j=0; j<n; j++) {
825 if(beta == ZERO) {
826 for(IndexType i=0; i<m; i++) {
827 C(i,j) = ZERO;
828 }
829 }
830 else if(beta != ONE) {
831 for(IndexType i=0; i<m; i++) {
832 C(i,j) = beta*C(i,j);
833 }
834 }
835 for(IndexType l=0; l<k; l++) {
836 Scalar temp = alpha*B(l,j);
837 for(IndexType i=0; i<m; i++) {
838 C(i,j) = C(i,j) + temp*A(i,l);
839 }
840 }
841 }
842 }
843 else {
844 // Form C = alpha*A**T*B + beta*C
845 for(IndexType j=0; j<n; j++) {
846 for(IndexType i=0; i<m; i++) {
847 Scalar temp = ZERO;
848 for(IndexType l=0; l<k; l++) {
849 temp = temp + A(l,i)*B(l,j);
850 }
851 if(beta == ZERO) {
852 C(i,j) = alpha*temp;
853 }
854 else {
855 C(i,j) = alpha*temp + beta*C(i,j);
856 }
857 }
858 }
859 }
860 }
861 else {
862 if(transA[0] == 'n' || transA[0] == 'N') {
863 // Form C = alpha*A*B**T + beta*C
864 for(IndexType j=0; j<n; j++) {
865 if(beta == ZERO) {
866 for(IndexType i=0; i<m; i++) {
867 C(i,j) = ZERO;
868 }
869 }
870 else if(beta != ONE) {
871 for(IndexType i=0; i<m; i++) {
872 C(i,j) = beta*C(i,j);
873 }
874 }
875 for(IndexType l=0; l<k; l++) {
876 Scalar temp = alpha*B(j,l);
877 for(IndexType i=0; i<m; i++) {
878 C(i,j) = C(i,j) + temp*A(i,l);
879 }
880 }
881 }
882 }
883 else {
884 // Form C = alpha*A**T*B**T + beta*C
885 for(IndexType j=0; j<n; j++) {
886 for(IndexType i=0; i<m; i++) {
887 Scalar temp = ZERO;
888 for(IndexType l=0; l<k; l++) {
889 temp = temp + A(l,i)*B(j,l);
890 }
891 if(beta == ZERO) {
892 C(i,j) = alpha*temp;
893 }
894 else {
895 C(i,j) = alpha*temp + beta*C(i,j);
896 }
897 }
898 }
899 }
900 }
901}
902
904template<class LittleBlockType,
905 class LittleVectorType>
906KOKKOS_INLINE_FUNCTION void
907GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
908{
909 // The type of an entry of ipiv is the index type.
910 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
911 static_assert (std::is_integral<IndexType>::value,
912 "GETF2: The type of each entry of ipiv must be an integer type.");
913 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
914 static_assert (! std::is_const<Scalar>::value,
915 "GETF2: A must not be a const View (or LittleBlock).");
916 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
917 "GETF2: ipiv must not be a const View (or LittleBlock).");
918 static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
919 typedef Kokkos::Details::ArithTraits<Scalar> STS;
920 const Scalar ZERO = STS::zero();
921
922 const IndexType numRows = static_cast<IndexType> (A.extent (0));
923 const IndexType numCols = static_cast<IndexType> (A.extent (1));
924 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
925
926 // std::min is not a CUDA device function
927 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
928 if (pivDim < minPivDim) {
929 info = -2;
930 return;
931 }
932
933 // Initialize info
934 info = 0;
935
936 for(IndexType j=0; j < pivDim; j++)
937 {
938 // Find pivot and test for singularity
939 IndexType jp = j;
940 for(IndexType i=j+1; i<numRows; i++)
941 {
942 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
943 jp = i;
944 }
945 }
946 ipiv(j) = jp+1;
947
948 if(A(jp,j) != ZERO)
949 {
950 // Apply the interchange to columns 1:N
951 if(jp != j)
952 {
953 for(IndexType i=0; i < numCols; i++)
954 {
955 Scalar temp = A(jp,i);
956 A(jp,i) = A(j,i);
957 A(j,i) = temp;
958 }
959 }
960
961 // Compute elements J+1:M of J-th column
962 for(IndexType i=j+1; i<numRows; i++) {
963 A(i,j) = A(i,j) / A(j,j);
964 }
965 }
966 else if(info == 0) {
967 info = j;
968 }
969
970 // Update trailing submatrix
971 for(IndexType r=j+1; r < numRows; r++)
972 {
973 for(IndexType c=j+1; c < numCols; c++) {
974 A(r,c) = A(r,c) - A(r,j) * A(j,c);
975 }
976 }
977 }
978}
979
980namespace Impl {
981
985template<class LittleBlockType,
986 class LittleIntVectorType,
987 class LittleScalarVectorType,
988 const int rank = LittleScalarVectorType::rank>
989struct GETRS {
990 static KOKKOS_INLINE_FUNCTION void
991 run (const char mode[],
992 const LittleBlockType& A,
993 const LittleIntVectorType& ipiv,
994 const LittleScalarVectorType& B,
995 int& info);
996};
997
999template<class LittleBlockType,
1000 class LittleIntVectorType,
1001 class LittleScalarVectorType>
1002struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1003 static KOKKOS_INLINE_FUNCTION void
1004 run (const char mode[],
1005 const LittleBlockType& A,
1006 const LittleIntVectorType& ipiv,
1007 const LittleScalarVectorType& B,
1008 int& info)
1009 {
1010 // The type of an entry of ipiv is the index type.
1011 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1012 // IndexType must be signed, because this code does a countdown loop
1013 // to zero. Unsigned integers are always >= 0, even on underflow.
1014 static_assert (std::is_integral<IndexType>::value &&
1015 std::is_signed<IndexType>::value,
1016 "GETRS: The type of each entry of ipiv must be a signed integer.");
1017 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1018 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1019 "GETRS: B must not be a const View (or LittleBlock).");
1020 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1021 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1022 static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
1023
1024 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1025 const Scalar ZERO = STS::zero();
1026 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1027 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1028 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1029
1030 info = 0;
1031
1032 // Ensure that the matrix is square
1033 if (numRows != numCols) {
1034 info = -2;
1035 return;
1036 }
1037
1038 // Ensure that the pivot array is sufficiently large
1039 if (pivDim < numRows) {
1040 info = -3;
1041 return;
1042 }
1043
1044 // No transpose case
1045 if(mode[0] == 'n' || mode[0] == 'N') {
1046 // Apply row interchanges to the RHS
1047 for(IndexType i=0; i<numRows; i++) {
1048 if(ipiv(i) != i+1) {
1049 Scalar temp = B(i);
1050 B(i) = B(ipiv(i)-1);
1051 B(ipiv(i)-1) = temp;
1052 }
1053 }
1054
1055 // Solve Lx=b, overwriting b with x
1056 for(IndexType r=1; r < numRows; r++) {
1057 for(IndexType c=0; c < r; c++) {
1058 B(r) = B(r) - A(r,c)*B(c);
1059 }
1060 }
1061
1062 // Solve Ux=b, overwriting b with x
1063 for(IndexType r=numRows-1; r >= 0; r--) {
1064 // Check whether U is singular
1065 if(A(r,r) == ZERO) {
1066 info = r+1;
1067 return;
1068 }
1069
1070 for(IndexType c=r+1; c < numCols; c++) {
1071 B(r) = B(r) - A(r,c)*B(c);
1072 }
1073 B(r) = B(r) / A(r,r);
1074 }
1075 }
1076 // Transpose case
1077 else if(mode[0] == 't' || mode[0] == 'T') {
1078 info = -1; // NOT YET IMPLEMENTED
1079 return;
1080 }
1081 // Conjugate transpose case
1082 else if (mode[0] == 'c' || mode[0] == 'C') {
1083 info = -1; // NOT YET IMPLEMENTED
1084 return;
1085 }
1086 else { // invalid mode
1087 info = -1;
1088 return;
1089 }
1090 }
1091};
1092
1093
1095template<class LittleBlockType,
1096 class LittleIntVectorType,
1097 class LittleScalarVectorType>
1098struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1099 static KOKKOS_INLINE_FUNCTION void
1100 run (const char mode[],
1101 const LittleBlockType& A,
1102 const LittleIntVectorType& ipiv,
1103 const LittleScalarVectorType& B,
1104 int& info)
1105 {
1106 // The type of an entry of ipiv is the index type.
1107 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1108 static_assert (std::is_integral<IndexType>::value,
1109 "GETRS: The type of each entry of ipiv must be an integer type.");
1110 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1111 "GETRS: B must not be a const View (or LittleBlock).");
1112 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1113 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1114 static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1115
1116 // The current implementation iterates over one right-hand side at
1117 // a time. It might be faster to do this differently, but this
1118 // should work for now.
1119 const IndexType numRhs = B.extent (1);
1120 info = 0;
1121
1122 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1123 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1124 GETRS<LittleBlockType, LittleIntVectorType, decltype (B_cur), 1>::run (mode, A, ipiv, B_cur, info);
1125 if (info != 0) {
1126 return;
1127 }
1128 }
1129 }
1130};
1131
1132} // namespace Impl
1133
1137template<class LittleBlockType,
1138 class LittleIntVectorType,
1139 class LittleScalarVectorType>
1140KOKKOS_INLINE_FUNCTION void
1141GETRS (const char mode[],
1142 const LittleBlockType& A,
1143 const LittleIntVectorType& ipiv,
1144 const LittleScalarVectorType& B,
1145 int& info)
1146{
1147 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1148 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1149}
1150
1151
1166template<class LittleBlockType,
1167 class LittleIntVectorType,
1168 class LittleScalarVectorType>
1169KOKKOS_INLINE_FUNCTION void
1170GETRI (const LittleBlockType& A,
1171 const LittleIntVectorType& ipiv,
1172 const LittleScalarVectorType& work,
1173 int& info)
1174{
1175 // The type of an entry of ipiv is the index type.
1176 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1177 // IndexType must be signed, because this code does a countdown loop
1178 // to zero. Unsigned integers are always >= 0, even on underflow.
1179 static_assert (std::is_integral<IndexType>::value &&
1180 std::is_signed<IndexType>::value,
1181 "GETRI: The type of each entry of ipiv must be a signed integer.");
1182 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1183 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1184 "GETRI: A must not be a const View (or LittleBlock).");
1185 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1186 "GETRI: work must not be a const View (or LittleBlock).");
1187 static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1188 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1189 const Scalar ZERO = STS::zero();
1190 const Scalar ONE = STS::one();
1191
1192 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1193 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1194 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1195 const IndexType workDim = static_cast<IndexType> (work.extent (0));
1196
1197 info = 0;
1198
1199 // Ensure that the matrix is square
1200 if (numRows != numCols) {
1201 info = -1;
1202 return;
1203 }
1204
1205 // Ensure that the pivot array is sufficiently large
1206 if (pivDim < numRows) {
1207 info = -2;
1208 return;
1209 }
1210
1211 // Ensure that the work array is sufficiently large
1212 if (workDim < numRows) {
1213 info = -3;
1214 return;
1215 }
1216
1217 // Form Uinv in place
1218 for(IndexType j=0; j < numRows; j++) {
1219 if(A(j,j) == ZERO) {
1220 info = j+1;
1221 return;
1222 }
1223
1224 A(j,j) = ONE / A(j,j);
1225
1226 // Compute elements 1:j-1 of j-th column
1227 for(IndexType r=0; r < j; r++) {
1228 A(r,j) = A(r,r)*A(r,j);
1229 for(IndexType c=r+1; c < j; c++) {
1230 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1231 }
1232 }
1233 for(IndexType r=0; r < j; r++) {
1234 A(r,j) = -A(j,j)*A(r,j);
1235 }
1236 }
1237
1238 // Compute Ainv by solving A\L = Uinv
1239 for(IndexType j = numCols-2; j >= 0; j--) {
1240 // Copy lower triangular data to work array and replace with 0
1241 for(IndexType r=j+1; r < numRows; r++) {
1242 work(r) = A(r,j);
1243 A(r,j) = 0;
1244 }
1245
1246 for(IndexType r=0; r < numRows; r++) {
1247 for(IndexType i=j+1; i < numRows; i++) {
1248 A(r,j) = A(r,j) - work(i)*A(r,i);
1249 }
1250 }
1251 }
1252
1253 // Apply column interchanges
1254 for(IndexType j=numRows-1; j >= 0; j--) {
1255 IndexType jp = ipiv(j)-1;
1256 if(j != jp) {
1257 for(IndexType r=0; r < numRows; r++) {
1258 Scalar temp = A(r,j);
1259 A(r,j) = A(r,jp);
1260 A(r,jp) = temp;
1261 }
1262 }
1263 }
1264}
1265
1266
1267// mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1268// an implementation for trans != 'N' (the transpose and conjugate
1269// transpose cases).
1270#if 0
1271template<class LittleBlockType,
1272 class LittleVectorTypeX,
1273 class LittleVectorTypeY,
1274 class CoefficientType,
1275 class IndexType = int>
1276KOKKOS_INLINE_FUNCTION void
1277GEMV (const char trans,
1278 const CoefficientType& alpha,
1279 const LittleBlockType& A,
1280 const LittleVectorTypeX& x,
1281 const CoefficientType& beta,
1282 const LittleVectorTypeY& y)
1283{
1284 // y(0) returns a reference to the 0-th entry of y. Remove that
1285 // reference to get the type of each entry of y. It's OK if y has
1286 // zero entries -- this doesn't actually do y(i), it just returns
1287 // the type of that expression.
1288 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1289 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1290 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1291
1292 if (beta == 0.0) {
1293 if (alpha == 0.0) {
1294 for (IndexType i = 0; i < numRows; ++i) {
1295 y(i) = 0.0;
1296 }
1297 }
1298 else {
1299 for (IndexType i = 0; i < numRows; ++i) {
1300 y_value_type y_i = 0.0;
1301 for (IndexType j = 0; j < numCols; ++j) {
1302 y_i += A(i,j) * x(j);
1303 }
1304 y(i) = y_i;
1305 }
1306 }
1307 }
1308 else { // beta != 0
1309 if (alpha == 0.0) {
1310 if (beta == 0.0) {
1311 for (IndexType i = 0; i < numRows; ++i) {
1312 y(i) = 0.0;
1313 }
1314 }
1315 else {
1316 for (IndexType i = 0; i < numRows; ++i) {
1317 y(i) *= beta;
1318 }
1319 }
1320 }
1321 else {
1322 for (IndexType i = 0; i < numRows; ++i) {
1323 y_value_type y_i = beta * y(i);
1324 for (IndexType j = 0; j < numCols; ++j) {
1325 y_i += alpha * A(i,j) * x(j);
1326 }
1327 y(i) = y_i;
1328 }
1329 }
1330 }
1331}
1332
1333#endif // 0
1334
1335} // namespace Tpetra
1336
1337#endif // TPETRA_BLOCKVIEW_HPP
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
@ ZERO
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::SCAL function.