40#ifndef TPETRA_MULTIVECTOR_DEF_HPP
41#define TPETRA_MULTIVECTOR_DEF_HPP
53#include "Tpetra_Vector.hpp"
65#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
66# include "Teuchos_SerialDenseMatrix.hpp"
68#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
69#include "KokkosCompat_View.hpp"
70#include "KokkosBlas.hpp"
71#include "KokkosKernels_Utils.hpp"
72#include "Kokkos_Random.hpp"
73#include "Kokkos_ArithTraits.hpp"
77#ifdef HAVE_TPETRA_INST_FLOAT128
80 template<
class Generator>
81 struct rand<Generator, __float128> {
82 static KOKKOS_INLINE_FUNCTION __float128 max ()
84 return static_cast<__float128
> (1.0);
86 static KOKKOS_INLINE_FUNCTION __float128
91 const __float128 scalingFactor =
92 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
93 static_cast<__float128
> (2.0);
94 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
95 const __float128 lowerOrderTerm =
96 static_cast<__float128
> (gen.drand ()) * scalingFactor;
97 return higherOrderTerm + lowerOrderTerm;
99 static KOKKOS_INLINE_FUNCTION __float128
100 draw (Generator& gen,
const __float128& range)
103 const __float128 scalingFactor =
104 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
105 static_cast<__float128
> (2.0);
106 const __float128 higherOrderTerm =
107 static_cast<__float128
> (gen.drand (range));
108 const __float128 lowerOrderTerm =
109 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
110 return higherOrderTerm + lowerOrderTerm;
112 static KOKKOS_INLINE_FUNCTION __float128
113 draw (Generator& gen,
const __float128& start,
const __float128& end)
116 const __float128 scalingFactor =
117 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
118 static_cast<__float128
> (2.0);
119 const __float128 higherOrderTerm =
120 static_cast<__float128
> (gen.drand (start, end));
121 const __float128 lowerOrderTerm =
122 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
123 return higherOrderTerm + lowerOrderTerm;
145 template<
class ST,
class LO,
class GO,
class NT>
147 allocDualView (
const size_t lclNumRows,
148 const size_t numCols,
149 const bool zeroOut =
true,
150 const bool allowPadding =
false)
152 using ::Tpetra::Details::Behavior;
153 using Kokkos::AllowPadding;
154 using Kokkos::view_alloc;
155 using Kokkos::WithoutInitializing;
157 typedef typename dual_view_type::t_dev dev_view_type;
162 const std::string label (
"MV::DualView");
163 const bool debug = Behavior::debug ();
173 dev_view_type d_view;
176 d_view = dev_view_type (view_alloc (label, AllowPadding),
177 lclNumRows, numCols);
180 d_view = dev_view_type (view_alloc (label),
181 lclNumRows, numCols);
186 d_view = dev_view_type (view_alloc (label,
189 lclNumRows, numCols);
192 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
193 lclNumRows, numCols);
204 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
205 KokkosBlas::fill (d_view, nan);
209 TEUCHOS_TEST_FOR_EXCEPTION
210 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
211 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
212 "allocDualView: d_view's dimensions actual dimensions do not match "
213 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
214 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
215 <<
". Please report this bug to the Tpetra developers.");
218 dual_view_type dv (d_view, Kokkos::create_mirror_view (d_view));
232 template<
class T,
class ExecSpace>
233 struct MakeUnmanagedView {
243 typedef typename Kokkos::Impl::if_c<
244 Kokkos::Impl::SpaceAccessibility<
245 typename ExecSpace::memory_space,
246 Kokkos::HostSpace>::accessible,
247 typename ExecSpace::device_type,
248 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
249 typedef Kokkos::LayoutLeft array_layout;
250 typedef Kokkos::View<T*, array_layout, host_exec_space,
251 Kokkos::MemoryUnmanaged> view_type;
253 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
255 const size_t numEnt =
static_cast<size_t> (x_in.size ());
259 return view_type (x_in.getRawPtr (), numEnt);
267 template<
class DualViewType>
269 takeSubview (
const DualViewType& X,
270 const Kokkos::Impl::ALL_t&,
271 const std::pair<size_t, size_t>& colRng)
273 if (X.extent (0) == 0 && X.extent (1) != 0) {
274 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
277 return subview (X, Kokkos::ALL (), colRng);
284 template<
class DualViewType>
286 takeSubview (
const DualViewType& X,
287 const std::pair<size_t, size_t>& rowRng,
288 const std::pair<size_t, size_t>& colRng)
290 if (X.extent (0) == 0 && X.extent (1) != 0) {
291 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
294 return subview (X, rowRng, colRng);
298 template<
class DualViewType>
300 getDualViewStride (
const DualViewType& dv)
304 const size_t LDA = dv.d_view.stride (1);
305 const size_t numRows = dv.extent (0);
308 return (numRows == 0) ? size_t (1) : numRows;
315 template<
class ViewType>
317 getViewStride (
const ViewType& view)
319 const size_t LDA = view.stride (1);
320 const size_t numRows = view.extent (0);
323 return (numRows == 0) ? size_t (1) : numRows;
330 template <
class impl_scalar_type,
class buffer_device_type>
333 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
336 if (! imports.need_sync_device ()) {
341 size_t localLengthThreshold =
343 return imports.extent(0) <= localLengthThreshold;
348 template <
class SC,
class LO,
class GO,
class NT>
350 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
352 if (! X.need_sync_device ()) {
357 size_t localLengthThreshold =
359 return X.getLocalLength () <= localLengthThreshold;
363 template <
class SC,
class LO,
class GO,
class NT>
365 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
372 using val_type =
typename MV::impl_scalar_type;
373 using mag_type =
typename MV::mag_type;
374 using dual_view_type =
typename MV::dual_view_type;
377 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
378 auto whichVecs = getMultiVectorWhichVectors (X);
382 const bool runOnHost = runKernelOnHost (X);
384 using view_type =
typename dual_view_type::t_host;
385 using array_layout =
typename view_type::array_layout;
386 using device_type =
typename view_type::device_type;
389 normImpl<val_type, array_layout, device_type,
390 mag_type> (norms, X_lcl, whichNorm, whichVecs,
391 isConstantStride, isDistributed, comm);
394 using view_type =
typename dual_view_type::t_dev;
395 using array_layout =
typename view_type::array_layout;
396 using device_type =
typename view_type::device_type;
399 normImpl<val_type, array_layout, device_type,
400 mag_type> (norms, X_lcl, whichNorm, whichVecs,
401 isConstantStride, isDistributed, comm);
410 template <
typename DstView,
typename SrcView>
411 struct AddAssignFunctor {
414 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
416 KOKKOS_INLINE_FUNCTION
void
417 operator () (
const size_t k)
const { tgt(k) += src(k); }
424 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
428 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
431 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
437 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
439 MultiVector (
const Teuchos::RCP<const map_type>& map,
440 const size_t numVecs,
441 const bool zeroOut) :
447 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
452 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
455 const Teuchos::DataAccess copyOrView) :
457 view_ (source.view_),
458 origView_ (source.origView_),
459 owningView_ (source.owningView_),
460 whichVectors_ (source.whichVectors_)
463 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
464 "const Teuchos::DataAccess): ";
468 if (copyOrView == Teuchos::Copy) {
472 this->
view_ = cpy.view_;
477 else if (copyOrView == Teuchos::View) {
480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
481 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
482 "invalid value " << copyOrView <<
". Valid values include "
483 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
484 << Teuchos::View <<
".");
488 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
490 MultiVector (
const Teuchos::RCP<const map_type>& map,
497 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
498 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
499 map->getNodeNumElements ();
500 const size_t lclNumRows_view = view.extent (0);
501 const size_t LDA = getDualViewStride (view);
503 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
504 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
505 std::invalid_argument,
"Kokkos::DualView does not match Map. "
506 "map->getNodeNumElements() = " << lclNumRows_map
507 <<
", view.extent(0) = " << lclNumRows_view
508 <<
", and getStride() = " << LDA <<
".");
510 using ::Tpetra::Details::Behavior;
511 const bool debug = Behavior::debug ();
513 using ::Tpetra::Details::checkGlobalDualViewValidity;
514 std::ostringstream gblErrStrm;
515 const bool verbose = Behavior::verbose ();
516 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
517 const bool gblValid =
518 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
520 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
521 (! gblValid, std::runtime_error, gblErrStrm.str ());
526 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
528 MultiVector (
const Teuchos::RCP<const map_type>& map,
529 const typename dual_view_type::t_dev& d_view) :
532 using Teuchos::ArrayRCP;
534 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
538 const size_t LDA = getViewStride (d_view);
539 const size_t lclNumRows = map->getNodeNumElements ();
540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
541 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
542 "Kokkos::View. map->getNodeNumElements() = " << lclNumRows
543 <<
", View's column stride = " << LDA
544 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
546 auto h_view = Kokkos::create_mirror_view (d_view);
549 using ::Tpetra::Details::Behavior;
550 const bool debug = Behavior::debug ();
552 using ::Tpetra::Details::checkGlobalDualViewValidity;
553 std::ostringstream gblErrStrm;
554 const bool verbose = Behavior::verbose ();
555 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
556 const bool gblValid =
557 checkGlobalDualViewValidity (&gblErrStrm, view_, verbose,
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
560 (! gblValid, std::runtime_error, gblErrStrm.str ());
565 view_.modify_device ();
570 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
572 MultiVector (
const Teuchos::RCP<const map_type>& map,
577 origView_ (origView),
578 owningView_ (origView)
580 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
582 const size_t LDA = getDualViewStride (origView);
584 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
585 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
586 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
587 <<
". This may also mean that the input origView's first dimension (number "
588 "of rows = " << origView.extent (0) <<
") does not not match the number "
589 "of entries in the Map on the calling process.");
591 using ::Tpetra::Details::Behavior;
592 const bool debug = Behavior::debug ();
594 using ::Tpetra::Details::checkGlobalDualViewValidity;
595 std::ostringstream gblErrStrm;
596 const bool verbose = Behavior::verbose ();
597 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
598 const bool gblValid_0 =
599 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
601 const bool gblValid_1 =
602 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
604 const bool gblValid = gblValid_0 && gblValid_1;
605 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
606 (! gblValid, std::runtime_error, gblErrStrm.str ());
611 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
615 const Teuchos::ArrayView<const size_t>& whichVectors) :
620 whichVectors_ (whichVectors.begin (), whichVectors.end ())
623 using Kokkos::subview;
624 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
626 using ::Tpetra::Details::Behavior;
627 const bool debug = Behavior::debug ();
629 using ::Tpetra::Details::checkGlobalDualViewValidity;
630 std::ostringstream gblErrStrm;
631 const bool verbose = Behavior::verbose ();
632 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
633 const bool gblValid =
634 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
637 (! gblValid, std::runtime_error, gblErrStrm.str ());
640 const size_t lclNumRows = map.is_null () ? size_t (0) :
641 map->getNodeNumElements ();
648 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
649 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
650 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
651 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
652 if (whichVectors.size () != 0) {
653 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
654 view.extent (1) != 0 && view.extent (1) == 0,
655 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
656 " = " << whichVectors.size () <<
" > 0.");
657 size_t maxColInd = 0;
658 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
659 for (size_type k = 0; k < whichVectors.size (); ++k) {
660 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
661 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
662 std::invalid_argument,
"whichVectors[" << k <<
"] = "
663 "Teuchos::OrdinalTraits<size_t>::invalid().");
664 maxColInd = std::max (maxColInd, whichVectors[k]);
666 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
667 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
668 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
669 <<
" <= max(whichVectors) = " << maxColInd <<
".");
674 const size_t LDA = getDualViewStride (view);
675 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
676 (LDA < lclNumRows, std::invalid_argument,
677 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
679 if (whichVectors.size () == 1) {
690 const std::pair<size_t, size_t> colRng (whichVectors[0],
691 whichVectors[0] + 1);
693 view_ = takeSubview (view_, ALL (), colRng);
695 whichVectors_.clear ();
699 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
701 MultiVector (
const Teuchos::RCP<const map_type>& map,
704 const Teuchos::ArrayView<const size_t>& whichVectors) :
707 origView_ (origView),
708 owningView_ (origView),
709 whichVectors_ (whichVectors.begin (), whichVectors.end ())
712 using Kokkos::subview;
713 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
715 using ::Tpetra::Details::Behavior;
716 const bool debug = Behavior::debug ();
718 using ::Tpetra::Details::checkGlobalDualViewValidity;
719 std::ostringstream gblErrStrm;
720 const bool verbose = Behavior::verbose ();
721 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
722 const bool gblValid_0 =
723 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
725 const bool gblValid_1 =
726 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
728 const bool gblValid = gblValid_0 && gblValid_1;
729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
730 (! gblValid, std::runtime_error, gblErrStrm.str ());
740 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
741 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
742 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
743 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
744 if (whichVectors.size () != 0) {
745 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
746 view.extent (1) != 0 && view.extent (1) == 0,
747 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
748 " = " << whichVectors.size () <<
" > 0.");
749 size_t maxColInd = 0;
750 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
751 for (size_type k = 0; k < whichVectors.size (); ++k) {
752 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
753 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
754 std::invalid_argument,
"whichVectors[" << k <<
"] = "
755 "Teuchos::OrdinalTraits<size_t>::invalid().");
756 maxColInd = std::max (maxColInd, whichVectors[k]);
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
760 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
761 <<
" <= max(whichVectors) = " << maxColInd <<
".");
766 const size_t LDA = getDualViewStride (origView);
767 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
768 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
769 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
770 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
771 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
773 if (whichVectors.size () == 1) {
782 const std::pair<size_t, size_t> colRng (whichVectors[0],
783 whichVectors[0] + 1);
791 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
793 MultiVector (
const Teuchos::RCP<const map_type>& map,
794 const Teuchos::ArrayView<const Scalar>& data,
796 const size_t numVecs) :
799 typedef LocalOrdinal LO;
800 typedef GlobalOrdinal GO;
801 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
807 const size_t lclNumRows =
808 map.is_null () ? size_t (0) : map->getNodeNumElements ();
809 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
810 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
811 "map->getNodeNumElements() = " << lclNumRows <<
".");
813 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
814 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
815 (
static_cast<size_t> (data.size ()) < minNumEntries,
816 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
817 "entries, given the input Map and number of vectors in the MultiVector."
818 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
819 "map->getNodeNumElements () = " << minNumEntries <<
".");
822 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
836 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
837 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
838 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
843 const size_t outStride =
844 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
845 if (LDA == outStride) {
851 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
853 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
855 for (
size_t j = 0; j < numVecs; ++j) {
856 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
857 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
865 MultiVector (
const Teuchos::RCP<const map_type>& map,
866 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
867 const size_t numVecs) :
871 typedef LocalOrdinal LO;
872 typedef GlobalOrdinal GO;
873 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
876 const size_t lclNumRows =
877 map.is_null () ? size_t (0) : map->getNodeNumElements ();
878 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
879 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
880 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
881 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
882 for (
size_t j = 0; j < numVecs; ++j) {
883 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
884 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
885 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
886 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
887 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
891 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
896 using array_layout =
typename decltype (X_out)::array_layout;
897 using input_col_view_type =
typename Kokkos::View<
const IST*,
900 Kokkos::MemoryUnmanaged>;
902 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
903 for (
size_t j = 0; j < numVecs; ++j) {
904 Teuchos::ArrayView<const IST> X_j_av =
905 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
906 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
907 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
914 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
917 return whichVectors_.empty ();
920 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
925 if (this->getMap ().is_null ()) {
926 return static_cast<size_t> (0);
928 return this->getMap ()->getNodeNumElements ();
932 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
937 if (this->getMap ().is_null ()) {
938 return static_cast<size_t> (0);
940 return this->getMap ()->getGlobalNumElements ();
944 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
949 return isConstantStride () ? getDualViewStride (origView_) : size_t (0);
952 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
958 auto thisData = view_.h_view.data();
959 auto otherData = other.
view_.h_view.data();
960 size_t thisSpan = view_.h_view.span();
961 size_t otherSpan = other.
view_.h_view.span();
962 return (otherData <= thisData && thisData < otherData + otherSpan)
963 || (thisData <= otherData && otherData < thisData + thisSpan);
966 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
975 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
976 if (src ==
nullptr) {
990 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
994 return this->getNumVectors ();
997 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1002 const size_t numSameIDs,
1003 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1004 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1007 using ::Tpetra::Details::Behavior;
1009 using ::Tpetra::Details::ProfilingRegion;
1011 using KokkosRefactor::Details::permute_array_multi_column;
1012 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1013 using Kokkos::Compat::create_const_view;
1015 const char tfecfFuncName[] =
"copyAndPermute: ";
1016 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
1018 const bool verbose = Behavior::verbose ();
1019 std::unique_ptr<std::string> prefix;
1021 auto map = this->getMap ();
1022 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1023 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1024 std::ostringstream os;
1025 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1026 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1027 os <<
"Start" << endl;
1028 std::cerr << os.str ();
1031 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1032 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1033 std::logic_error,
"permuteToLIDs.extent(0) = "
1034 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1035 << permuteFromLIDs.extent (0) <<
".");
1038 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1039 const size_t numCols = this->getNumVectors ();
1043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1044 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1045 std::logic_error,
"Input MultiVector needs sync to both host "
1047 const bool copyOnHost = runKernelOnHost(sourceMV);
1049 std::ostringstream os;
1050 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1051 std::cerr << os.str ();
1055 std::ostringstream os;
1056 os << *prefix <<
"Copy" << endl;
1057 std::cerr << os.str ();
1082 if (numSameIDs > 0) {
1083 const std::pair<size_t, size_t> rows (0, numSameIDs);
1085 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1086 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1088 for (
size_t j = 0; j < numCols; ++j) {
1089 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1090 const size_t srcCol =
1091 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1093 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1094 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1098 Kokkos::RangePolicy<typename Node::execution_space, size_t>;
1099 range_t rp(0,numSameIDs);
1100 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1102 Kokkos::parallel_for(rp, aaf);
1111 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1112 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1114 for (
size_t j = 0; j < numCols; ++j) {
1115 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1116 const size_t srcCol =
1117 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1119 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1120 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1124 Kokkos::RangePolicy<typename Node::execution_space, size_t>;
1125 range_t rp(0,numSameIDs);
1126 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1128 Kokkos::parallel_for(rp, aaf);
1149 if (permuteFromLIDs.extent (0) == 0 ||
1150 permuteToLIDs.extent (0) == 0) {
1152 std::ostringstream os;
1153 os << *prefix <<
"No permutations. Done!" << endl;
1154 std::cerr << os.str ();
1160 std::ostringstream os;
1161 os << *prefix <<
"Permute" << endl;
1162 std::cerr << os.str ();
1167 const bool nonConstStride =
1168 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1171 std::ostringstream os;
1172 os << *prefix <<
"nonConstStride="
1173 << (nonConstStride ?
"true" :
"false") << endl;
1174 std::cerr << os.str ();
1181 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1182 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1183 if (nonConstStride) {
1184 if (this->whichVectors_.size () == 0) {
1185 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1186 tmpTgt.modify_host ();
1187 for (
size_t j = 0; j < numCols; ++j) {
1188 tmpTgt.h_view(j) = j;
1191 tmpTgt.sync_device ();
1193 tgtWhichVecs = tmpTgt;
1196 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1198 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1203 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1204 this->getNumVectors (),
1205 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1206 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1207 this->getNumVectors () <<
".");
1209 if (sourceMV.whichVectors_.size () == 0) {
1210 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1211 tmpSrc.modify_host ();
1212 for (
size_t j = 0; j < numCols; ++j) {
1213 tmpSrc.h_view(j) = j;
1216 tmpSrc.sync_device ();
1218 srcWhichVecs = tmpSrc;
1221 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1222 sourceMV.whichVectors_ ();
1224 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1228 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1229 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1230 sourceMV.getNumVectors (), std::logic_error,
1231 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1232 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1238 std::ostringstream os;
1239 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1240 std::cerr << os.str ();
1242 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1243 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1245 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1246 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1247 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1248 auto permuteFromLIDs_h =
1249 create_const_view (permuteFromLIDs.view_host ());
1252 std::ostringstream os;
1253 os << *prefix <<
"Permute on host" << endl;
1254 std::cerr << os.str ();
1256 if (nonConstStride) {
1259 auto tgtWhichVecs_h =
1260 create_const_view (tgtWhichVecs.view_host ());
1261 auto srcWhichVecs_h =
1262 create_const_view (srcWhichVecs.view_host ());
1264 using op_type = KokkosRefactor::Details::AddOp;
1265 permute_array_multi_column_variable_stride (tgt_h, src_h,
1269 srcWhichVecs_h, numCols,
1273 using op_type = KokkosRefactor::Details::InsertOp;
1274 permute_array_multi_column_variable_stride (tgt_h, src_h,
1278 srcWhichVecs_h, numCols,
1284 using op_type = KokkosRefactor::Details::AddOp;
1285 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1286 permuteFromLIDs_h, numCols, op_type());
1289 using op_type = KokkosRefactor::Details::InsertOp;
1290 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1291 permuteFromLIDs_h, numCols, op_type());
1297 std::ostringstream os;
1298 os << *prefix <<
"Get permute LIDs on device" << endl;
1299 std::cerr << os.str ();
1301 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1302 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1304 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1305 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1306 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1307 auto permuteFromLIDs_d =
1308 create_const_view (permuteFromLIDs.view_device ());
1311 std::ostringstream os;
1312 os << *prefix <<
"Permute on device" << endl;
1313 std::cerr << os.str ();
1315 if (nonConstStride) {
1318 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1319 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1321 using op_type = KokkosRefactor::Details::AddOp;
1322 permute_array_multi_column_variable_stride (tgt_d, src_d,
1326 srcWhichVecs_d, numCols,
1330 using op_type = KokkosRefactor::Details::InsertOp;
1331 permute_array_multi_column_variable_stride (tgt_d, src_d,
1335 srcWhichVecs_d, numCols,
1341 using op_type = KokkosRefactor::Details::AddOp;
1342 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1343 permuteFromLIDs_d, numCols, op_type());
1346 using op_type = KokkosRefactor::Details::InsertOp;
1347 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1348 permuteFromLIDs_d, numCols, op_type());
1354 std::ostringstream os;
1355 os << *prefix <<
"Done!" << endl;
1356 std::cerr << os.str ();
1360 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1365 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1366 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1367 Kokkos::DualView<size_t*, buffer_device_type> ,
1368 size_t& constantNumPackets)
1370 using ::Tpetra::Details::Behavior;
1371 using ::Tpetra::Details::ProfilingRegion;
1373 using Kokkos::Compat::create_const_view;
1374 using Kokkos::Compat::getKokkosViewDeepCopy;
1377 const char tfecfFuncName[] =
"packAndPrepare: ";
1378 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1386 const bool debugCheckIndices = Behavior::debug ();
1391 const bool printDebugOutput = Behavior::verbose ();
1393 std::unique_ptr<std::string> prefix;
1394 if (printDebugOutput) {
1395 auto map = this->getMap ();
1396 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1397 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1398 std::ostringstream os;
1399 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1400 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1401 os <<
"Start" << endl;
1402 std::cerr << os.str ();
1406 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1408 const size_t numCols = sourceMV.getNumVectors ();
1413 constantNumPackets = numCols;
1417 if (exportLIDs.extent (0) == 0) {
1418 if (printDebugOutput) {
1419 std::ostringstream os;
1420 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1421 std::cerr << os.str ();
1441 const size_t numExportLIDs = exportLIDs.extent (0);
1442 const size_t newExportsSize = numCols * numExportLIDs;
1443 if (printDebugOutput) {
1444 std::ostringstream os;
1445 os << *prefix <<
"realloc: "
1446 <<
"numExportLIDs: " << numExportLIDs
1447 <<
", exports.extent(0): " << exports.extent (0)
1448 <<
", newExportsSize: " << newExportsSize << std::endl;
1449 std::cerr << os.str ();
1455 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1456 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1457 std::logic_error,
"Input MultiVector needs sync to both host "
1459 const bool packOnHost = runKernelOnHost(sourceMV);
1460 if (printDebugOutput) {
1461 std::ostringstream os;
1462 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1463 std::cerr << os.str ();
1475 exports.clear_sync_state ();
1476 exports.modify_host ();
1484 exports.clear_sync_state ();
1485 exports.modify_device ();
1501 if (sourceMV.isConstantStride ()) {
1502 using KokkosRefactor::Details::pack_array_single_column;
1503 if (printDebugOutput) {
1504 std::ostringstream os;
1505 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1506 std::cerr << os.str ();
1509 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1510 pack_array_single_column (exports.view_host (),
1512 exportLIDs.view_host (),
1517 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1518 pack_array_single_column (exports.view_device (),
1520 exportLIDs.view_device (),
1526 using KokkosRefactor::Details::pack_array_single_column;
1527 if (printDebugOutput) {
1528 std::ostringstream os;
1529 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1530 std::cerr << os.str ();
1533 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1534 pack_array_single_column (exports.view_host (),
1536 exportLIDs.view_host (),
1537 sourceMV.whichVectors_[0],
1541 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1542 pack_array_single_column (exports.view_device (),
1544 exportLIDs.view_device (),
1545 sourceMV.whichVectors_[0],
1551 if (sourceMV.isConstantStride ()) {
1552 using KokkosRefactor::Details::pack_array_multi_column;
1553 if (printDebugOutput) {
1554 std::ostringstream os;
1555 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1556 std::cerr << os.str ();
1559 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1560 pack_array_multi_column (exports.view_host (),
1562 exportLIDs.view_host (),
1567 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1568 pack_array_multi_column (exports.view_device (),
1570 exportLIDs.view_device (),
1576 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1577 if (printDebugOutput) {
1578 std::ostringstream os;
1579 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1581 std::cerr << os.str ();
1586 using IST = impl_scalar_type;
1587 using DV = Kokkos::DualView<IST*, device_type>;
1588 using HES =
typename DV::t_host::execution_space;
1589 using DES =
typename DV::t_dev::execution_space;
1590 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1592 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1593 pack_array_multi_column_variable_stride
1594 (exports.view_host (),
1596 exportLIDs.view_host (),
1597 getKokkosViewDeepCopy<HES> (whichVecs),
1602 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1603 pack_array_multi_column_variable_stride
1604 (exports.view_device (),
1606 exportLIDs.view_device (),
1607 getKokkosViewDeepCopy<DES> (whichVecs),
1614 if (printDebugOutput) {
1615 std::ostringstream os;
1616 os << *prefix <<
"Done!" << endl;
1617 std::cerr << os.str ();
1623 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1625 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1626 typename NO::device_type::memory_space>::value,
1631 const std::string* prefix,
1632 const bool areRemoteLIDsContiguous,
1641 std::ostringstream os;
1642 os << *prefix <<
"Realloc (if needed) imports_ from "
1643 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1644 std::cerr << os.str ();
1647 bool reallocated =
false;
1649 using IST = impl_scalar_type;
1650 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1662 if ((dual_view_type::impl_dualview_is_single_device::value ||
1665 areRemoteLIDsContiguous &&
1667 (getNumVectors() == 1) &&
1670 size_t offset = getLocalLength () - newSize;
1671 reallocated = this->imports_.d_view.data() != view_.d_view.data() + offset;
1673 typedef std::pair<size_t, size_t> range_type;
1674 this->imports_ = DV(view_,
1675 range_type (offset, getLocalLength () ),
1679 std::ostringstream os;
1680 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1681 std::cerr << os.str ();
1691 std::ostringstream os;
1692 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1693 std::cerr << os.str ();
1695 this->imports_ = this->unaliased_imports_;
1700 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1702 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1703 typename NO::device_type::memory_space>::value,
1708 const std::string* prefix,
1709 const bool areRemoteLIDsContiguous,
1715 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1720 const std::string* prefix,
1721 const bool areRemoteLIDsContiguous,
1724 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1727 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1731 return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
1732 view_.d_view.data() + view_.d_view.extent(0));
1736 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1740 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1741 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1742 Kokkos::DualView<size_t*, buffer_device_type> ,
1743 const size_t constantNumPackets,
1746 using ::Tpetra::Details::Behavior;
1747 using ::Tpetra::Details::ProfilingRegion;
1748 using KokkosRefactor::Details::unpack_array_multi_column;
1749 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1750 using Kokkos::Compat::getKokkosViewDeepCopy;
1752 const char longFuncName[] =
"Tpetra::MultiVector::unpackAndCombine";
1753 const char tfecfFuncName[] =
"unpackAndCombine: ";
1754 ProfilingRegion regionUAC (longFuncName);
1762 const bool debugCheckIndices = Behavior::debug ();
1764 const bool printDebugOutput = Behavior::verbose ();
1765 std::unique_ptr<std::string> prefix;
1766 if (printDebugOutput) {
1767 auto map = this->getMap ();
1768 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1769 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1770 std::ostringstream os;
1771 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1772 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1773 os <<
"Start" << endl;
1774 std::cerr << os.str ();
1778 if (importLIDs.extent (0) == 0) {
1779 if (printDebugOutput) {
1780 std::ostringstream os;
1781 os << *prefix <<
"No imports. Done!" << endl;
1782 std::cerr << os.str ();
1788 if (importsAreAliased()) {
1789 if (printDebugOutput) {
1790 std::ostringstream os;
1791 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1792 std::cerr << os.str ();
1798 const size_t numVecs = getNumVectors ();
1799 if (debugCheckIndices) {
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1801 (
static_cast<size_t> (imports.extent (0)) !=
1802 numVecs * importLIDs.extent (0),
1804 "imports.extent(0) = " << imports.extent (0)
1805 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1806 <<
" * " << importLIDs.extent (0) <<
" = "
1807 << numVecs * importLIDs.extent (0) <<
".");
1809 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1810 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1811 "constantNumPackets input argument must be nonzero.");
1813 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1814 (
static_cast<size_t> (numVecs) !=
1815 static_cast<size_t> (constantNumPackets),
1816 std::runtime_error,
"constantNumPackets must equal numVecs.");
1824 const bool unpackOnHost = runKernelOnHost(imports);
1826 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1829 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1832 if (printDebugOutput) {
1833 std::ostringstream os;
1834 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1836 std::cerr << os.str ();
1841 auto imports_d = imports.view_device ();
1842 auto imports_h = imports.view_host ();
1843 auto importLIDs_d = importLIDs.view_device ();
1844 auto importLIDs_h = importLIDs.view_host ();
1846 Kokkos::DualView<size_t*, device_type> whichVecs;
1847 if (! isConstantStride ()) {
1848 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1849 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1851 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1853 whichVecs.modify_host ();
1857 whichVecs.modify_device ();
1861 auto whichVecs_d = whichVecs.view_device ();
1862 auto whichVecs_h = whichVecs.view_host ();
1872 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1873 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1874 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1877 const bool use_atomic_updates = unpackOnHost ?
1878 host_exec_space::concurrency () != 1 :
1879 dev_exec_space::concurrency () != 1;
1881 if (printDebugOutput) {
1882 std::ostringstream os;
1884 std::cerr << os.str ();
1891 using op_type = KokkosRefactor::Details::InsertOp;
1892 if (isConstantStride ()) {
1894 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1895 unpack_array_multi_column (host_exec_space (),
1896 X_h, imports_h, importLIDs_h,
1897 op_type (), numVecs,
1903 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1904 unpack_array_multi_column (dev_exec_space (),
1905 X_d, imports_d, importLIDs_d,
1906 op_type (), numVecs,
1913 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1914 unpack_array_multi_column_variable_stride (host_exec_space (),
1924 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1925 unpack_array_multi_column_variable_stride (dev_exec_space (),
1937 using op_type = KokkosRefactor::Details::AddOp;
1938 if (isConstantStride ()) {
1940 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1941 unpack_array_multi_column (host_exec_space (),
1942 X_h, imports_h, importLIDs_h,
1943 op_type (), numVecs,
1948 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1949 unpack_array_multi_column (dev_exec_space (),
1950 X_d, imports_d, importLIDs_d,
1951 op_type (), numVecs,
1958 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1959 unpack_array_multi_column_variable_stride (host_exec_space (),
1969 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1970 unpack_array_multi_column_variable_stride (dev_exec_space (),
1982 using op_type = KokkosRefactor::Details::AbsMaxOp;
1983 if (isConstantStride ()) {
1985 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1986 unpack_array_multi_column (host_exec_space (),
1987 X_h, imports_h, importLIDs_h,
1988 op_type (), numVecs,
1993 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1994 unpack_array_multi_column (dev_exec_space (),
1995 X_d, imports_d, importLIDs_d,
1996 op_type (), numVecs,
2003 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2004 unpack_array_multi_column_variable_stride (host_exec_space (),
2014 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2015 unpack_array_multi_column_variable_stride (dev_exec_space (),
2027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2028 (
true, std::logic_error,
"Invalid CombineMode");
2032 if (printDebugOutput) {
2033 std::ostringstream os;
2034 os << *prefix <<
"Nothing to unpack" << endl;
2035 std::cerr << os.str ();
2039 if (printDebugOutput) {
2040 std::ostringstream os;
2041 os << *prefix <<
"Done!" << endl;
2042 std::cerr << os.str ();
2046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2051 if (isConstantStride ()) {
2052 return static_cast<size_t> (view_.extent (1));
2054 return static_cast<size_t> (whichVectors_.size ());
2062 gblDotImpl (
const RV& dotsOut,
2063 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2064 const bool distributed)
2066 using Teuchos::REDUCE_MAX;
2067 using Teuchos::REDUCE_SUM;
2068 using Teuchos::reduceAll;
2069 typedef typename RV::non_const_value_type
dot_type;
2071 const size_t numVecs = dotsOut.extent (0);
2086 if (distributed && ! comm.is_null ()) {
2089 const int nv =
static_cast<int> (numVecs);
2090 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
2092 if (commIsInterComm) {
2096 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2098 const dot_type*
const lclSum = lclDots.data ();
2099 dot_type*
const gblSum = dotsOut.data ();
2100 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2103 dot_type*
const inout = dotsOut.data ();
2104 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2110 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2114 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2116 using ::Tpetra::Details::Behavior;
2117 using Kokkos::subview;
2118 using Teuchos::Comm;
2119 using Teuchos::null;
2122 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2123 typedef typename dual_view_type::t_dev_const XMV;
2124 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2128 const size_t numVecs = this->getNumVectors ();
2132 const size_t lclNumRows = this->getLocalLength ();
2133 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2134 const bool debug = Behavior::debug ();
2137 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2139 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2140 "compatible with the input MultiVector A. We only test for this "
2151 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2153 "MultiVectors do not have the same local length. "
2154 "this->getLocalLength() = " << lclNumRows <<
" != "
2156 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2158 "MultiVectors must have the same number of columns (vectors). "
2159 "this->getNumVectors() = " << numVecs <<
" != "
2161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2162 numDots != numVecs, std::runtime_error,
2163 "The output array 'dots' must have the same number of entries as the "
2164 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2165 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2167 const std::pair<size_t, size_t> colRng (0, numVecs);
2168 RV dotsOut = subview (dots, colRng);
2169 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2170 this->getMap ()->getComm ();
2172 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2175 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2176 this->whichVectors_.getRawPtr (),
2179 gblDotImpl (dotsOut, comm, this->isDistributed ());
2183 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2188 using ::Tpetra::Details::ProfilingRegion;
2190 using dot_type =
typename MV::dot_type;
2191 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2194 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2195 map.is_null () ? Teuchos::null : map->getComm ();
2196 if (comm.is_null ()) {
2197 return Kokkos::ArithTraits<dot_type>::zero ();
2200 using LO = LocalOrdinal;
2204 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2206 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2207 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2208 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2215 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2217 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2218 lclDot = KokkosBlas::dot (x_1d, y_1d);
2221 using Teuchos::outArg;
2222 using Teuchos::REDUCE_SUM;
2223 using Teuchos::reduceAll;
2224 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2236 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2240 const Teuchos::ArrayView<dot_type>& dots)
const
2242 const char tfecfFuncName[] =
"dot: ";
2245 const size_t numVecs = this->getNumVectors ();
2246 const size_t lclNumRows = this->getLocalLength ();
2247 const size_t numDots =
static_cast<size_t> (dots.size ());
2257 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2259 "MultiVectors do not have the same local length. "
2260 "this->getLocalLength() = " << lclNumRows <<
" != "
2262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2264 "MultiVectors must have the same number of columns (vectors). "
2265 "this->getNumVectors() = " << numVecs <<
" != "
2267 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2268 (numDots != numVecs, std::runtime_error,
2269 "The output array 'dots' must have the same number of entries as the "
2270 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2271 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2274 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2278 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2285 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2288 using ::Tpetra::Details::NORM_TWO;
2289 using ::Tpetra::Details::ProfilingRegion;
2290 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2293 MV& X =
const_cast<MV&
> (*this);
2294 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2297 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2300 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2302 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2303 this->norm2 (norms_av);
2307 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2310 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2313 using ::Tpetra::Details::NORM_ONE;
2314 using ::Tpetra::Details::ProfilingRegion;
2315 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2318 MV& X =
const_cast<MV&
> (*this);
2319 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2322 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2325 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2327 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2328 this->norm1 (norms_av);
2331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2334 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2337 using ::Tpetra::Details::NORM_INF;
2338 using ::Tpetra::Details::ProfilingRegion;
2339 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2342 MV& X =
const_cast<MV&
> (*this);
2343 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2346 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2349 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2351 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2352 this->normInf (norms_av);
2355 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2358 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2362 using Kokkos::subview;
2363 using Teuchos::Comm;
2365 using Teuchos::reduceAll;
2366 using Teuchos::REDUCE_SUM;
2367 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2369 const size_t lclNumRows = this->getLocalLength ();
2370 const size_t numVecs = this->getNumVectors ();
2371 const size_t numMeans =
static_cast<size_t> (means.size ());
2373 TEUCHOS_TEST_FOR_EXCEPTION(
2374 numMeans != numVecs, std::runtime_error,
2375 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2376 <<
" != this->getNumVectors() = " << numVecs <<
".");
2378 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2379 const std::pair<size_t, size_t> colRng (0, numVecs);
2384 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2386 typename local_view_type::HostMirror::array_layout,
2388 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2389 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2391 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2392 this->getMap ()->getComm ();
2395 const bool useHostVersion = this->need_sync_device ();
2396 if (useHostVersion) {
2398 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2399 rowRng, Kokkos::ALL ());
2401 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2402 if (isConstantStride ()) {
2403 KokkosBlas::sum (lclSums, X_lcl);
2406 for (
size_t j = 0; j < numVecs; ++j) {
2407 const size_t col = whichVectors_[j];
2408 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2415 if (! comm.is_null () && this->isDistributed ()) {
2416 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2417 lclSums.data (), meansOut.data ());
2425 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2426 rowRng, Kokkos::ALL ());
2429 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2430 if (isConstantStride ()) {
2431 KokkosBlas::sum (lclSums, X_lcl);
2434 for (
size_t j = 0; j < numVecs; ++j) {
2435 const size_t col = whichVectors_[j];
2436 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2444 if (! comm.is_null () && this->isDistributed ()) {
2445 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2446 lclSums.data (), meansOut.data ());
2456 const impl_scalar_type OneOverN =
2457 ATS::one () /
static_cast<mag_type
> (this->getGlobalLength ());
2458 for (
size_t k = 0; k < numMeans; ++k) {
2459 meansOut(k) = meansOut(k) * OneOverN;
2464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2470 typedef Kokkos::Details::ArithTraits<IST> ATS;
2471 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2472 typedef typename pool_type::generator_type generator_type;
2474 const IST max = Kokkos::rand<generator_type, IST>::max ();
2475 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2477 this->randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2481 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2487 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2498 const uint64_t myRank =
2499 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2500 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2501 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2503 pool_type rand_pool (seed);
2504 const IST max =
static_cast<IST
> (maxVal);
2505 const IST min =
static_cast<IST
> (minVal);
2507 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2509 if (isConstantStride ()) {
2510 Kokkos::fill_random (thisView, rand_pool, min, max);
2513 const size_t numVecs = getNumVectors ();
2514 for (
size_t k = 0; k < numVecs; ++k) {
2515 const size_t col = whichVectors_[k];
2516 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2517 Kokkos::fill_random (X_k, rand_pool, min, max);
2522 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2527 using ::Tpetra::Details::ProfilingRegion;
2528 using ::Tpetra::Details::Blas::fill;
2529 using DES =
typename dual_view_type::t_dev::execution_space;
2530 using HES =
typename dual_view_type::t_host::execution_space;
2531 using LO = LocalOrdinal;
2532 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2537 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2538 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2544 const bool runOnHost = runKernelOnHost(*
this);
2546 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2547 if (this->isConstantStride ()) {
2548 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2551 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2552 this->whichVectors_.getRawPtr ());
2556 auto X = this->getLocalViewHost(Access::OverwriteAll);
2557 if (this->isConstantStride ()) {
2558 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2561 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2562 this->whichVectors_.getRawPtr ());
2568 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2571 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2573 using Teuchos::ArrayRCP;
2574 using Teuchos::Comm;
2577 using LO = LocalOrdinal;
2578 using GO = GlobalOrdinal;
2584 TEUCHOS_TEST_FOR_EXCEPTION(
2585 ! this->isConstantStride (), std::logic_error,
2586 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2587 "if the MultiVector is a column view of another MultiVector (that is, if "
2588 "isConstantStride() == false).");
2618#ifdef HAVE_TEUCHOS_DEBUG
2634 if (this->getMap ().is_null ()) {
2639 TEUCHOS_TEST_FOR_EXCEPTION(
2640 newMap.is_null (), std::invalid_argument,
2641 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2642 "This probably means that the input Map is incorrect.");
2646 const size_t newNumRows = newMap->getNodeNumElements ();
2647 const size_t origNumRows = view_.extent (0);
2648 const size_t numCols = this->getNumVectors ();
2650 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2651 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2654 else if (newMap.is_null ()) {
2657 const size_t newNumRows =
static_cast<size_t> (0);
2658 const size_t numCols = this->getNumVectors ();
2659 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2662 this->map_ = newMap;
2665 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2668 scale (
const Scalar& alpha)
2673 const IST theAlpha =
static_cast<IST
> (alpha);
2674 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2677 const size_t lclNumRows = getLocalLength ();
2678 const size_t numVecs = getNumVectors ();
2679 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2680 const std::pair<size_t, size_t> colRng (0, numVecs);
2688 const bool useHostVersion = need_sync_device ();
2689 if (useHostVersion) {
2690 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2691 if (isConstantStride ()) {
2692 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2695 for (
size_t k = 0; k < numVecs; ++k) {
2696 const size_t Y_col = whichVectors_[k];
2697 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2698 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2703 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2704 if (isConstantStride ()) {
2705 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2708 for (
size_t k = 0; k < numVecs; ++k) {
2709 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2710 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2711 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2718 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2721 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2723 const size_t numVecs = this->getNumVectors ();
2724 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2725 TEUCHOS_TEST_FOR_EXCEPTION(
2726 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2727 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2731 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2732 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2733 k_alphas.modify_host ();
2734 for (
size_t i = 0; i < numAlphas; ++i) {
2737 k_alphas.sync_device ();
2739 this->scale (k_alphas.view_device ());
2742 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2745 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2748 using Kokkos::subview;
2750 const size_t lclNumRows = this->getLocalLength ();
2751 const size_t numVecs = this->getNumVectors ();
2752 TEUCHOS_TEST_FOR_EXCEPTION(
2753 static_cast<size_t> (alphas.extent (0)) != numVecs,
2754 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2755 "alphas.extent(0) = " << alphas.extent (0)
2756 <<
" != this->getNumVectors () = " << numVecs <<
".");
2757 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2758 const std::pair<size_t, size_t> colRng (0, numVecs);
2768 const bool useHostVersion = this->need_sync_device ();
2769 if (useHostVersion) {
2772 auto alphas_h = Kokkos::create_mirror_view (alphas);
2775 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2776 if (isConstantStride ()) {
2777 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2780 for (
size_t k = 0; k < numVecs; ++k) {
2781 const size_t Y_col = this->isConstantStride () ? k :
2782 this->whichVectors_[k];
2783 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2786 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2791 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2792 if (isConstantStride ()) {
2793 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2800 auto alphas_h = Kokkos::create_mirror_view (alphas);
2803 for (
size_t k = 0; k < numVecs; ++k) {
2804 const size_t Y_col = this->isConstantStride () ? k :
2805 this->whichVectors_[k];
2806 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2807 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2813 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2816 scale (
const Scalar& alpha,
2820 using Kokkos::subview;
2821 const char tfecfFuncName[] =
"scale: ";
2823 const size_t lclNumRows = getLocalLength ();
2824 const size_t numVecs = getNumVectors ();
2826 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2828 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2830 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2832 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2836 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2837 const std::pair<size_t, size_t> colRng (0, numVecs);
2839 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2841 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2842 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2845 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2849 for (
size_t k = 0; k < numVecs; ++k) {
2850 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2852 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2853 auto X_k = subview (X_lcl, ALL (), X_col);
2855 KokkosBlas::scal (Y_k, theAlpha, X_k);
2862 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2867 const char tfecfFuncName[] =
"reciprocal: ";
2869 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2871 "MultiVectors do not have the same local length. "
2872 "this->getLocalLength() = " << getLocalLength ()
2874 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2875 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2876 ": MultiVectors do not have the same number of columns (vectors). "
2877 "this->getNumVectors() = " << getNumVectors ()
2878 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2880 const size_t numVecs = getNumVectors ();
2882 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2886 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2890 using Kokkos::subview;
2891 for (
size_t k = 0; k < numVecs; ++k) {
2892 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2893 auto vector_k = subview (this_view_dev, ALL (), this_col);
2894 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2895 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2896 KokkosBlas::reciprocal (vector_k, vector_Ak);
2901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2906 const char tfecfFuncName[] =
"abs";
2908 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2910 ": MultiVectors do not have the same local length. "
2911 "this->getLocalLength() = " << getLocalLength ()
2913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2914 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2915 ": MultiVectors do not have the same number of columns (vectors). "
2916 "this->getNumVectors() = " << getNumVectors ()
2917 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2918 const size_t numVecs = getNumVectors ();
2920 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2924 KokkosBlas::abs (this_view_dev, A_view_dev);
2928 using Kokkos::subview;
2930 for (
size_t k=0; k < numVecs; ++k) {
2931 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2932 auto vector_k = subview (this_view_dev, ALL (), this_col);
2933 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2934 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2935 KokkosBlas::abs (vector_k, vector_Ak);
2940 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2943 update (
const Scalar& alpha,
2947 const char tfecfFuncName[] =
"update: ";
2948 using Kokkos::subview;
2953 const size_t lclNumRows = getLocalLength ();
2954 const size_t numVecs = getNumVectors ();
2956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2958 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2962 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2967 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2968 const std::pair<size_t, size_t> colRng (0, numVecs);
2970 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2971 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2973 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2977 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2981 for (
size_t k = 0; k < numVecs; ++k) {
2982 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2984 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2985 auto X_k = subview (X_lcl, ALL (), X_col);
2987 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2992 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2995 update (
const Scalar& alpha,
2999 const Scalar& gamma)
3002 using Kokkos::subview;
3004 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3008 const size_t lclNumRows = this->getLocalLength ();
3009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3011 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3012 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3014 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3016 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3017 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3019 const size_t numVecs = getNumVectors ();
3020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3022 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3023 "but this MultiVector has " << numVecs <<
" column(s).");
3024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3027 "but this MultiVector has " << numVecs <<
" column(s).");
3033 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3034 const std::pair<size_t, size_t> colRng (0, numVecs);
3039 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3044 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3049 for (
size_t k = 0; k < numVecs; ++k) {
3050 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3053 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3054 theBeta, subview (B_lcl, rowRng, B_col),
3055 theGamma, subview (C_lcl, rowRng, this_col));
3060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3061 Teuchos::ArrayRCP<const Scalar>
3067 const char tfecfFuncName[] =
"getData: ";
3074 auto hostView = getLocalViewHost(Access::ReadOnly);
3075 const size_t col = isConstantStride () ? j : whichVectors_[j];
3076 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3077 Teuchos::ArrayRCP<const IST> dataAsArcp =
3078 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3080 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3081 (
static_cast<size_t> (hostView_j.extent (0)) <
3082 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3083 "hostView_j.extent(0) = " << hostView_j.extent (0)
3084 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3085 "Please report this bug to the Tpetra developers.");
3087 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3091 Teuchos::ArrayRCP<Scalar>
3096 using Kokkos::subview;
3098 const char tfecfFuncName[] =
"getDataNonConst: ";
3100 auto hostView = getLocalViewHost(Access::ReadWrite);
3101 const size_t col = isConstantStride () ? j : whichVectors_[j];
3102 auto hostView_j = subview (hostView, ALL (), col);
3103 Teuchos::ArrayRCP<IST> dataAsArcp =
3104 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3107 (
static_cast<size_t> (hostView_j.extent (0)) <
3108 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3109 "hostView_j.extent(0) = " << hostView_j.extent (0)
3110 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3111 "Please report this bug to the Tpetra developers.");
3113 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3116 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3117 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3119 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3126 bool contiguous =
true;
3127 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3128 for (
size_t j = 1; j < numCopyVecs; ++j) {
3129 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3134 if (contiguous && numCopyVecs > 0) {
3135 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3138 RCP<const MV> X_sub = this->subView (cols);
3139 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3145 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3146 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3148 subCopy (
const Teuchos::Range1D &colRng)
const
3153 RCP<const MV> X_sub = this->subView (colRng);
3154 RCP<MV> Y (
new MV (this->getMap (),
static_cast<size_t> (colRng.size ()),
false));
3159 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3163 return origView_.extent (0);
3166 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3170 return origView_.extent (1);
3173 template <
class Scalar,
class LO,
class GO,
class Node>
3176 const Teuchos::RCP<const map_type>& subMap,
3177 const local_ordinal_type rowOffset) :
3181 using Kokkos::subview;
3182 using Teuchos::outArg;
3185 using Teuchos::reduceAll;
3186 using Teuchos::REDUCE_MIN;
3189 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3190 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3193 std::unique_ptr<std::ostringstream> errStrm;
3200 const auto comm = subMap->getComm ();
3201 TEUCHOS_ASSERT( ! comm.is_null () );
3202 const int myRank = comm->getRank ();
3204 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3206 const LO newNumRows =
static_cast<LO
> (subMap->getNodeNumElements ());
3208 std::ostringstream os;
3209 os <<
"Proc " << myRank <<
": " << prefix
3210 <<
"X: {lclNumRows: " << lclNumRowsBefore
3212 <<
", numCols: " << numCols <<
"}, "
3213 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3214 std::cerr << os.str ();
3219 const bool tooManyElts =
3222 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3223 *errStrm <<
" Proc " << myRank <<
": subMap->getNodeNumElements() (="
3224 << newNumRows <<
") + rowOffset (=" << rowOffset
3228 TEUCHOS_TEST_FOR_EXCEPTION
3229 (! debug && tooManyElts, std::invalid_argument,
3230 prefix << errStrm->str () << suffix);
3234 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3236 std::ostringstream gblErrStrm;
3237 const std::string myErrStr =
3238 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3239 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3240 TEUCHOS_TEST_FOR_EXCEPTION
3241 (
true, std::invalid_argument, gblErrStrm.str ());
3245 using range_type = std::pair<LO, LO>;
3246 const range_type origRowRng
3247 (rowOffset,
static_cast<LO
> (X.
origView_.extent (0)));
3248 const range_type rowRng
3249 (rowOffset, rowOffset + newNumRows);
3259 if (newOrigView.extent (0) == 0 &&
3260 newOrigView.extent (1) != X.
origView_.extent (1)) {
3262 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3264 if (newView.extent (0) == 0 &&
3265 newView.extent (1) != X.
view_.extent (1)) {
3267 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3271 MV (subMap, newView, newOrigView) :
3277 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3278 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3279 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3281 if (errStrm.get () ==
nullptr) {
3282 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3284 *errStrm <<
" Proc " << myRank <<
3285 ": subMap.getNodeNumElements(): " << newNumRows <<
3286 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3287 ", X.getNumVectors(): " << numCols <<
3288 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3290 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3292 std::ostringstream gblErrStrm;
3294 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3295 "dimensions on one or more processes:" << endl;
3297 const std::string myErrStr =
3298 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3299 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3300 gblErrStrm << suffix << endl;
3301 TEUCHOS_TEST_FOR_EXCEPTION
3302 (
true, std::invalid_argument, gblErrStrm.str ());
3307 std::ostringstream os;
3308 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3309 std::cerr << os.str ();
3315 std::ostringstream os;
3316 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3317 std::cerr << os.str ();
3321 template <
class Scalar,
class LO,
class GO,
class Node>
3324 const map_type& subMap,
3325 const size_t rowOffset) :
3326 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3330 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3331 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3333 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3334 const size_t offset)
const
3337 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3340 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3341 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3344 const size_t offset)
3347 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3350 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3351 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3353 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3355 using Teuchos::Array;
3359 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3360 TEUCHOS_TEST_FOR_EXCEPTION(
3361 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3362 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3363 "contain at least one entry, but cols.size() = " << cols.size ()
3368 bool contiguous =
true;
3369 for (
size_t j = 1; j < numViewCols; ++j) {
3370 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3376 if (numViewCols == 0) {
3378 return rcp (
new MV (this->getMap (), numViewCols));
3381 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3385 if (isConstantStride ()) {
3386 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3389 Array<size_t> newcols (cols.size ());
3390 for (
size_t j = 0; j < numViewCols; ++j) {
3391 newcols[j] = whichVectors_[cols[j]];
3393 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3399 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3401 subView (
const Teuchos::Range1D& colRng)
const
3403 using ::Tpetra::Details::Behavior;
3405 using Kokkos::subview;
3406 using Teuchos::Array;
3410 const char tfecfFuncName[] =
"subView(Range1D): ";
3412 const size_t lclNumRows = this->getLocalLength ();
3413 const size_t numVecs = this->getNumVectors ();
3417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3418 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3419 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3421 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3422 numVecs != 0 && colRng.size () != 0 &&
3423 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3424 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3425 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3426 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3427 "[0, " << numVecs <<
"].");
3429 RCP<const MV> X_ret;
3439 const std::pair<size_t, size_t> rows (0, lclNumRows);
3440 if (colRng.size () == 0) {
3441 const std::pair<size_t, size_t> cols (0, 0);
3443 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3447 if (isConstantStride ()) {
3448 const std::pair<size_t, size_t> cols (colRng.lbound (),
3449 colRng.ubound () + 1);
3451 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3454 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3457 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3458 whichVectors_[0] + colRng.ubound () + 1);
3460 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3463 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3464 whichVectors_.begin () + colRng.ubound () + 1);
3465 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3470 const bool debug = Behavior::debug ();
3472 using Teuchos::Comm;
3473 using Teuchos::outArg;
3474 using Teuchos::REDUCE_MIN;
3475 using Teuchos::reduceAll;
3477 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3478 Teuchos::null : this->getMap ()->getComm ();
3479 if (! comm.is_null ()) {
3483 if (X_ret.is_null ()) {
3486 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3487 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3488 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3489 "MultiVector; the return value of this method) is null on some MPI "
3490 "process in this MultiVector's communicator. This should never "
3491 "happen. Please report this bug to the Tpetra developers.");
3492 if (! X_ret.is_null () &&
3493 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3496 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3497 outArg (gblSuccess));
3498 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3499 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3500 "colRng.size(), on at least one MPI process in this MultiVector's "
3501 "communicator. This should never happen. "
3502 "Please report this bug to the Tpetra developers.");
3509 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3510 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3515 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3520 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3525 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3529 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3535 using Kokkos::subview;
3536 typedef std::pair<size_t, size_t> range_type;
3537 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3541 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3542 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3544 static_cast<size_t> (j) :
3546 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3559 const size_t newSize = X.
imports_.extent (0) / numCols;
3560 const size_t offset = jj*newSize;
3562 newImports.d_view = subview (X.
imports_.d_view,
3563 range_type (offset, offset+newSize));
3564 newImports.h_view = subview (X.
imports_.h_view,
3565 range_type (offset, offset+newSize));
3569 const size_t newSize = X.
exports_.extent (0) / numCols;
3570 const size_t offset = jj*newSize;
3572 newExports.d_view = subview (X.
exports_.d_view,
3573 range_type (offset, offset+newSize));
3574 newExports.h_view = subview (X.
exports_.h_view,
3575 range_type (offset, offset+newSize));
3586 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3587 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3592 return Teuchos::rcp (
new V (*
this, j));
3596 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3597 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3602 return Teuchos::rcp (
new V (*
this, j));
3606 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3609 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3612 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3614 Kokkos::MemoryUnmanaged>;
3615 const char tfecfFuncName[] =
"get1dCopy: ";
3617 const size_t numRows = this->getLocalLength ();
3618 const size_t numCols = this->getNumVectors ();
3620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3621 (LDA < numRows, std::runtime_error,
3622 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3623 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3624 (numRows >
size_t (0) && numCols >
size_t (0) &&
3625 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3627 "A.size() = " << A.size () <<
", but its size must be at least "
3628 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3630 const std::pair<size_t, size_t> rowRange (0, numRows);
3631 const std::pair<size_t, size_t> colRange (0, numCols);
3633 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3635 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3648 if (this->need_sync_host() && this->need_sync_device()) {
3651 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3654 const bool useHostView = owningView_.h_view.use_count() >= owningView_.d_view.use_count();
3655 if (this->isConstantStride ()) {
3657 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3660 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3665 for (
size_t j = 0; j < numCols; ++j) {
3666 const size_t srcCol = this->whichVectors_[j];
3667 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3670 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3671 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3674 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3675 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3684 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3687 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3690 const char tfecfFuncName[] =
"get2dCopy: ";
3691 const size_t numRows = this->getLocalLength ();
3692 const size_t numCols = this->getNumVectors ();
3694 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3695 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3696 std::runtime_error,
"Input array of pointers must contain as many "
3697 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3698 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3700 if (numRows != 0 && numCols != 0) {
3702 for (
size_t j = 0; j < numCols; ++j) {
3703 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3705 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3706 "the input array of arrays is not long enough to fit all entries in "
3707 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3708 <<
" < getLocalLength() = " << numRows <<
".");
3712 for (
size_t j = 0; j < numCols; ++j) {
3713 Teuchos::RCP<const V> X_j = this->getVector (j);
3714 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3715 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3721 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3722 Teuchos::ArrayRCP<const Scalar>
3726 if (getLocalLength () == 0 || getNumVectors () == 0) {
3727 return Teuchos::null;
3729 TEUCHOS_TEST_FOR_EXCEPTION(
3730 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3731 "get1dView: This MultiVector does not have constant stride, so it is "
3732 "not possible to view its data as a single array. You may check "
3733 "whether a MultiVector has constant stride by calling "
3734 "isConstantStride().");
3738 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3739 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3740 Kokkos::Compat::persistingView (X_lcl);
3741 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3745 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3746 Teuchos::ArrayRCP<Scalar>
3750 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3751 return Teuchos::null;
3754 TEUCHOS_TEST_FOR_EXCEPTION
3755 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3756 "get1dViewNonConst: This MultiVector does not have constant stride, "
3757 "so it is not possible to view its data as a single array. You may "
3758 "check whether a MultiVector has constant stride by calling "
3759 "isConstantStride().");
3760 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3761 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3762 Kokkos::Compat::persistingView (X_lcl);
3763 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3767 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3768 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3772 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3778 const size_t myNumRows = this->getLocalLength ();
3779 const size_t numCols = this->getNumVectors ();
3780 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3782 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3783 for (
size_t j = 0; j < numCols; ++j) {
3784 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3785 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3786 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3787 Kokkos::Compat::persistingView (X_lcl_j);
3788 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3793 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3799 if (owningView_.d_view.use_count() > owningView_.h_view.use_count()) {
3801 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3802 " host while a device view is alive";
3804 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3805 <<
": " << msg << std::endl;
3807 throw std::runtime_error(msg);
3809 owningView_.sync_host();
3810 return view_.view_host();
3813 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3819 if (owningView_.d_view.use_count() > owningView_.h_view.use_count()) {
3821 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3822 " host while a device view is alive";
3824 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3825 <<
": " << msg << std::endl;
3827 throw std::runtime_error(msg);
3829 owningView_.sync_host();
3830 owningView_.modify_host();
3831 return view_.view_host();
3834 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3840 if (owningView_.h_view != view_.h_view) {
3842 return getLocalViewHost(Access::ReadWrite);
3845 if (owningView_.d_view.use_count() > owningView_.h_view.use_count()) {
3847 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3848 " host while a device view is alive";
3850 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3851 <<
": " << msg << std::endl;
3853 throw std::runtime_error(msg);
3855 owningView_.clear_sync_state();
3856 owningView_.modify_host();
3857 return view_.view_host();
3860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3866 if (owningView_.h_view.use_count() > owningView_.d_view.use_count()) {
3868 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3869 " device while a host view is alive";
3871 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3872 <<
": " << msg << std::endl;
3874 throw std::runtime_error(msg);
3876 owningView_.sync_device();
3877 return view_.view_device();
3880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3886 if(owningView_.h_view.use_count() > owningView_.d_view.use_count()) {
3888 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3889 " device while a host view is alive";
3891 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3892 <<
": " << msg << std::endl;
3894 throw std::runtime_error(msg);
3896 owningView_.sync_device();
3897 owningView_.modify_device();
3898 return view_.view_device();
3901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3907 if (owningView_.h_view != view_.h_view) {
3909 return getLocalViewDevice(Access::ReadWrite);
3912 if (owningView_.h_view.use_count() > owningView_.d_view.use_count()) {
3914 const char msg[] =
"Tpetra::MultiVector: Cannot access data on "
3915 " device while a host view is alive";
3917 std::cout <<
"Rank " << this->getMap()->getComm()->getRank()
3918 <<
": " << msg << std::endl;
3920 throw std::runtime_error(msg);
3922 owningView_.clear_sync_state();
3923 owningView_.modify_device();
3924 return view_.view_device();
3927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3928 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3935 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3941 const size_t myNumRows = this->getLocalLength ();
3942 const size_t numCols = this->getNumVectors ();
3943 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3945 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3946 for (
size_t j = 0; j < numCols; ++j) {
3947 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3948 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3949 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3950 Kokkos::Compat::persistingView (X_lcl_j);
3951 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
3956 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3960 Teuchos::ETransp transB,
3961 const Scalar& alpha,
3966 using ::Tpetra::Details::ProfilingRegion;
3967 using Teuchos::CONJ_TRANS;
3968 using Teuchos::NO_TRANS;
3969 using Teuchos::TRANS;
3972 using Teuchos::rcpFromRef;
3974 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3976 using STS = Teuchos::ScalarTraits<Scalar>;
3978 const char tfecfFuncName[] =
"multiply: ";
3979 ProfilingRegion region (
"Tpetra::MV::multiply");
4011 const bool C_is_local = ! this->isDistributed ();
4021 auto myMap = this->getMap ();
4022 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4023 using Teuchos::REDUCE_MIN;
4024 using Teuchos::reduceAll;
4025 using Teuchos::outArg;
4027 auto comm = myMap->getComm ();
4028 const size_t A_nrows =
4030 const size_t A_ncols =
4032 const size_t B_nrows =
4034 const size_t B_ncols =
4037 const bool lclBad = this->getLocalLength () != A_nrows ||
4038 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4040 const int myRank = comm->getRank ();
4041 std::ostringstream errStrm;
4042 if (this->getLocalLength () != A_nrows) {
4043 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4044 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4045 <<
"." << std::endl;
4047 if (this->getNumVectors () != B_ncols) {
4048 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4049 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4050 <<
"." << std::endl;
4052 if (A_ncols != B_nrows) {
4053 errStrm <<
"Proc " << myRank <<
": A_ncols="
4054 << A_ncols <<
" != B_nrows=" << B_nrows
4055 <<
"." << std::endl;
4058 const int lclGood = lclBad ? 0 : 1;
4060 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4062 std::ostringstream os;
4063 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4065 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4066 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4067 "least one process in this object's communicator." << std::endl
4069 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4071 << (transA == Teuchos::TRANS ?
"^T" :
4072 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4073 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4075 << (transA == Teuchos::TRANS ?
"^T" :
4076 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4077 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4078 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4088 const bool Case1 = C_is_local && A_is_local && B_is_local;
4090 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4091 transA != NO_TRANS &&
4094 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4098 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4099 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4100 "Multiplication of op(A) and op(B) into *this is not a "
4101 "supported use case.");
4103 if (beta != STS::zero () && Case2) {
4109 const int myRank = this->getMap ()->getComm ()->getRank ();
4111 beta_local = ATS::zero ();
4120 if (! isConstantStride ()) {
4121 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4123 C_tmp = rcp (
this,
false);
4126 RCP<const MV> A_tmp;
4128 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4130 A_tmp = rcpFromRef (A);
4133 RCP<const MV> B_tmp;
4135 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4137 B_tmp = rcpFromRef (B);
4140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4141 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4142 ! A_tmp->isConstantStride (), std::logic_error,
4143 "Failed to make temporary constant-stride copies of MultiVectors.");
4146 const LO A_lclNumRows = A_tmp->getLocalLength ();
4147 const LO A_numVecs = A_tmp->getNumVectors ();
4148 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4149 auto A_sub = Kokkos::subview (A_lcl,
4150 std::make_pair (LO (0), A_lclNumRows),
4151 std::make_pair (LO (0), A_numVecs));
4154 const LO B_lclNumRows = B_tmp->getLocalLength ();
4155 const LO B_numVecs = B_tmp->getNumVectors ();
4156 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4157 auto B_sub = Kokkos::subview (B_lcl,
4158 std::make_pair (LO (0), B_lclNumRows),
4159 std::make_pair (LO (0), B_numVecs));
4161 const LO C_lclNumRows = C_tmp->getLocalLength ();
4162 const LO C_numVecs = C_tmp->getNumVectors ();
4164 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4165 auto C_sub = Kokkos::subview (C_lcl,
4166 std::make_pair (LO (0), C_lclNumRows),
4167 std::make_pair (LO (0), C_numVecs));
4168 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4169 (transA == Teuchos::TRANS ?
'T' :
'C'));
4170 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4171 (transB == Teuchos::TRANS ?
'T' :
'C'));
4174 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4176 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4180 if (! isConstantStride ()) {
4185 A_tmp = Teuchos::null;
4186 B_tmp = Teuchos::null;
4194 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4203 using Kokkos::subview;
4204 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4206 const size_t lclNumRows = this->getLocalLength ();
4207 const size_t numVecs = this->getNumVectors ();
4209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4211 std::runtime_error,
"MultiVectors do not have the same local length.");
4212 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4213 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4214 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4217 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4227 KokkosBlas::mult (scalarThis,
4230 subview (A_view, ALL (), 0),
4234 for (
size_t j = 0; j < numVecs; ++j) {
4235 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4237 KokkosBlas::mult (scalarThis,
4238 subview (this_view, ALL (), C_col),
4240 subview (A_view, ALL (), 0),
4241 subview (B_view, ALL (), B_col));
4246 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4252 using ::Tpetra::Details::ProfilingRegion;
4253 ProfilingRegion region (
"Tpetra::MV::reduce");
4255 const auto map = this->getMap ();
4256 if (map.get () ==
nullptr) {
4259 const auto comm = map->getComm ();
4260 if (comm.get () ==
nullptr) {
4266 const bool changed_on_device = this->need_sync_host ();
4267 if (changed_on_device) {
4272 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4276 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4281 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4289 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4290 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4291 TEUCHOS_TEST_FOR_EXCEPTION(
4292 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4294 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4295 <<
" is invalid. The range of valid row indices on this process "
4296 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4297 <<
", " << maxLocalIndex <<
"].");
4298 TEUCHOS_TEST_FOR_EXCEPTION(
4299 vectorIndexOutOfRange(col),
4301 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4302 <<
" of the multivector is invalid.");
4305 auto view = this->getLocalViewHost(Access::ReadWrite);
4306 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4307 view (lclRow, colInd) = ScalarValue;
4311 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4320 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4321 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4322 TEUCHOS_TEST_FOR_EXCEPTION(
4323 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4325 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4326 <<
" is invalid. The range of valid row indices on this process "
4327 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4328 <<
", " << maxLocalIndex <<
"].");
4329 TEUCHOS_TEST_FOR_EXCEPTION(
4330 vectorIndexOutOfRange(col),
4332 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4333 <<
" of the multivector is invalid.");
4336 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4338 auto view = this->getLocalViewHost(Access::ReadWrite);
4340 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4343 view(lclRow, colInd) += value;
4348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4357 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4360 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4361 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4362 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4364 "Global row index " << gblRow <<
" is not present on this process "
4365 << this->getMap ()->getComm ()->getRank () <<
".");
4366 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4367 (this->vectorIndexOutOfRange (col), std::runtime_error,
4368 "Vector index " << col <<
" of the MultiVector is invalid.");
4371 this->replaceLocalValue (lclRow, col, ScalarValue);
4374 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4384 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4387 TEUCHOS_TEST_FOR_EXCEPTION(
4388 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4390 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4391 <<
" is not present on this process "
4392 << this->getMap ()->getComm ()->getRank () <<
".");
4393 TEUCHOS_TEST_FOR_EXCEPTION(
4394 vectorIndexOutOfRange(col),
4396 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4397 <<
" of the multivector is invalid.");
4400 this->sumIntoLocalValue (lclRow, col, value, atomic);
4403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4405 Teuchos::ArrayRCP<T>
4411 typename dual_view_type::array_layout,
4413 const size_t col = isConstantStride () ? j : whichVectors_[j];
4414 col_dual_view_type X_col =
4415 Kokkos::subview (view_, Kokkos::ALL (), col);
4416 return Kokkos::Compat::persistingView (X_col.d_view);
4420#ifdef TPETRA_ENABLE_DEPRECATED_CODE
4421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4425 owningView_.clear_sync_state ();
4428 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4432 owningView_.sync_host ();
4435 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4439 owningView_.sync_device ();
4443 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4447 return owningView_.need_sync_host ();
4450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4454 return owningView_.need_sync_device ();
4457#ifdef TPETRA_ENABLE_DEPRECATED_CODE
4458 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4462 owningView_.modify_device ();
4465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4469 owningView_.modify_host ();
4473#ifdef TPETRA_ENABLE_DEPRECATED_CODE
4474 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4478 return view_.view_device ();
4481 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4485 return view_.view_host ();
4489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4494 using Teuchos::TypeNameTraits;
4496 std::ostringstream out;
4497 out <<
"\"" << className <<
"\": {";
4498 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4499 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4500 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4501 <<
", Node" << Node::name ()
4503 if (this->getObjectLabel () !=
"") {
4504 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4506 out <<
", numRows: " << this->getGlobalLength ();
4507 if (className !=
"Tpetra::Vector") {
4508 out <<
", numCols: " << this->getNumVectors ()
4509 <<
", isConstantStride: " << this->isConstantStride ();
4511 if (this->isConstantStride ()) {
4512 out <<
", columnStride: " << this->getStride ();
4519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4524 return this->descriptionImpl (
"Tpetra::MultiVector");
4529 template<
typename ViewType>
4530 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4533 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4534 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4535 static_assert(ViewType::rank == 2,
4536 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4539 out <<
"Values("<<prefix<<
"): " << std::endl
4541 const size_t numRows = v.extent(0);
4542 const size_t numCols = v.extent(1);
4544 for (
size_t i = 0; i < numRows; ++i) {
4546 if (i + 1 < numRows) {
4552 for (
size_t i = 0; i < numRows; ++i) {
4553 for (
size_t j = 0; j < numCols; ++j) {
4555 if (j + 1 < numCols) {
4559 if (i + 1 < numRows) {
4568 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4573 typedef LocalOrdinal LO;
4576 if (vl <= Teuchos::VERB_LOW) {
4577 return std::string ();
4579 auto map = this->getMap ();
4580 if (map.is_null ()) {
4581 return std::string ();
4583 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4584 auto outp = Teuchos::getFancyOStream (outStringP);
4585 Teuchos::FancyOStream& out = *outp;
4586 auto comm = map->getComm ();
4587 const int myRank = comm->getRank ();
4588 const int numProcs = comm->getSize ();
4590 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4591 Teuchos::OSTab tab1 (out);
4594 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4595 out <<
"Local number of rows: " << lclNumRows << endl;
4597 if (vl > Teuchos::VERB_MEDIUM) {
4600 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4601 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4603 if (this->isConstantStride ()) {
4604 out <<
"Column stride: " << this->getStride () << endl;
4607 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4614 auto X_dev = view_.view_device();
4615 auto X_host = view_.view_host();
4617 if(X_dev.data() == X_host.data()) {
4619 Details::print_vector(out,
"unified",X_host);
4622 Details::print_vector(out,
"host",X_host);
4623 if(X_dev.span_is_contiguous())
4625 auto X_dev_on_host = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace(), X_dev);
4626 Details::print_vector(out,
"dev",X_dev_on_host);
4630 auto X_contig = Tpetra::Details::TempView::toLayout<decltype(X_dev), Kokkos::LayoutLeft>(X_dev);
4631 auto X_dev_on_host = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace(), X_contig);
4632 Details::print_vector(out,
"dev",X_dev_on_host);
4638 return outStringP->str ();
4641 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4645 const std::string& className,
4646 const Teuchos::EVerbosityLevel verbLevel)
const
4648 using Teuchos::TypeNameTraits;
4649 using Teuchos::VERB_DEFAULT;
4650 using Teuchos::VERB_NONE;
4651 using Teuchos::VERB_LOW;
4653 const Teuchos::EVerbosityLevel vl =
4654 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4656 if (vl == VERB_NONE) {
4663 auto map = this->getMap ();
4664 if (map.is_null ()) {
4667 auto comm = map->getComm ();
4668 if (comm.is_null ()) {
4672 const int myRank = comm->getRank ();
4681 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4685 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4686 out <<
"\"" << className <<
"\":" << endl;
4687 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4689 out <<
"Template parameters:" << endl;
4690 Teuchos::OSTab tab2 (out);
4691 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4692 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4693 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4694 <<
"Node: " << Node::name () << endl;
4696 if (this->getObjectLabel () !=
"") {
4697 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4699 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4700 if (className !=
"Tpetra::Vector") {
4701 out <<
"Number of columns: " << this->getNumVectors () << endl;
4708 if (vl > VERB_LOW) {
4709 const std::string lclStr = this->localDescribeToString (vl);
4710 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4714 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4717 describe (Teuchos::FancyOStream &out,
4718 const Teuchos::EVerbosityLevel verbLevel)
const
4720 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4723 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4728 replaceMap (newMap);
4731 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4736 using ::Tpetra::Details::localDeepCopy;
4737 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4739 TEUCHOS_TEST_FOR_EXCEPTION
4741 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4742 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4743 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4744 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4745 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4747 TEUCHOS_TEST_FOR_EXCEPTION
4748 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4749 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4750 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4751 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4766 if (src_last_updated_on_host) {
4767 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4769 this->isConstantStride (),
4771 this->whichVectors_ (),
4775 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4777 this->isConstantStride (),
4779 this->whichVectors_ (),
4784 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4786 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4793 this->getNumVectors(),
4799 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4804 using ::Tpetra::Details::PackTraits;
4806 const size_t l1 = this->getLocalLength();
4808 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4815 template <
class ST,
class LO,
class GO,
class NT>
4818 std::swap(mv.
map_, this->map_);
4819 std::swap(mv.
view_, this->view_);
4820 std::swap(mv.
origView_, this->origView_);
4825#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4826 template <
class ST,
class LO,
class GO,
class NT>
4829 const Teuchos::SerialDenseMatrix<int, ST>& src)
4831 using ::Tpetra::Details::localDeepCopy;
4833 using IST =
typename MV::impl_scalar_type;
4834 using input_view_type =
4835 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4836 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4837 using pair_type = std::pair<LO, LO>;
4839 const LO numRows =
static_cast<LO
> (src.numRows ());
4840 const LO numCols =
static_cast<LO
> (src.numCols ());
4842 TEUCHOS_TEST_FOR_EXCEPTION
4845 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4846 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4848 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4850 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4851 input_view_type src_orig (src_raw, src.stride (), numCols);
4852 input_view_type src_view =
4853 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4855 constexpr bool src_isConstantStride =
true;
4856 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4860 src_isConstantStride,
4861 getMultiVectorWhichVectors (dst),
4865 template <
class ST,
class LO,
class GO,
class NT>
4867 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4870 using ::Tpetra::Details::localDeepCopy;
4872 using IST =
typename MV::impl_scalar_type;
4873 using output_view_type =
4874 Kokkos::View<IST**, Kokkos::LayoutLeft,
4875 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4876 using pair_type = std::pair<LO, LO>;
4878 const LO numRows =
static_cast<LO
> (dst.numRows ());
4879 const LO numCols =
static_cast<LO
> (dst.numCols ());
4881 TEUCHOS_TEST_FOR_EXCEPTION
4884 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4885 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4887 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4889 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4890 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4892 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4894 constexpr bool dst_isConstantStride =
true;
4895 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4898 localDeepCopy (dst_view,
4900 dst_isConstantStride,
4903 getMultiVectorWhichVectors (src));
4907 template <
class Scalar,
class LO,
class GO,
class NT>
4908 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4913 return Teuchos::rcp (
new MV (map, numVectors));
4916 template <
class ST,
class LO,
class GO,
class NT>
4934#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4935# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4936 template class MultiVector< SCALAR , LO , GO , NODE >; \
4937 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4938 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4939 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4940 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4943# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4944 template class MultiVector< SCALAR , LO , GO , NODE >; \
4945 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4950#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4952 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4953 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void fill(const ExecutionSpace &execSpace, const ViewType &X, const ValueType &alpha, const IndexType numRows, const IndexType numCols)
Fill the entries of the given 1-D or 2-D Kokkos::View with the given scalar value alpha.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Stand-alone utility functions and macros.
static bool assumeMpiIsCudaAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
Base class for distributed Tpetra objects that support data redistribution.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
dual_view_type origView_
The "original view" of the MultiVector's data.
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual std::string description() const
A simple one-line description of this object.
void reduce()
Sum values of a locally replicated multivector across all processes.
void randomize()
Set all values in the multivector to pseudorandom numbers.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
size_t getNumVectors() const
Number of columns in the multivector.
dual_view_type owningView_
The true original DualView - it owns the memory of this MultiVector, and was not constructed as a sub...
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
int local_ordinal_type
Default value of Scalar template parameter.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
EWhichNorm
Input argument for normImpl() (which see).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.