Tpetra parallel linear algebra Version of the Day
Tpetra_BlockMultiVector_def.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42
44#include "Tpetra_BlockView.hpp"
45#include "Teuchos_OrdinalTraits.hpp"
46
47#ifdef TPETRA_ENABLE_DEPRECATED_CODE
48namespace { // anonymous
49
// File-local helper (deprecated-code path only).  Intended to return a
// raw host pointer to a Tpetra::MultiVector's local data, but the real
// implementation is commented out and it currently always returns
// nullptr.  NOTE(review): any caller that dereferences the result will
// crash — confirm this stub is intentional.
 61 template<class MultiVectorType>
 62 struct RawHostPtrFromMultiVector {
 63 typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
 64
 65 static impl_scalar_type* getRawPtr (MultiVectorType& X) {
 66 return nullptr;
 67 //auto X_view_host = X.getLocalViewHost ();
 68 //impl_scalar_type* X_raw = X_view_host.data ();
 69 //return X_raw;
 70 }
 71 };
72
// Convenience wrapper over RawHostPtrFromMultiVector::getRawPtr.
// NOTE(review): this extract is garbled — the return-type line and the
// typedef that defines MV (presumably
// typedef Tpetra::MultiVector<S, LO, GO, N> MV;) are missing; as shown,
// MV is an unresolved name.  Restore from the original source.
 85 template<class S, class LO, class GO, class N>
 87 getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
 89 return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
 90 }
91
92} // namespace (anonymous)
93#endif
94
95namespace Tpetra {
96
// Return a Tpetra::MultiVector that views this BlockMultiVector's data
// (shallow; mv_ is returned by value with view semantics).
97template<class Scalar, class LO, class GO, class Node>
100getMultiVectorView () const
101{
102 return mv_;
103}
104
// Downcast the source of an Import/Export to a BlockMultiVector.
// Throws std::invalid_argument if src is not a BlockMultiVector;
// otherwise returns a nonowning RCP to it (caller must keep src alive).
105template<class Scalar, class LO, class GO, class Node>
106Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
109{
111 const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
112 TEUCHOS_TEST_FOR_EXCEPTION(
113 src_bmv == nullptr, std::invalid_argument, "Tpetra::"
114 "BlockMultiVector: The source object of an Import or Export to a "
115 "BlockMultiVector, must also be a BlockMultiVector.");
116 return Teuchos::rcp (src_bmv, false); // nonowning RCP
117}
118
// Copy constructor with explicit copy-or-view semantics: mv_ is copied
// or aliased according to copyOrView; Maps and blockSize are shallow.
119template<class Scalar, class LO, class GO, class Node>
122 const Teuchos::DataAccess copyOrView) :
123 dist_object_type (in),
124 meshMap_ (in.meshMap_),
125 pointMap_ (in.pointMap_),
126 mv_ (in.mv_, copyOrView),
127#ifdef TPETRA_ENABLE_DEPRECATED_CODE
128 mvData_ (getRawHostPtrFromMultiVector (mv_)),
129#endif // TPETRA_ENABLE_DEPRECATED_CODE
130 blockSize_ (in.blockSize_)
131{}
132
// Construct from a mesh Map, block size, and number of vectors; the
// point Map is derived via makePointMap and mv_ allocated over it.
133template<class Scalar, class LO, class GO, class Node>
135BlockMultiVector (const map_type& meshMap,
136 const LO blockSize,
137 const LO numVecs) :
138 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
139 meshMap_ (meshMap),
140 pointMap_ (makePointMap (meshMap, blockSize)),
141 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
142#ifdef TPETRA_ENABLE_DEPRECATED_CODE
143 mvData_ (getRawHostPtrFromMultiVector (mv_)),
144#endif // TPETRA_ENABLE_DEPRECATED_CODE
145 blockSize_ (blockSize)
146{}
147
// Construct from both Maps; caller asserts pointMap is consistent with
// (meshMap, blockSize) — no check is done here.
148template<class Scalar, class LO, class GO, class Node>
150BlockMultiVector (const map_type& meshMap,
151 const map_type& pointMap,
152 const LO blockSize,
153 const LO numVecs) :
154 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
155 meshMap_ (meshMap),
156 pointMap_ (pointMap),
157 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
158#ifdef TPETRA_ENABLE_DEPRECATED_CODE
159 mvData_ (getRawHostPtrFromMultiVector (mv_)),
160#endif // TPETRA_ENABLE_DEPRECATED_CODE
161 blockSize_ (blockSize)
162{}
163
// View an existing Tpetra::MultiVector as a BlockMultiVector.  Always
// ends with mv_ sharing X_mv's data: directly if X_mv has view
// semantics, else via a subView of all columns cast to view semantics.
// The point Map is taken from the resulting mv_, not recomputed.
164template<class Scalar, class LO, class GO, class Node>
166BlockMultiVector (const mv_type& X_mv,
167 const map_type& meshMap,
168 const LO blockSize) :
169 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
170 meshMap_ (meshMap),
171#ifdef TPETRA_ENABLE_DEPRECATED_CODE
172 mvData_ (nullptr), // just for now
173#endif // TPETRA_ENABLE_DEPRECATED_CODE
174 blockSize_ (blockSize)
175{
176 using Teuchos::RCP;
177
178 if (X_mv.getCopyOrView () == Teuchos::View) {
179 // The input MultiVector has view semantics, so assignment just
180 // does a shallow copy.
181 mv_ = X_mv;
182 }
183 else if (X_mv.getCopyOrView () == Teuchos::Copy) {
184 // The input MultiVector has copy semantics. We can't change
185 // that, but we can make a view of the input MultiVector and
186 // change the view to have view semantics. (That sounds silly;
187 // shouldn't views always have view semantics? but remember that
188 // "view semantics" just governs the default behavior of the copy
189 // constructor and assignment operator.)
190 RCP<const mv_type> X_view_const;
191 const size_t numCols = X_mv.getNumVectors ();
192 if (numCols == 0) {
193 Teuchos::Array<size_t> cols (0); // view will have zero columns
194 X_view_const = X_mv.subView (cols ());
195 } else { // Range1D is an inclusive range
196 X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
197 }
198 TEUCHOS_TEST_FOR_EXCEPTION(
199 X_view_const.is_null (), std::logic_error, "Tpetra::"
200 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
201 "should never happen. Please report this bug to the Tpetra developers.");
202
203 // It's perfectly OK to cast away const here. Those view methods
204 // should be marked const anyway, because views can't change the
205 // allocation (just the entries themselves).
206 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
207 TEUCHOS_TEST_FOR_EXCEPTION(
208 X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
209 "BlockMultiVector constructor: We just set a MultiVector "
210 "to have view semantics, but it claims that it doesn't have view "
211 "semantics. This should never happen. "
212 "Please report this bug to the Tpetra developers.");
213 mv_ = *X_view; // MultiVector::operator= does a shallow copy here
214 }
215
216 // At this point, mv_ has been assigned, so we can ignore X_mv.
217 Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
218 if (! pointMap.is_null ()) {
219 pointMap_ = *pointMap; // Map::operator= also does a shallow copy
220 }
221#ifdef TPETRA_ENABLE_DEPRECATED_CODE
222 mvData_ = getRawHostPtrFromMultiVector (mv_);
223#endif // TPETRA_ENABLE_DEPRECATED_CODE
224}
225
// "Offset view" constructor: view a contiguous row range of X starting
// at mesh-row offset (point offset = offset * blockSize), with
// caller-supplied mesh and point Maps.
226template<class Scalar, class LO, class GO, class Node>
229 const map_type& newMeshMap,
230 const map_type& newPointMap,
231 const size_t offset) :
232 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
233 meshMap_ (newMeshMap),
234 pointMap_ (newPointMap),
235 mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
236#ifdef TPETRA_ENABLE_DEPRECATED_CODE
237 mvData_ (getRawHostPtrFromMultiVector (mv_)),
238#endif // TPETRA_ENABLE_DEPRECATED_CODE
239 blockSize_ (X.getBlockSize ())
240{}
241
// "Offset view" constructor deriving the point Map from newMeshMap and
// X's block size via makePointMap.
242template<class Scalar, class LO, class GO, class Node>
245 const map_type& newMeshMap,
246 const size_t offset) :
247 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
248 meshMap_ (newMeshMap),
249 pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
250 mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
251#ifdef TPETRA_ENABLE_DEPRECATED_CODE
252 mvData_ (getRawHostPtrFromMultiVector (mv_)),
253#endif // TPETRA_ENABLE_DEPRECATED_CODE
254 blockSize_ (X.getBlockSize ())
255{}
256
// Default constructor: empty object with null Map and block size 0.
257template<class Scalar, class LO, class GO, class Node>
260 dist_object_type (Teuchos::null),
261#ifdef TPETRA_ENABLE_DEPRECATED_CODE
262 mvData_ (nullptr),
263#endif // TPETRA_ENABLE_DEPRECATED_CODE
264 blockSize_ (0)
265{}
266
// Build the point Map from a mesh Map and block size: each mesh GID
// expands to blockSize consecutive point GIDs.  Contiguous mesh Maps
// get a contiguous point Map; otherwise each mesh GID g maps to the
// range indexBase + (g - indexBase)*blockSize + [0, blockSize).
267template<class Scalar, class LO, class GO, class Node>
270makePointMap (const map_type& meshMap, const LO blockSize)
271{
272 typedef Tpetra::global_size_t GST;
273 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
274
275 const GST gblNumMeshMapInds =
276 static_cast<GST> (meshMap.getGlobalNumElements ());
277 const size_t lclNumMeshMapIndices =
278 static_cast<size_t> (meshMap.getNodeNumElements ());
279 const GST gblNumPointMapInds =
280 gblNumMeshMapInds * static_cast<GST> (blockSize);
281 const size_t lclNumPointMapInds =
282 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
283 const GO indexBase = meshMap.getIndexBase ();
284
285 if (meshMap.isContiguous ()) {
286 return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
287 meshMap.getComm ());
288 }
289 else {
290 // "Hilbert's Hotel" trick: multiply each process' GIDs by
291 // blockSize, and fill in. That ensures correctness even if the
292 // mesh Map is overlapping.
293 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
294 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
295 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
296 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
297 const GO meshGid = lclMeshGblInds[g];
298 const GO pointGidStart = indexBase +
299 (meshGid - indexBase) * static_cast<GO> (blockSize);
300 const size_type offset = g * static_cast<size_type> (blockSize);
301 for (LO k = 0; k < blockSize; ++k) {
302 const GO pointGid = pointGidStart + static_cast<GO> (k);
303 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
304 }
305 }
306 return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
307 meshMap.getComm ());
308 }
309}
310
311
// Overwrite the block at (localRowIndex, colIndex) with the blockSize
// entries in vals.  No bounds checking; callers validate the index.
312template<class Scalar, class LO, class GO, class Node>
313void
315replaceLocalValuesImpl (const LO localRowIndex,
316 const LO colIndex,
317 const Scalar vals[])
318{
319 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
320 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
321 getBlockSize ());
322 Kokkos::deep_copy (X_dst, X_src);
323}
324
325
// Replace all values at a mesh point (local index).  Returns false if
// the index is not owned by this process; true on success.
326template<class Scalar, class LO, class GO, class Node>
327bool
329replaceLocalValues (const LO localRowIndex,
330 const LO colIndex,
331 const Scalar vals[])
332{
333 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
334 return false;
335 } else {
336 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
337 return true;
338 }
339}
340
// Replace all values at a mesh point (global index).  Returns false if
// the global index is not owned by this process; true on success.
341template<class Scalar, class LO, class GO, class Node>
342bool
344replaceGlobalValues (const GO globalRowIndex,
345 const LO colIndex,
346 const Scalar vals[])
347{
348 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
349 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
350 return false;
351 } else {
352 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
353 return true;
354 }
355}
356
// Accumulate vals into the block at (localRowIndex, colIndex):
// block += 1 * vals (AXPY with coefficient one).  No bounds checking.
357template<class Scalar, class LO, class GO, class Node>
358void
360sumIntoLocalValuesImpl (const LO localRowIndex,
361 const LO colIndex,
362 const Scalar vals[])
363{
364 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
365 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
366 getBlockSize ());
367 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
368}
369
// Sum vals into the block at a mesh point (local index).  Returns
// false if the index is not owned by this process; true on success.
370template<class Scalar, class LO, class GO, class Node>
371bool
373sumIntoLocalValues (const LO localRowIndex,
374 const LO colIndex,
375 const Scalar vals[])
376{
377 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
378 return false;
379 } else {
380 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
381 return true;
382 }
383}
384
// Sum vals into the block at a mesh point (global index).  Returns
// false if the global index is not owned by this process.
385template<class Scalar, class LO, class GO, class Node>
386bool
388sumIntoGlobalValues (const GO globalRowIndex,
389 const LO colIndex,
390 const Scalar vals[])
391{
392 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
393 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
394 return false;
395 } else {
396 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
397 return true;
398 }
399}
400
401#ifdef TPETRA_ENABLE_DEPRECATED_CODE
402
// Deprecated: expose a raw, writable pointer to the block at
// (localRowIndex, colIndex).  Returns false for unowned indices.  The
// pointer aliases mv_'s host data; its lifetime is tied to this object.
403template<class Scalar, class LO, class GO, class Node>
404bool
405// TPETRA_DEPRECATED
407getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals)
408{
409 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
410 return false;
411 } else {
412 auto X_ij = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
413 vals = reinterpret_cast<Scalar*> (X_ij.data ());
414 return true;
415 }
416}
417
418template<class Scalar, class LO, class GO, class Node>
419bool
420// TPETRA_DEPRECATED
421BlockMultiVector<Scalar, LO, GO, Node>::
422getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals)
423{
424 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
425 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
426 return false;
427 } else {
428 auto X_ij = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
429 vals = reinterpret_cast<Scalar*> (X_ij.data ());
430 return true;
431 }
432}
433
434template<class Scalar, class LO, class GO, class Node>
435typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
436// TPETRA_DEPRECATED
437BlockMultiVector<Scalar, LO, GO, Node>::
438getLocalBlock (const LO localRowIndex,
439 const LO colIndex)
440{
441 if (! isValidLocalMeshIndex (localRowIndex)) {
442 return little_host_vec_type ();
443 } else {
444 const size_t blockSize = getBlockSize ();
445 const size_t offset = colIndex * this->getStrideY () +
446 localRowIndex * blockSize;
447 impl_scalar_type* blockRaw = this->getRawPtr () + offset;
448 return little_host_vec_type (blockRaw, blockSize);
449 }
450}
451
452#endif // TPETRA_ENABLE_DEPRECATED_CODE
453
454template<class Scalar, class LO, class GO, class Node>
455typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
456BlockMultiVector<Scalar, LO, GO, Node>::
457getLocalBlockHost (const LO localRowIndex,
458 const LO colIndex,
459 const Access::ReadOnlyStruct) const
460{
461 if (!isValidLocalMeshIndex(localRowIndex)) {
462 return const_little_host_vec_type();
463 } else {
464 const size_t blockSize = getBlockSize();
465 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
466 LO startRow = localRowIndex*blockSize;
467 LO endRow = startRow + blockSize;
468 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
469 colIndex);
470 }
471}
472
// Return a writable host view of the block at (localRowIndex,
// colIndex), declaring intent to overwrite every entry (OverwriteAll
// skips any device-to-host sync of existing data).  Empty view for
// invalid mesh indices.
473template<class Scalar, class LO, class GO, class Node>
474typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
476getLocalBlockHost (const LO localRowIndex,
477 const LO colIndex,
478 const Access::OverwriteAllStruct)
479{
480 if (!isValidLocalMeshIndex(localRowIndex)) {
481 return little_host_vec_type();
482 } else {
483 const size_t blockSize = getBlockSize();
484 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
485 LO startRow = localRowIndex*blockSize;
486 LO endRow = startRow + blockSize;
487 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
488 colIndex);
489 }
490}
491
// Return a read-write host view of the block at (localRowIndex,
// colIndex).  Empty view for invalid mesh indices.
492template<class Scalar, class LO, class GO, class Node>
493typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
495getLocalBlockHost (const LO localRowIndex,
496 const LO colIndex,
497 const Access::ReadWriteStruct)
498{
499 if (!isValidLocalMeshIndex(localRowIndex)) {
500 return little_host_vec_type();
501 } else {
502 const size_t blockSize = getBlockSize();
503 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
504 LO startRow = localRowIndex*blockSize;
505 LO endRow = startRow + blockSize;
506 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
507 colIndex);
508 }
509}
510
511template<class Scalar, class LO, class GO, class Node>
512Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
513BlockMultiVector<Scalar, LO, GO, Node>::
514getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
515{
516 using Teuchos::rcp;
517 using Teuchos::rcpFromRef;
518
519 // The source object of an Import or Export must be a
520 // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
521 // them in that order; one must succeed. Note that the target of
522 // the Import or Export calls checkSizes in DistObject's doTransfer.
523 typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
524 const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
525 if (srcBlkVec == nullptr) {
526 const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
527 if (srcMultiVec == nullptr) {
528 // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
529 // currently does a shallow copy by default. This is why we
530 // return by RCP, rather than by value.
531 return rcp (new mv_type ());
532 } else { // src is a MultiVector
533 return rcp (srcMultiVec, false); // nonowning RCP
534 }
535 } else { // src is a BlockMultiVector
536 return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
537 }
538}
539
// DistObject hook: the source is compatible iff it can be viewed as a
// MultiVector (getMultiVectorFromSrcDistObject returns non-null RCP;
// here "null" can only occur if that helper's contract changes).
540template<class Scalar, class LO, class GO, class Node>
543{
544 return ! getMultiVectorFromSrcDistObject (src).is_null ();
545}
546
// DistObject hook: direct Import/Export of BlockMultiVector is not
// supported; always throws.  Communicate via a point Map instead (see
// makePointMap).
// NOTE(review): the message reads "...this " "instead..." — it is
// missing the "; " separator that the packAndPrepare and
// unpackAndCombine messages have after "this".
547template<class Scalar, class LO, class GO, class Node>
550(const SrcDistObject& src,
551 const size_t numSameIDs,
552 const Kokkos::DualView<const local_ordinal_type*,
553 buffer_device_type>& permuteToLIDs,
554 const Kokkos::DualView<const local_ordinal_type*,
555 buffer_device_type>& permuteFromLIDs,
556 const CombineMode CM)
557{
558 TEUCHOS_TEST_FOR_EXCEPTION
559 (true, std::logic_error,
560 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
561 "instead, create a point importer using makePointMap function.");
562}
563
564template<class Scalar, class LO, class GO, class Node>
565void BlockMultiVector<Scalar, LO, GO, Node>::
566packAndPrepare
567(const SrcDistObject& src,
568 const Kokkos::DualView<const local_ordinal_type*,
569 buffer_device_type>& exportLIDs,
570 Kokkos::DualView<packet_type*,
571 buffer_device_type>& exports,
572 Kokkos::DualView<size_t*,
573 buffer_device_type> numPacketsPerLID,
574 size_t& constantNumPackets)
575{
576 TEUCHOS_TEST_FOR_EXCEPTION
577 (true, std::logic_error,
578 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
579 "instead, create a point importer using makePointMap function.");
580}
581
582template<class Scalar, class LO, class GO, class Node>
583void BlockMultiVector<Scalar, LO, GO, Node>::
584unpackAndCombine
585(const Kokkos::DualView<const local_ordinal_type*,
586 buffer_device_type>& importLIDs,
587 Kokkos::DualView<packet_type*,
588 buffer_device_type> imports,
589 Kokkos::DualView<size_t*,
590 buffer_device_type> numPacketsPerLID,
591 const size_t constantNumPackets,
592 const CombineMode combineMode)
593{
594 TEUCHOS_TEST_FOR_EXCEPTION
595 (true, std::logic_error,
596 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
597 "instead, create a point importer using makePointMap function.");
598}
599
// True iff meshLocalIndex is a valid, locally owned index in the mesh
// Map (not the "invalid" sentinel, and owned by this process).
600template<class Scalar, class LO, class GO, class Node>
602isValidLocalMeshIndex (const LO meshLocalIndex) const
603{
604 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
605 meshMap_.isNodeLocalElement (meshLocalIndex);
606}
607
// Fill every entry with val (delegates to the point MultiVector).
608template<class Scalar, class LO, class GO, class Node>
610putScalar (const Scalar& val)
611{
612 mv_.putScalar (val);
613}
614
// Multiply every entry in place by val (delegates to the point
// MultiVector).
615template<class Scalar, class LO, class GO, class Node>
617scale (const Scalar& val)
618{
619 mv_.scale (val);
620}
621
// this := beta*this + alpha*X (delegates to the point MultiVector).
622template<class Scalar, class LO, class GO, class Node>
624update (const Scalar& alpha,
626 const Scalar& beta)
627{
628 mv_.update (alpha, X.mv_, beta);
629}
630
631namespace Impl {
632// Y := alpha * D * X
// Kokkos functor computing Y := alpha * D * X block by block: one call
// of operator() handles mesh row k, looping over every column of X.
// For each column, the block of Y is first zeroed with FILL, then
// accumulated with GEMV (GEMV updates, +=, its last argument).  All
// Views are held through unmanaged copies so the functor is cheap to
// copy into a parallel_for.
633template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
634struct BlockWiseMultiply {
635 typedef typename ViewD::size_type Size;
636
637private:
638 typedef typename ViewD::device_type Device;
639 typedef typename ViewD::non_const_value_type ImplScalar;
640 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
641
642 template <typename View>
643 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
644 typename View::device_type, Unmanaged>;
645 typedef UnmanagedView<ViewY> UnMViewY;
646 typedef UnmanagedView<ViewD> UnMViewD;
647 typedef UnmanagedView<ViewX> UnMViewX;
648
649 const Size block_size_;
650 Scalar alpha_;
651 UnMViewY Y_;
652 UnMViewD D_;
653 UnMViewX X_;
654
655public:
656 BlockWiseMultiply (const Size block_size, const Scalar& alpha,
657 const ViewY& Y, const ViewD& D, const ViewX& X)
658 : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
659 {}
660
661 KOKKOS_INLINE_FUNCTION
662 void operator() (const Size k) const {
663 const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
664 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
665 const auto num_vecs = X_.extent(1);
666 for (Size i = 0; i < num_vecs; ++i) {
667 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
668 auto X_curBlk = Kokkos::subview(X_, kslice, i);
669 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
670 // Y_curBlk := alpha * D_curBlk * X_curBlk.
671 // Recall that GEMV does an update (+=) of the last argument.
672 Tpetra::FILL(Y_curBlk, zero);
673 Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
674 }
675 }
676};
677
678template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
679inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
680createBlockWiseMultiply (const int block_size, const Scalar& alpha,
681 const ViewY& Y, const ViewD& D, const ViewX& X) {
682 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
683}
684
// Kokkos functor for one column of a block Jacobi update:
// Y := beta * Y + alpha * D * Z, applied block by block (mesh row k per
// operator() call).  Y and Z are rank-1 (single column); D is rank-3
// (lclNumMeshRows x blockSize x blockSize).  The caller is responsible
// for having already formed Z := X - Z (see the commented reminder in
// operator()), and only the alpha != 0 case need be handled here.
685template <typename ViewY,
686 typename Scalar,
687 typename ViewD,
688 typename ViewZ,
689 typename LO = typename ViewY::size_type>
690class BlockJacobiUpdate {
691private:
692 typedef typename ViewD::device_type Device;
693 typedef typename ViewD::non_const_value_type ImplScalar;
694 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
695
696 template <typename ViewType>
697 using UnmanagedView = Kokkos::View<typename ViewType::data_type,
698 typename ViewType::array_layout,
699 typename ViewType::device_type,
700 Unmanaged>;
701 typedef UnmanagedView<ViewY> UnMViewY;
702 typedef UnmanagedView<ViewD> UnMViewD;
703 typedef UnmanagedView<ViewZ> UnMViewZ;
704
705 const LO blockSize_;
706 UnMViewY Y_;
707 const Scalar alpha_;
708 UnMViewD D_;
709 UnMViewZ Z_;
710 const Scalar beta_;
711
712public:
713 BlockJacobiUpdate (const ViewY& Y,
714 const Scalar& alpha,
715 const ViewD& D,
716 const ViewZ& Z,
717 const Scalar& beta) :
718 blockSize_ (D.extent (1)),
719 // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
720 Y_ (Y),
721 alpha_ (alpha),
722 D_ (D),
723 Z_ (Z),
724 beta_ (beta)
725 {
726 static_assert (static_cast<int> (ViewY::rank) == 1,
727 "Y must have rank 1.");
728 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
729 static_assert (static_cast<int> (ViewZ::rank) == 1,
730 "Z must have rank 1.");
731 // static_assert (static_cast<int> (ViewZ::rank) ==
732 // static_cast<int> (ViewY::rank),
733 // "Z must have the same rank as Y.");
734 }
735
736 KOKKOS_INLINE_FUNCTION void
737 operator() (const LO& k) const
738 {
739 using Kokkos::ALL;
740 using Kokkos::subview;
741 typedef Kokkos::pair<LO, LO> range_type;
742 typedef Kokkos::Details::ArithTraits<Scalar> KAT;
743
744 // We only have to implement the alpha != 0 case.
745
746 auto D_curBlk = subview (D_, k, ALL (), ALL ());
747 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
748
749 // Z.update (STS::one (), X, -STS::one ()); // assume this is done
750
751 auto Z_curBlk = subview (Z_, kslice);
752 auto Y_curBlk = subview (Y_, kslice);
753 // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
754 if (beta_ == KAT::zero ()) {
755 Tpetra::FILL (Y_curBlk, KAT::zero ());
756 }
757 else if (beta_ != KAT::one ()) {
758 Tpetra::SCAL (beta_, Y_curBlk);
759 }
760 Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
761 }
762};
763
// Driver for the BlockJacobiUpdate functor: Y := beta*Y + alpha*D*Z
// blockwise, one parallel_for iteration per mesh row, on ViewY's
// execution space.  Inputs are checked by static_assert (rank) and, in
// debug builds, by runtime extent checks.
764template<class ViewY,
765 class Scalar,
766 class ViewD,
767 class ViewZ,
768 class LO = typename ViewD::size_type>
769void
770blockJacobiUpdate (const ViewY& Y,
771 const Scalar& alpha,
772 const ViewD& D,
773 const ViewZ& Z,
774 const Scalar& beta)
775{
776 static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
777 static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
778 static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
779 static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
780 "Y and Z must have the same rank.");
781 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
782
783 const auto lclNumMeshRows = D.extent (0);
784
785#ifdef HAVE_TPETRA_DEBUG
786 // D.extent(0) is the (local) number of mesh rows.
787 // D.extent(1) is the block size. Thus, their product should be
788 // the local number of point rows, that is, the number of rows in Y.
789 const auto blkSize = D.extent (1);
790 const auto lclNumPtRows = lclNumMeshRows * blkSize;
791 TEUCHOS_TEST_FOR_EXCEPTION
792 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
793 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
794 "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
795 << " = " << lclNumPtRows << ".");
796 TEUCHOS_TEST_FOR_EXCEPTION
797 (Y.extent (0) != Z.extent (0), std::invalid_argument,
798 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
799 "Z.extent(0) = " << Z.extent (0) << ".");
800 TEUCHOS_TEST_FOR_EXCEPTION
801 (Y.extent (1) != Z.extent (1), std::invalid_argument,
802 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
803 "Z.extent(1) = " << Z.extent (1) << ".");
804#endif // HAVE_TPETRA_DEBUG
805
806 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
807 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
808 // lclNumMeshRows must fit in LO, else the Map would not be correct.
809 range_type range (0, static_cast<LO> (lclNumMeshRows));
810 Kokkos::parallel_for (range, functor);
811}
812
813} // namespace Impl
814
// this := alpha * D * X, with D a block diagonal matrix (one
// blockSize x blockSize block per local mesh row).  alpha == 0 short-
// circuits to putScalar(0); otherwise the BlockWiseMultiply functor
// runs over all local mesh rows on this object's execution space.
815template<class Scalar, class LO, class GO, class Node>
817blockWiseMultiply (const Scalar& alpha,
818 const Kokkos::View<const impl_scalar_type***,
819 device_type, Kokkos::MemoryUnmanaged>& D,
821{
822 using Kokkos::ALL;
823 typedef typename device_type::execution_space execution_space;
824 const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
825
826 if (alpha == STS::zero ()) {
827 this->putScalar (STS::zero ());
828 }
829 else { // alpha != 0
830 const LO blockSize = this->getBlockSize ();
831 const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
832 auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
833 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
834 auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
835
836 // Use an explicit RangePolicy with the desired execution space.
837 // Otherwise, if you just use a number, it runs on the default
838 // execution space. For example, if the default execution space
839 // is Cuda but the current execution space is Serial, using just a
840 // number would incorrectly run with Cuda.
841 Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
842 Kokkos::parallel_for (range, bwm);
843 }
844}
845
// Block Jacobi update: this := beta*this + alpha*D*(X - Z), column by
// column.  Z is clobbered as scratch: first Z := X - Z (Z.update),
// then each column is fed to Impl::blockJacobiUpdate.  alpha == 0
// short-circuits to scale(beta) and leaves Z untouched.
846template<class Scalar, class LO, class GO, class Node>
848blockJacobiUpdate (const Scalar& alpha,
849 const Kokkos::View<const impl_scalar_type***,
850 device_type, Kokkos::MemoryUnmanaged>& D,
853 const Scalar& beta)
854{
855 using Kokkos::ALL;
856 using Kokkos::subview;
857 typedef impl_scalar_type IST;
858
859 const IST alphaImpl = static_cast<IST> (alpha);
860 const IST betaImpl = static_cast<IST> (beta);
861 const LO numVecs = mv_.getNumVectors ();
862
863 if (alpha == STS::zero ()) { // Y := beta * Y
864 this->scale (beta);
865 }
866 else { // alpha != 0
867 Z.update (STS::one (), X, -STS::one ());
868 for (LO j = 0; j < numVecs; ++j) {
869 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
870 auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
871 auto Y_lcl_j = subview (Y_lcl, ALL (), j);
872 auto Z_lcl_j = subview (Z_lcl, ALL (), j);
873 Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
874 }
875 }
876}
877
878} // namespace Tpetra
879
880//
881// Explicit instantiation macro
882//
883// Must be expanded from within the Tpetra namespace!
884//
885#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
886 template class BlockMultiVector< S, LO, GO, NODE >;
887
888#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update: this = beta*this + alpha*D*(X - Z); Z is overwritten with X - Z as scratch.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
typename device_type::execution_space execution_space
The Kokkos execution space.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
One or more distributed dense vectors.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
CombineMode
Rule for combining data in an Import or Export.