Tpetra parallel linear algebra Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44// mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45// incorrect (as() is not a device function) and usually irrelevant
46// (it would only matter if LocalOrdinal were bigger than size_t on a
47// particular platform, which is unlikely).
48
49// KDD Aug 2020: In the permute/pack/unpack functors,
50// the Enabled template parameter is specialized in
51// downstream packages like Stokhos using SFINAE to provide partial
52// specializations based on the scalar type of the SrcView and DstView
53// template parameters. See #7898.
54// Do not remove it before checking with Stokhos and other specializing users.
55
56#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
57#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
58
59#include "Kokkos_Core.hpp"
60#include "Kokkos_ArithTraits.hpp"
61#include "impl/Kokkos_Atomic_Generic.hpp"
62#include <sstream>
63#include <stdexcept>
64
65namespace Tpetra {
66namespace KokkosRefactor {
67namespace Details {
68
74namespace Impl {
75
82template<class IntegerType,
83 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
85 static KOKKOS_INLINE_FUNCTION bool
86 test (const IntegerType x,
87 const IntegerType exclusiveUpperBound);
88};
89
90// Partial specialization for the case where IntegerType IS signed.
91template<class IntegerType>
92struct OutOfBounds<IntegerType, true> {
93 static KOKKOS_INLINE_FUNCTION bool
94 test (const IntegerType x,
95 const IntegerType exclusiveUpperBound)
96 {
97 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
98 }
99};
100
101// Partial specialization for the case where IntegerType is NOT signed.
102template<class IntegerType>
103struct OutOfBounds<IntegerType, false> {
104 static KOKKOS_INLINE_FUNCTION bool
105 test (const IntegerType x,
106 const IntegerType exclusiveUpperBound)
107 {
108 return x >= exclusiveUpperBound;
109 }
110};
111
114template<class IntegerType>
115KOKKOS_INLINE_FUNCTION bool
116outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
117{
118 return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
119}
120
121} // namespace Impl
122
123 // Functors for implementing packAndPrepare and unpackAndCombine
124 // through parallel_for
125
126 template <typename DstView, typename SrcView, typename IdxView,
127 typename Enabled = void>
128 struct PackArraySingleColumn {
129 typedef typename DstView::execution_space execution_space;
130 typedef typename execution_space::size_type size_type;
131
132 DstView dst;
133 SrcView src;
134 IdxView idx;
135 size_t col;
136
137 PackArraySingleColumn (const DstView& dst_,
138 const SrcView& src_,
139 const IdxView& idx_,
140 const size_t col_) :
141 dst(dst_), src(src_), idx(idx_), col(col_) {}
142
143 KOKKOS_INLINE_FUNCTION void
144 operator() (const size_type k) const {
145 dst(k) = src(idx(k), col);
146 }
147
148 static void
149 pack (const DstView& dst,
150 const SrcView& src,
151 const IdxView& idx,
152 const size_t col)
153 {
154 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
155 Kokkos::parallel_for
156 ("Tpetra::MultiVector pack one col",
157 range_type (0, idx.size ()),
158 PackArraySingleColumn (dst, src, idx, col));
159 }
160 };
161
162 template <typename DstView,
163 typename SrcView,
164 typename IdxView,
165 typename SizeType = typename DstView::execution_space::size_type,
166 typename Enabled = void>
167 class PackArraySingleColumnWithBoundsCheck {
168 private:
169 static_assert (Kokkos::Impl::is_view<DstView>::value,
170 "DstView must be a Kokkos::View.");
171 static_assert (Kokkos::Impl::is_view<SrcView>::value,
172 "SrcView must be a Kokkos::View.");
173 static_assert (Kokkos::Impl::is_view<IdxView>::value,
174 "IdxView must be a Kokkos::View.");
175 static_assert (static_cast<int> (DstView::rank) == 1,
176 "DstView must be a rank-1 Kokkos::View.");
177 static_assert (static_cast<int> (SrcView::rank) == 2,
178 "SrcView must be a rank-2 Kokkos::View.");
179 static_assert (static_cast<int> (IdxView::rank) == 1,
180 "IdxView must be a rank-1 Kokkos::View.");
181 static_assert (std::is_integral<SizeType>::value,
182 "SizeType must be a built-in integer type.");
183 public:
184 typedef SizeType size_type;
185 using value_type = size_t;
186
187 private:
188 DstView dst;
189 SrcView src;
190 IdxView idx;
191 size_type col;
192
193 public:
194 PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
195 const SrcView& src_,
196 const IdxView& idx_,
197 const size_type col_) :
198 dst (dst_), src (src_), idx (idx_), col (col_) {}
199
200 KOKKOS_INLINE_FUNCTION void
201 operator() (const size_type k, value_type& lclErrCount) const {
202 using index_type = typename IdxView::non_const_value_type;
203
204 const index_type lclRow = idx(k);
205 if (lclRow < static_cast<index_type> (0) ||
206 lclRow >= static_cast<index_type> (src.extent (0))) {
207 ++lclErrCount;
208 }
209 else {
210 dst(k) = src(lclRow, col);
211 }
212 }
213
214 KOKKOS_INLINE_FUNCTION
215 void init (value_type& initialErrorCount) const {
216 initialErrorCount = 0;
217 }
218
219 KOKKOS_INLINE_FUNCTION void
220 join (volatile value_type& dstErrorCount,
221 const volatile value_type& srcErrorCount) const
222 {
223 dstErrorCount += srcErrorCount;
224 }
225
226 static void
227 pack (const DstView& dst,
228 const SrcView& src,
229 const IdxView& idx,
230 const size_type col)
231 {
232 typedef typename DstView::execution_space execution_space;
233 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
234 typedef typename IdxView::non_const_value_type index_type;
235
236 size_t errorCount = 0;
237 Kokkos::parallel_reduce
238 ("Tpetra::MultiVector pack one col debug only",
239 range_type (0, idx.size ()),
240 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
241 errorCount);
242
243 if (errorCount != 0) {
244 // Go back and find the out-of-bounds entries in the index
245 // array. Performance doesn't matter since we are already in
246 // an error state, so we can do this sequentially, on host.
247 auto idx_h = Kokkos::create_mirror_view (idx);
248 Kokkos::deep_copy (idx_h, idx);
249
250 std::vector<index_type> badIndices;
251 const size_type numInds = idx_h.extent (0);
252 for (size_type k = 0; k < numInds; ++k) {
253 if (idx_h(k) < static_cast<index_type> (0) ||
254 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
255 badIndices.push_back (idx_h(k));
256 }
257 }
258
259 TEUCHOS_TEST_FOR_EXCEPTION
260 (errorCount != badIndices.size (), std::logic_error,
261 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
262 << " != badIndices.size() = " << badIndices.size () << ". This sho"
263 "uld never happen. Please report this to the Tpetra developers.");
264
265 std::ostringstream os;
266 os << "MultiVector single-column pack kernel had "
267 << badIndices.size () << " out-of bounds index/ices. "
268 "Here they are: [";
269 for (size_t k = 0; k < badIndices.size (); ++k) {
270 os << badIndices[k];
271 if (k + 1 < badIndices.size ()) {
272 os << ", ";
273 }
274 }
275 os << "].";
276 throw std::runtime_error (os.str ());
277 }
278 }
279 };
280
281
282 template <typename DstView, typename SrcView, typename IdxView>
283 void
284 pack_array_single_column (const DstView& dst,
285 const SrcView& src,
286 const IdxView& idx,
287 const size_t col,
288 const bool debug = true)
289 {
290 static_assert (Kokkos::Impl::is_view<DstView>::value,
291 "DstView must be a Kokkos::View.");
292 static_assert (Kokkos::Impl::is_view<SrcView>::value,
293 "SrcView must be a Kokkos::View.");
294 static_assert (Kokkos::Impl::is_view<IdxView>::value,
295 "IdxView must be a Kokkos::View.");
296 static_assert (static_cast<int> (DstView::rank) == 1,
297 "DstView must be a rank-1 Kokkos::View.");
298 static_assert (static_cast<int> (SrcView::rank) == 2,
299 "SrcView must be a rank-2 Kokkos::View.");
300 static_assert (static_cast<int> (IdxView::rank) == 1,
301 "IdxView must be a rank-1 Kokkos::View.");
302
303 if (debug) {
304 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
305 impl_type::pack (dst, src, idx, col);
306 }
307 else {
308 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
309 impl_type::pack (dst, src, idx, col);
310 }
311 }
312
313 template <typename DstView, typename SrcView, typename IdxView,
314 typename Enabled = void>
315 struct PackArrayMultiColumn {
316 typedef typename DstView::execution_space execution_space;
317 typedef typename execution_space::size_type size_type;
318
319 DstView dst;
320 SrcView src;
321 IdxView idx;
322 size_t numCols;
323
324 PackArrayMultiColumn (const DstView& dst_,
325 const SrcView& src_,
326 const IdxView& idx_,
327 const size_t numCols_) :
328 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
329
330 KOKKOS_INLINE_FUNCTION void
331 operator() (const size_type k) const {
332 const typename IdxView::value_type localRow = idx(k);
333 const size_t offset = k*numCols;
334 for (size_t j = 0; j < numCols; ++j) {
335 dst(offset + j) = src(localRow, j);
336 }
337 }
338
339 static void pack(const DstView& dst,
340 const SrcView& src,
341 const IdxView& idx,
342 size_t numCols) {
343 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
344 Kokkos::parallel_for
345 ("Tpetra::MultiVector pack multicol const stride",
346 range_type (0, idx.size ()),
347 PackArrayMultiColumn (dst, src, idx, numCols));
348 }
349 };
350
351 template <typename DstView,
352 typename SrcView,
353 typename IdxView,
354 typename SizeType = typename DstView::execution_space::size_type,
355 typename Enabled = void>
356 class PackArrayMultiColumnWithBoundsCheck {
357 public:
358 using size_type = SizeType;
359 using value_type = size_t;
360
361 private:
362 DstView dst;
363 SrcView src;
364 IdxView idx;
365 size_type numCols;
366
367 public:
368 PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
369 const SrcView& src_,
370 const IdxView& idx_,
371 const size_type numCols_) :
372 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
373
374 KOKKOS_INLINE_FUNCTION void
375 operator() (const size_type k, value_type& lclErrorCount) const {
376 typedef typename IdxView::non_const_value_type index_type;
377
378 const index_type lclRow = idx(k);
379 if (lclRow < static_cast<index_type> (0) ||
380 lclRow >= static_cast<index_type> (src.extent (0))) {
381 ++lclErrorCount; // failed
382 }
383 else {
384 const size_type offset = k*numCols;
385 for (size_type j = 0; j < numCols; ++j) {
386 dst(offset + j) = src(lclRow, j);
387 }
388 }
389 }
390
391 KOKKOS_INLINE_FUNCTION
392 void init (value_type& initialErrorCount) const {
393 initialErrorCount = 0;
394 }
395
396 KOKKOS_INLINE_FUNCTION void
397 join (volatile value_type& dstErrorCount,
398 const volatile value_type& srcErrorCount) const
399 {
400 dstErrorCount += srcErrorCount;
401 }
402
403 static void
404 pack (const DstView& dst,
405 const SrcView& src,
406 const IdxView& idx,
407 const size_type numCols)
408 {
409 typedef typename DstView::execution_space execution_space;
410 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
411 typedef typename IdxView::non_const_value_type index_type;
412
413 size_t errorCount = 0;
414 Kokkos::parallel_reduce
415 ("Tpetra::MultiVector pack multicol const stride debug only",
416 range_type (0, idx.size ()),
417 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
418 errorCount);
419 if (errorCount != 0) {
420 // Go back and find the out-of-bounds entries in the index
421 // array. Performance doesn't matter since we are already in
422 // an error state, so we can do this sequentially, on host.
423 auto idx_h = Kokkos::create_mirror_view (idx);
424 Kokkos::deep_copy (idx_h, idx);
425
426 std::vector<index_type> badIndices;
427 const size_type numInds = idx_h.extent (0);
428 for (size_type k = 0; k < numInds; ++k) {
429 if (idx_h(k) < static_cast<index_type> (0) ||
430 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
431 badIndices.push_back (idx_h(k));
432 }
433 }
434
435 TEUCHOS_TEST_FOR_EXCEPTION
436 (errorCount != badIndices.size (), std::logic_error,
437 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
438 << " != badIndices.size() = " << badIndices.size () << ". This sho"
439 "uld never happen. Please report this to the Tpetra developers.");
440
441 std::ostringstream os;
442 os << "Tpetra::MultiVector multiple-column pack kernel had "
443 << badIndices.size () << " out-of bounds index/ices (errorCount = "
444 << errorCount << "): [";
445 for (size_t k = 0; k < badIndices.size (); ++k) {
446 os << badIndices[k];
447 if (k + 1 < badIndices.size ()) {
448 os << ", ";
449 }
450 }
451 os << "].";
452 throw std::runtime_error (os.str ());
453 }
454 }
455 };
456
457
458 template <typename DstView,
459 typename SrcView,
460 typename IdxView>
461 void
462 pack_array_multi_column (const DstView& dst,
463 const SrcView& src,
464 const IdxView& idx,
465 const size_t numCols,
466 const bool debug = true)
467 {
468 static_assert (Kokkos::Impl::is_view<DstView>::value,
469 "DstView must be a Kokkos::View.");
470 static_assert (Kokkos::Impl::is_view<SrcView>::value,
471 "SrcView must be a Kokkos::View.");
472 static_assert (Kokkos::Impl::is_view<IdxView>::value,
473 "IdxView must be a Kokkos::View.");
474 static_assert (static_cast<int> (DstView::rank) == 1,
475 "DstView must be a rank-1 Kokkos::View.");
476 static_assert (static_cast<int> (SrcView::rank) == 2,
477 "SrcView must be a rank-2 Kokkos::View.");
478 static_assert (static_cast<int> (IdxView::rank) == 1,
479 "IdxView must be a rank-1 Kokkos::View.");
480
481 if (debug) {
482 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
483 SrcView, IdxView> impl_type;
484 impl_type::pack (dst, src, idx, numCols);
485 }
486 else {
487 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
488 impl_type::pack (dst, src, idx, numCols);
489 }
490 }
491
492 template <typename DstView, typename SrcView, typename IdxView,
493 typename ColView, typename Enabled = void>
494 struct PackArrayMultiColumnVariableStride {
495 typedef typename DstView::execution_space execution_space;
496 typedef typename execution_space::size_type size_type;
497
498 DstView dst;
499 SrcView src;
500 IdxView idx;
501 ColView col;
502 size_t numCols;
503
504 PackArrayMultiColumnVariableStride (const DstView& dst_,
505 const SrcView& src_,
506 const IdxView& idx_,
507 const ColView& col_,
508 const size_t numCols_) :
509 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
510
511 KOKKOS_INLINE_FUNCTION
512 void operator() (const size_type k) const {
513 const typename IdxView::value_type localRow = idx(k);
514 const size_t offset = k*numCols;
515 for (size_t j = 0; j < numCols; ++j) {
516 dst(offset + j) = src(localRow, col(j));
517 }
518 }
519
520 static void pack(const DstView& dst,
521 const SrcView& src,
522 const IdxView& idx,
523 const ColView& col,
524 size_t numCols) {
525 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
526 Kokkos::parallel_for
527 ("Tpetra::MultiVector pack multicol var stride",
528 range_type (0, idx.size ()),
529 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
530 }
531 };
532
533 template <typename DstView,
534 typename SrcView,
535 typename IdxView,
536 typename ColView,
537 typename SizeType = typename DstView::execution_space::size_type,
538 typename Enabled = void>
539 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
540 public:
541 using size_type = SizeType;
542 using value_type = size_t;
543
544 private:
545 DstView dst;
546 SrcView src;
547 IdxView idx;
548 ColView col;
549 size_type numCols;
550
551 public:
552 PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
553 const SrcView& src_,
554 const IdxView& idx_,
555 const ColView& col_,
556 const size_type numCols_) :
557 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
558
559 KOKKOS_INLINE_FUNCTION void
560 operator() (const size_type k, value_type& lclErrorCount) const {
561 typedef typename IdxView::non_const_value_type row_index_type;
562 typedef typename ColView::non_const_value_type col_index_type;
563
564 const row_index_type lclRow = idx(k);
565 if (lclRow < static_cast<row_index_type> (0) ||
566 lclRow >= static_cast<row_index_type> (src.extent (0))) {
567 ++lclErrorCount = 0;
568 }
569 else {
570 const size_type offset = k*numCols;
571 for (size_type j = 0; j < numCols; ++j) {
572 const col_index_type lclCol = col(j);
573 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
574 ++lclErrorCount = 0;
575 }
576 else { // all indices are valid; do the assignment
577 dst(offset + j) = src(lclRow, lclCol);
578 }
579 }
580 }
581 }
582
583 KOKKOS_INLINE_FUNCTION void
584 init (value_type& initialErrorCount) const {
585 initialErrorCount = 0;
586 }
587
588 KOKKOS_INLINE_FUNCTION void
589 join (volatile value_type& dstErrorCount,
590 const volatile value_type& srcErrorCount) const
591 {
592 dstErrorCount += srcErrorCount;
593 }
594
595 static void
596 pack (const DstView& dst,
597 const SrcView& src,
598 const IdxView& idx,
599 const ColView& col,
600 const size_type numCols)
601 {
602 using execution_space = typename DstView::execution_space;
603 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
604 using row_index_type = typename IdxView::non_const_value_type;
605 using col_index_type = typename ColView::non_const_value_type;
606
607 size_t errorCount = 0;
608 Kokkos::parallel_reduce
609 ("Tpetra::MultiVector pack multicol var stride debug only",
610 range_type (0, idx.size ()),
611 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
612 col, numCols),
613 errorCount);
614 if (errorCount != 0) {
615 constexpr size_t maxNumBadIndicesToPrint = 100;
616
617 std::ostringstream os; // for error reporting
618 os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
619 "found " << errorCount
620 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
621
622 // Go back and find any out-of-bounds entries in the array of
623 // row indices. Performance doesn't matter since we are already
624 // in an error state, so we can do this sequentially, on host.
625 auto idx_h = Kokkos::create_mirror_view (idx);
626 Kokkos::deep_copy (idx_h, idx);
627
628 std::vector<row_index_type> badRows;
629 const size_type numRowInds = idx_h.extent (0);
630 for (size_type k = 0; k < numRowInds; ++k) {
631 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
632 badRows.push_back (idx_h(k));
633 }
634 }
635
636 if (badRows.size () != 0) {
637 os << badRows.size () << " out-of-bounds row ind"
638 << (badRows.size () != size_t (1) ? "ices" : "ex");
639 if (badRows.size () <= maxNumBadIndicesToPrint) {
640 os << ": [";
641 for (size_t k = 0; k < badRows.size (); ++k) {
642 os << badRows[k];
643 if (k + 1 < badRows.size ()) {
644 os << ", ";
645 }
646 }
647 os << "]. ";
648 }
649 else {
650 os << ". ";
651 }
652 }
653 else {
654 os << "No out-of-bounds row indices. ";
655 }
656
657 // Go back and find any out-of-bounds entries in the array
658 // of column indices.
659 auto col_h = Kokkos::create_mirror_view (col);
660 Kokkos::deep_copy (col_h, col);
661
662 std::vector<col_index_type> badCols;
663 const size_type numColInds = col_h.extent (0);
664 for (size_type k = 0; k < numColInds; ++k) {
665 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
666 badCols.push_back (col_h(k));
667 }
668 }
669
670 if (badCols.size () != 0) {
671 os << badCols.size () << " out-of-bounds column ind"
672 << (badCols.size () != size_t (1) ? "ices" : "ex");
673 if (badCols.size () <= maxNumBadIndicesToPrint) {
674 os << ": [";
675 for (size_t k = 0; k < badCols.size (); ++k) {
676 os << badCols[k];
677 if (k + 1 < badCols.size ()) {
678 os << ", ";
679 }
680 }
681 os << "]. ";
682 }
683 else {
684 os << ". ";
685 }
686 }
687 else {
688 os << "No out-of-bounds column indices. ";
689 }
690
691 TEUCHOS_TEST_FOR_EXCEPTION
692 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
693 std::logic_error, "Tpetra::MultiVector variable stride pack "
694 "kernel reports errorCount=" << errorCount << ", but we failed "
695 "to find any bad rows or columns. This should never happen. "
696 "Please report this to the Tpetra developers.");
697
698 throw std::runtime_error (os.str ());
699 } // hasErr
700 }
701 };
702
703 template <typename DstView,
704 typename SrcView,
705 typename IdxView,
706 typename ColView>
707 void
708 pack_array_multi_column_variable_stride (const DstView& dst,
709 const SrcView& src,
710 const IdxView& idx,
711 const ColView& col,
712 const size_t numCols,
713 const bool debug = true)
714 {
715 static_assert (Kokkos::Impl::is_view<DstView>::value,
716 "DstView must be a Kokkos::View.");
717 static_assert (Kokkos::Impl::is_view<SrcView>::value,
718 "SrcView must be a Kokkos::View.");
719 static_assert (Kokkos::Impl::is_view<IdxView>::value,
720 "IdxView must be a Kokkos::View.");
721 static_assert (Kokkos::Impl::is_view<ColView>::value,
722 "ColView must be a Kokkos::View.");
723 static_assert (static_cast<int> (DstView::rank) == 1,
724 "DstView must be a rank-1 Kokkos::View.");
725 static_assert (static_cast<int> (SrcView::rank) == 2,
726 "SrcView must be a rank-2 Kokkos::View.");
727 static_assert (static_cast<int> (IdxView::rank) == 1,
728 "IdxView must be a rank-1 Kokkos::View.");
729 static_assert (static_cast<int> (ColView::rank) == 1,
730 "ColView must be a rank-1 Kokkos::View.");
731
732 if (debug) {
733 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
734 SrcView, IdxView, ColView> impl_type;
735 impl_type::pack (dst, src, idx, col, numCols);
736 }
737 else {
738 typedef PackArrayMultiColumnVariableStride<DstView,
739 SrcView, IdxView, ColView> impl_type;
740 impl_type::pack (dst, src, idx, col, numCols);
741 }
742 }
743
744 // Tag types to indicate whether to use atomic updates in the
745 // various CombineMode "Op"s.
746 struct atomic_tag {};
747 struct nonatomic_tag {};
748
749 struct AddOp {
750 template<class SC>
751 KOKKOS_INLINE_FUNCTION
752 void operator() (atomic_tag, SC& dest, const SC& src) const {
753 Kokkos::atomic_add (&dest, src);
754 }
755
756 template<class SC>
757 KOKKOS_INLINE_FUNCTION
758 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
759 dest += src;
760 }
761 };
762
763 struct InsertOp {
764 // There's no point to using Kokkos::atomic_assign for the REPLACE
765 // or INSERT CombineModes, since this is not a well-defined
766 // reduction for MultiVector anyway. See GitHub Issue #4417
767 // (which this fixes).
768 template<class SC>
769 KOKKOS_INLINE_FUNCTION
770 void operator() (atomic_tag, SC& dest, const SC& src) const {
771 dest = src;
772 }
773
774 template<class SC>
775 KOKKOS_INLINE_FUNCTION
776 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
777 dest = src;
778 }
779 };
780
781 // Kokkos::Impl::atomic_fetch_oper wants a class like this.
782 template<class Scalar1, class Scalar2>
783 struct AbsMaxOper {
784 KOKKOS_INLINE_FUNCTION
785 static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
786 const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
787 const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
788 return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
789 }
790 };
791
792 struct AbsMaxOp {
793 template <typename SC>
794 KOKKOS_INLINE_FUNCTION
795 void operator() (atomic_tag, SC& dest, const SC& src) const {
796 Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
797 }
798
799 template <typename SC>
800 KOKKOS_INLINE_FUNCTION
801 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
802 dest = AbsMaxOper<SC, SC> ().apply (dest, src);
803 }
804 };
805
806 template <typename ExecutionSpace,
807 typename DstView,
808 typename SrcView,
809 typename IdxView,
810 typename Op,
811 typename Enabled = void>
812 class UnpackArrayMultiColumn {
813 private:
814 static_assert (Kokkos::Impl::is_view<DstView>::value,
815 "DstView must be a Kokkos::View.");
816 static_assert (Kokkos::Impl::is_view<SrcView>::value,
817 "SrcView must be a Kokkos::View.");
818 static_assert (Kokkos::Impl::is_view<IdxView>::value,
819 "IdxView must be a Kokkos::View.");
820 static_assert (static_cast<int> (DstView::rank) == 2,
821 "DstView must be a rank-2 Kokkos::View.");
822 static_assert (static_cast<int> (SrcView::rank) == 1,
823 "SrcView must be a rank-1 Kokkos::View.");
824 static_assert (static_cast<int> (IdxView::rank) == 1,
825 "IdxView must be a rank-1 Kokkos::View.");
826
827 public:
828 typedef typename ExecutionSpace::execution_space execution_space;
829 typedef typename execution_space::size_type size_type;
830
831 private:
832 DstView dst;
833 SrcView src;
834 IdxView idx;
835 Op op;
836 size_t numCols;
837
838 public:
839 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
840 const DstView& dst_,
841 const SrcView& src_,
842 const IdxView& idx_,
843 const Op& op_,
844 const size_t numCols_) :
845 dst (dst_),
846 src (src_),
847 idx (idx_),
848 op (op_),
849 numCols (numCols_)
850 {}
851
852 template<class TagType>
853 KOKKOS_INLINE_FUNCTION void
854 operator() (TagType tag, const size_type k) const
855 {
856 static_assert
857 (std::is_same<TagType, atomic_tag>::value ||
858 std::is_same<TagType, nonatomic_tag>::value,
859 "TagType must be atomic_tag or nonatomic_tag.");
860
861 const typename IdxView::value_type localRow = idx(k);
862 const size_t offset = k*numCols;
863 for (size_t j = 0; j < numCols; ++j) {
864 op (tag, dst(localRow, j), src(offset+j));
865 }
866 }
867
868 static void
869 unpack (const ExecutionSpace& execSpace,
870 const DstView& dst,
871 const SrcView& src,
872 const IdxView& idx,
873 const Op& op,
874 const size_t numCols,
875 const bool use_atomic_updates)
876 {
877 if (use_atomic_updates) {
878 using range_type =
879 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
880 Kokkos::parallel_for
881 ("Tpetra::MultiVector unpack const stride atomic",
882 range_type (0, idx.size ()),
883 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
884 }
885 else {
886 using range_type =
887 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
888 Kokkos::parallel_for
889 ("Tpetra::MultiVector unpack const stride nonatomic",
890 range_type (0, idx.size ()),
891 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
892 }
893 }
894 };
895
896 template <typename ExecutionSpace,
897 typename DstView,
898 typename SrcView,
899 typename IdxView,
900 typename Op,
901 typename SizeType = typename ExecutionSpace::execution_space::size_type,
902 typename Enabled = void>
903 class UnpackArrayMultiColumnWithBoundsCheck {
904 private:
905 static_assert (Kokkos::Impl::is_view<DstView>::value,
906 "DstView must be a Kokkos::View.");
907 static_assert (Kokkos::Impl::is_view<SrcView>::value,
908 "SrcView must be a Kokkos::View.");
909 static_assert (Kokkos::Impl::is_view<IdxView>::value,
910 "IdxView must be a Kokkos::View.");
911 static_assert (static_cast<int> (DstView::rank) == 2,
912 "DstView must be a rank-2 Kokkos::View.");
913 static_assert (static_cast<int> (SrcView::rank) == 1,
914 "SrcView must be a rank-1 Kokkos::View.");
915 static_assert (static_cast<int> (IdxView::rank) == 1,
916 "IdxView must be a rank-1 Kokkos::View.");
917 static_assert (std::is_integral<SizeType>::value,
918 "SizeType must be a built-in integer type.");
919
920 public:
921 using execution_space = typename ExecutionSpace::execution_space;
922 using size_type = SizeType;
923 using value_type = size_t;
924
925 private:
926 DstView dst;
927 SrcView src;
928 IdxView idx;
929 Op op;
930 size_type numCols;
931
932 public:
933 UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
934 const DstView& dst_,
935 const SrcView& src_,
936 const IdxView& idx_,
937 const Op& op_,
938 const size_type numCols_) :
939 dst (dst_),
940 src (src_),
941 idx (idx_),
942 op (op_),
943 numCols (numCols_)
944 {}
945
946 template<class TagType>
947 KOKKOS_INLINE_FUNCTION void
948 operator() (TagType tag,
949 const size_type k,
950 size_t& lclErrCount) const
951 {
952 static_assert
953 (std::is_same<TagType, atomic_tag>::value ||
954 std::is_same<TagType, nonatomic_tag>::value,
955 "TagType must be atomic_tag or nonatomic_tag.");
956 using index_type = typename IdxView::non_const_value_type;
957
958 const index_type lclRow = idx(k);
959 if (lclRow < static_cast<index_type> (0) ||
960 lclRow >= static_cast<index_type> (dst.extent (0))) {
961 ++lclErrCount;
962 }
963 else {
964 const size_type offset = k*numCols;
965 for (size_type j = 0; j < numCols; ++j) {
966 op (tag, dst(lclRow,j), src(offset+j));
967 }
968 }
969 }
970
971 template<class TagType>
972 KOKKOS_INLINE_FUNCTION void
973 init (TagType, size_t& initialErrorCount) const {
974 initialErrorCount = 0;
975 }
976
977 template<class TagType>
978 KOKKOS_INLINE_FUNCTION void
979 join (TagType,
980 volatile size_t& dstErrorCount,
981 const volatile size_t& srcErrorCount) const
982 {
983 dstErrorCount += srcErrorCount;
984 }
985
986 static void
987 unpack (const ExecutionSpace& execSpace,
988 const DstView& dst,
989 const SrcView& src,
990 const IdxView& idx,
991 const Op& op,
992 const size_type numCols,
993 const bool use_atomic_updates)
994 {
995 using index_type = typename IdxView::non_const_value_type;
996
997 size_t errorCount = 0;
998 if (use_atomic_updates) {
999 using range_type =
1000 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1001 Kokkos::parallel_reduce
1002 ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1003 range_type (0, idx.size ()),
1004 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1005 idx, op, numCols),
1006 errorCount);
1007 }
1008 else {
1009 using range_type =
1010 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1011 Kokkos::parallel_reduce
1012 ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1013 range_type (0, idx.size ()),
1014 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1015 idx, op, numCols),
1016 errorCount);
1017 }
1018
1019 if (errorCount != 0) {
1020 // Go back and find the out-of-bounds entries in the index
1021 // array. Performance doesn't matter since we are already in
1022 // an error state, so we can do this sequentially, on host.
1023 auto idx_h = Kokkos::create_mirror_view (idx);
1024 Kokkos::deep_copy (idx_h, idx);
1025
1026 std::vector<index_type> badIndices;
1027 const size_type numInds = idx_h.extent (0);
1028 for (size_type k = 0; k < numInds; ++k) {
1029 if (idx_h(k) < static_cast<index_type> (0) ||
1030 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1031 badIndices.push_back (idx_h(k));
1032 }
1033 }
1034
1035 if (errorCount != badIndices.size ()) {
1036 std::ostringstream os;
1037 os << "MultiVector unpack kernel: errorCount = " << errorCount
1038 << " != badIndices.size() = " << badIndices.size ()
1039 << ". This should never happen. "
1040 "Please report this to the Tpetra developers.";
1041 throw std::logic_error (os.str ());
1042 }
1043
1044 std::ostringstream os;
1045 os << "MultiVector unpack kernel had " << badIndices.size ()
1046 << " out-of bounds index/ices. Here they are: [";
1047 for (size_t k = 0; k < badIndices.size (); ++k) {
1048 os << badIndices[k];
1049 if (k + 1 < badIndices.size ()) {
1050 os << ", ";
1051 }
1052 }
1053 os << "].";
1054 throw std::runtime_error (os.str ());
1055 }
1056 }
1057 };
1058
1059 template <typename ExecutionSpace,
1060 typename DstView,
1061 typename SrcView,
1062 typename IdxView,
1063 typename Op>
1064 void
1065 unpack_array_multi_column (const ExecutionSpace& execSpace,
1066 const DstView& dst,
1067 const SrcView& src,
1068 const IdxView& idx,
1069 const Op& op,
1070 const size_t numCols,
1071 const bool use_atomic_updates,
1072 const bool debug)
1073 {
1074 static_assert (Kokkos::Impl::is_view<DstView>::value,
1075 "DstView must be a Kokkos::View.");
1076 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1077 "SrcView must be a Kokkos::View.");
1078 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1079 "IdxView must be a Kokkos::View.");
1080 static_assert (static_cast<int> (DstView::rank) == 2,
1081 "DstView must be a rank-2 Kokkos::View.");
1082 static_assert (static_cast<int> (SrcView::rank) == 1,
1083 "SrcView must be a rank-1 Kokkos::View.");
1084 static_assert (static_cast<int> (IdxView::rank) == 1,
1085 "IdxView must be a rank-1 Kokkos::View.");
1086
1087 if (debug) {
1088 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1089 DstView, SrcView, IdxView, Op> impl_type;
1090 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1091 use_atomic_updates);
1092 }
1093 else {
1094 typedef UnpackArrayMultiColumn<ExecutionSpace,
1095 DstView, SrcView, IdxView, Op> impl_type;
1096 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1097 use_atomic_updates);
1098 }
1099 }
1100
1101 template <typename ExecutionSpace,
1102 typename DstView,
1103 typename SrcView,
1104 typename IdxView,
1105 typename ColView,
1106 typename Op,
1107 typename Enabled = void>
1108 class UnpackArrayMultiColumnVariableStride {
1109 private:
1110 static_assert (Kokkos::Impl::is_view<DstView>::value,
1111 "DstView must be a Kokkos::View.");
1112 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1113 "SrcView must be a Kokkos::View.");
1114 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1115 "IdxView must be a Kokkos::View.");
1116 static_assert (Kokkos::Impl::is_view<ColView>::value,
1117 "ColView must be a Kokkos::View.");
1118 static_assert (static_cast<int> (DstView::rank) == 2,
1119 "DstView must be a rank-2 Kokkos::View.");
1120 static_assert (static_cast<int> (SrcView::rank) == 1,
1121 "SrcView must be a rank-1 Kokkos::View.");
1122 static_assert (static_cast<int> (IdxView::rank) == 1,
1123 "IdxView must be a rank-1 Kokkos::View.");
1124 static_assert (static_cast<int> (ColView::rank) == 1,
1125 "ColView must be a rank-1 Kokkos::View.");
1126
1127 public:
1128 using execution_space = typename ExecutionSpace::execution_space;
1129 using size_type = typename execution_space::size_type;
1130
1131 private:
1132 DstView dst;
1133 SrcView src;
1134 IdxView idx;
1135 ColView col;
1136 Op op;
1137 size_t numCols;
1138
1139 public:
1140 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1141 const DstView& dst_,
1142 const SrcView& src_,
1143 const IdxView& idx_,
1144 const ColView& col_,
1145 const Op& op_,
1146 const size_t numCols_) :
1147 dst (dst_),
1148 src (src_),
1149 idx (idx_),
1150 col (col_),
1151 op (op_),
1152 numCols (numCols_)
1153 {}
1154
1155 template<class TagType>
1156 KOKKOS_INLINE_FUNCTION void
1157 operator() (TagType tag, const size_type k) const
1158 {
1159 static_assert
1160 (std::is_same<TagType, atomic_tag>::value ||
1161 std::is_same<TagType, nonatomic_tag>::value,
1162 "TagType must be atomic_tag or nonatomic_tag.");
1163
1164 const typename IdxView::value_type localRow = idx(k);
1165 const size_t offset = k*numCols;
1166 for (size_t j = 0; j < numCols; ++j) {
1167 op (tag, dst(localRow, col(j)), src(offset+j));
1168 }
1169 }
1170
1171 static void
1172 unpack (const ExecutionSpace& execSpace,
1173 const DstView& dst,
1174 const SrcView& src,
1175 const IdxView& idx,
1176 const ColView& col,
1177 const Op& op,
1178 const size_t numCols,
1179 const bool use_atomic_updates)
1180 {
1181 if (use_atomic_updates) {
1182 using range_type =
1183 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1184 Kokkos::parallel_for
1185 ("Tpetra::MultiVector unpack var stride atomic",
1186 range_type (0, idx.size ()),
1187 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1188 idx, col, op, numCols));
1189 }
1190 else {
1191 using range_type =
1192 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1193 Kokkos::parallel_for
1194 ("Tpetra::MultiVector unpack var stride nonatomic",
1195 range_type (0, idx.size ()),
1196 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1197 idx, col, op, numCols));
1198 }
1199 }
1200 };
1201
1202 template <typename ExecutionSpace,
1203 typename DstView,
1204 typename SrcView,
1205 typename IdxView,
1206 typename ColView,
1207 typename Op,
1208 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1209 typename Enabled = void>
1210 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1211 private:
1212 static_assert (Kokkos::Impl::is_view<DstView>::value,
1213 "DstView must be a Kokkos::View.");
1214 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1215 "SrcView must be a Kokkos::View.");
1216 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1217 "IdxView must be a Kokkos::View.");
1218 static_assert (Kokkos::Impl::is_view<ColView>::value,
1219 "ColView must be a Kokkos::View.");
1220 static_assert (static_cast<int> (DstView::rank) == 2,
1221 "DstView must be a rank-2 Kokkos::View.");
1222 static_assert (static_cast<int> (SrcView::rank) == 1,
1223 "SrcView must be a rank-1 Kokkos::View.");
1224 static_assert (static_cast<int> (IdxView::rank) == 1,
1225 "IdxView must be a rank-1 Kokkos::View.");
1226 static_assert (static_cast<int> (ColView::rank) == 1,
1227 "ColView must be a rank-1 Kokkos::View.");
1228 static_assert (std::is_integral<SizeType>::value,
1229 "SizeType must be a built-in integer type.");
1230
1231 public:
1232 using execution_space = typename ExecutionSpace::execution_space;
1233 using size_type = SizeType;
1234 using value_type = size_t;
1235
1236 private:
1237 DstView dst;
1238 SrcView src;
1239 IdxView idx;
1240 ColView col;
1241 Op op;
1242 size_type numCols;
1243
1244 public:
1245 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1246 (const ExecutionSpace& /* execSpace */,
1247 const DstView& dst_,
1248 const SrcView& src_,
1249 const IdxView& idx_,
1250 const ColView& col_,
1251 const Op& op_,
1252 const size_t numCols_) :
1253 dst (dst_),
1254 src (src_),
1255 idx (idx_),
1256 col (col_),
1257 op (op_),
1258 numCols (numCols_)
1259 {}
1260
1261 template<class TagType>
1262 KOKKOS_INLINE_FUNCTION void
1263 operator() (TagType tag,
1264 const size_type k,
1265 value_type& lclErrorCount) const
1266 {
1267 static_assert
1268 (std::is_same<TagType, atomic_tag>::value ||
1269 std::is_same<TagType, nonatomic_tag>::value,
1270 "TagType must be atomic_tag or nonatomic_tag.");
1271 using row_index_type = typename IdxView::non_const_value_type;
1272 using col_index_type = typename ColView::non_const_value_type;
1273
1274 const row_index_type lclRow = idx(k);
1275 if (lclRow < static_cast<row_index_type> (0) ||
1276 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1277 ++lclErrorCount;
1278 }
1279 else {
1280 const size_type offset = k * numCols;
1281 for (size_type j = 0; j < numCols; ++j) {
1282 const col_index_type lclCol = col(j);
1283 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1284 ++lclErrorCount;
1285 }
1286 else { // all indices are valid; apply the op
1287 op (tag, dst(lclRow, col(j)), src(offset+j));
1288 }
1289 }
1290 }
1291 }
1292
1293 KOKKOS_INLINE_FUNCTION void
1294 init (value_type& initialErrorCount) const {
1295 initialErrorCount = 0;
1296 }
1297
1298 KOKKOS_INLINE_FUNCTION void
1299 join (volatile value_type& dstErrorCount,
1300 const volatile value_type& srcErrorCount) const
1301 {
1302 dstErrorCount += srcErrorCount;
1303 }
1304
1305 static void
1306 unpack (const ExecutionSpace& execSpace,
1307 const DstView& dst,
1308 const SrcView& src,
1309 const IdxView& idx,
1310 const ColView& col,
1311 const Op& op,
1312 const size_type numCols,
1313 const bool use_atomic_updates)
1314 {
1315 using row_index_type = typename IdxView::non_const_value_type;
1316 using col_index_type = typename ColView::non_const_value_type;
1317
1318 size_t errorCount = 0;
1319 if (use_atomic_updates) {
1320 using range_type =
1321 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1322 Kokkos::parallel_reduce
1323 ("Tpetra::MultiVector unpack var stride atomic debug only",
1324 range_type (0, idx.size ()),
1325 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1326 (execSpace, dst, src, idx, col, op, numCols),
1327 errorCount);
1328 }
1329 else {
1330 using range_type =
1331 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1332 Kokkos::parallel_reduce
1333 ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1334 range_type (0, idx.size ()),
1335 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1336 (execSpace, dst, src, idx, col, op, numCols),
1337 errorCount);
1338 }
1339
1340 if (errorCount != 0) {
1341 constexpr size_t maxNumBadIndicesToPrint = 100;
1342
1343 std::ostringstream os; // for error reporting
1344 os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1345 "found " << errorCount
1346 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1347
1348 // Go back and find any out-of-bounds entries in the array of
1349 // row indices. Performance doesn't matter since we are
1350 // already in an error state, so we can do this sequentially,
1351 // on host.
1352 auto idx_h = Kokkos::create_mirror_view (idx);
1353 Kokkos::deep_copy (idx_h, idx);
1354
1355 std::vector<row_index_type> badRows;
1356 const size_type numRowInds = idx_h.extent (0);
1357 for (size_type k = 0; k < numRowInds; ++k) {
1358 if (idx_h(k) < static_cast<row_index_type> (0) ||
1359 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1360 badRows.push_back (idx_h(k));
1361 }
1362 }
1363
1364 if (badRows.size () != 0) {
1365 os << badRows.size () << " out-of-bounds row ind"
1366 << (badRows.size () != size_t (1) ? "ices" : "ex");
1367 if (badRows.size () <= maxNumBadIndicesToPrint) {
1368 os << ": [";
1369 for (size_t k = 0; k < badRows.size (); ++k) {
1370 os << badRows[k];
1371 if (k + 1 < badRows.size ()) {
1372 os << ", ";
1373 }
1374 }
1375 os << "]. ";
1376 }
1377 else {
1378 os << ". ";
1379 }
1380 }
1381 else {
1382 os << "No out-of-bounds row indices. ";
1383 }
1384
1385 // Go back and find any out-of-bounds entries in the array
1386 // of column indices.
1387 auto col_h = Kokkos::create_mirror_view (col);
1388 Kokkos::deep_copy (col_h, col);
1389
1390 std::vector<col_index_type> badCols;
1391 const size_type numColInds = col_h.extent (0);
1392 for (size_type k = 0; k < numColInds; ++k) {
1393 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1394 badCols.push_back (col_h(k));
1395 }
1396 }
1397
1398 if (badCols.size () != 0) {
1399 os << badCols.size () << " out-of-bounds column ind"
1400 << (badCols.size () != size_t (1) ? "ices" : "ex");
1401 if (badCols.size () <= maxNumBadIndicesToPrint) {
1402 for (size_t k = 0; k < badCols.size (); ++k) {
1403 os << ": [";
1404 os << badCols[k];
1405 if (k + 1 < badCols.size ()) {
1406 os << ", ";
1407 }
1408 }
1409 os << "]. ";
1410 }
1411 else {
1412 os << ". ";
1413 }
1414 }
1415 else {
1416 os << "No out-of-bounds column indices. ";
1417 }
1418
1419 TEUCHOS_TEST_FOR_EXCEPTION
1420 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1421 std::logic_error, "Tpetra::MultiVector variable stride unpack "
1422 "kernel reports errorCount=" << errorCount << ", but we failed "
1423 "to find any bad rows or columns. This should never happen. "
1424 "Please report this to the Tpetra developers.");
1425
1426 throw std::runtime_error (os.str ());
1427 } // hasErr
1428 }
1429 };
1430
1431 template <typename ExecutionSpace,
1432 typename DstView,
1433 typename SrcView,
1434 typename IdxView,
1435 typename ColView,
1436 typename Op>
1437 void
1438 unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1439 const DstView& dst,
1440 const SrcView& src,
1441 const IdxView& idx,
1442 const ColView& col,
1443 const Op& op,
1444 const size_t numCols,
1445 const bool use_atomic_updates,
1446 const bool debug)
1447 {
1448 static_assert (Kokkos::Impl::is_view<DstView>::value,
1449 "DstView must be a Kokkos::View.");
1450 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1451 "SrcView must be a Kokkos::View.");
1452 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1453 "IdxView must be a Kokkos::View.");
1454 static_assert (Kokkos::Impl::is_view<ColView>::value,
1455 "ColView must be a Kokkos::View.");
1456 static_assert (static_cast<int> (DstView::rank) == 2,
1457 "DstView must be a rank-2 Kokkos::View.");
1458 static_assert (static_cast<int> (SrcView::rank) == 1,
1459 "SrcView must be a rank-1 Kokkos::View.");
1460 static_assert (static_cast<int> (IdxView::rank) == 1,
1461 "IdxView must be a rank-1 Kokkos::View.");
1462 static_assert (static_cast<int> (ColView::rank) == 1,
1463 "ColView must be a rank-1 Kokkos::View.");
1464
1465 if (debug) {
1466 using impl_type =
1467 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1468 DstView, SrcView, IdxView, ColView, Op>;
1469 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1470 use_atomic_updates);
1471 }
1472 else {
1473 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1474 DstView, SrcView, IdxView, ColView, Op>;
1475 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1476 use_atomic_updates);
1477 }
1478 }
1479
1480 template <typename DstView, typename SrcView,
1481 typename DstIdxView, typename SrcIdxView, typename Op,
1482 typename Enabled = void>
1483 struct PermuteArrayMultiColumn {
1484 typedef typename DstView::execution_space execution_space;
1485 typedef typename execution_space::size_type size_type;
1486
1487 DstView dst;
1488 SrcView src;
1489 DstIdxView dst_idx;
1490 SrcIdxView src_idx;
1491 size_t numCols;
1492 Op op;
1493
1494 PermuteArrayMultiColumn (const DstView& dst_,
1495 const SrcView& src_,
1496 const DstIdxView& dst_idx_,
1497 const SrcIdxView& src_idx_,
1498 const size_t numCols_,
1499 const Op& op_) :
1500 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1501 numCols(numCols_), op(op_) {}
1502
1503 KOKKOS_INLINE_FUNCTION void
1504 operator() (const size_type k) const {
1505 const typename DstIdxView::value_type toRow = dst_idx(k);
1506 const typename SrcIdxView::value_type fromRow = src_idx(k);
1507 nonatomic_tag tag; // permute does not need atomics
1508 for (size_t j = 0; j < numCols; ++j) {
1509 op(tag, dst(toRow, j), src(fromRow, j));
1510 }
1511 }
1512
1513 static void
1514 permute (const DstView& dst,
1515 const SrcView& src,
1516 const DstIdxView& dst_idx,
1517 const SrcIdxView& src_idx,
1518 const size_t numCols,
1519 const Op& op)
1520 {
1521 using range_type =
1522 Kokkos::RangePolicy<execution_space, size_type>;
1523 // permute does not need atomics for Op
1524 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1525 Kokkos::parallel_for
1526 ("Tpetra::MultiVector permute multicol const stride",
1527 range_type (0, n),
1528 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1529 }
1530 };
1531
1532 // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1533 // SrcView::Rank == 2
1534 template <typename DstView, typename SrcView,
1535 typename DstIdxView, typename SrcIdxView, typename Op>
1536 void permute_array_multi_column(const DstView& dst,
1537 const SrcView& src,
1538 const DstIdxView& dst_idx,
1539 const SrcIdxView& src_idx,
1540 size_t numCols,
1541 const Op& op) {
1542 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1543 dst, src, dst_idx, src_idx, numCols, op);
1544 }
1545
1546 template <typename DstView, typename SrcView,
1547 typename DstIdxView, typename SrcIdxView,
1548 typename DstColView, typename SrcColView, typename Op,
1549 typename Enabled = void>
1550 struct PermuteArrayMultiColumnVariableStride {
1551 typedef typename DstView::execution_space execution_space;
1552 typedef typename execution_space::size_type size_type;
1553
1554 DstView dst;
1555 SrcView src;
1556 DstIdxView dst_idx;
1557 SrcIdxView src_idx;
1558 DstColView dst_col;
1559 SrcColView src_col;
1560 size_t numCols;
1561 Op op;
1562
1563 PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1564 const SrcView& src_,
1565 const DstIdxView& dst_idx_,
1566 const SrcIdxView& src_idx_,
1567 const DstColView& dst_col_,
1568 const SrcColView& src_col_,
1569 const size_t numCols_,
1570 const Op& op_) :
1571 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1572 dst_col(dst_col_), src_col(src_col_),
1573 numCols(numCols_), op(op_) {}
1574
1575 KOKKOS_INLINE_FUNCTION void
1576 operator() (const size_type k) const {
1577 const typename DstIdxView::value_type toRow = dst_idx(k);
1578 const typename SrcIdxView::value_type fromRow = src_idx(k);
1579 const nonatomic_tag tag; // permute does not need atomics
1580 for (size_t j = 0; j < numCols; ++j) {
1581 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1582 }
1583 }
1584
1585 static void
1586 permute (const DstView& dst,
1587 const SrcView& src,
1588 const DstIdxView& dst_idx,
1589 const SrcIdxView& src_idx,
1590 const DstColView& dst_col,
1591 const SrcColView& src_col,
1592 const size_t numCols,
1593 const Op& op)
1594 {
1595 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1596 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1597 Kokkos::parallel_for
1598 ("Tpetra::MultiVector permute multicol var stride",
1599 range_type (0, n),
1600 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1601 dst_col, src_col, numCols, op));
1602 }
1603 };
1604
1605 // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1606 // SrcView::Rank == 2
1607 template <typename DstView, typename SrcView,
1608 typename DstIdxView, typename SrcIdxView,
1609 typename DstColView, typename SrcColView, typename Op>
1610 void permute_array_multi_column_variable_stride(const DstView& dst,
1611 const SrcView& src,
1612 const DstIdxView& dst_idx,
1613 const SrcIdxView& src_idx,
1614 const DstColView& dst_col,
1615 const SrcColView& src_col,
1616 size_t numCols,
1617 const Op& op) {
1618 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1619 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1620 dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1621 }
1622
1623} // Details namespace
1624} // KokkosRefactor namespace
1625} // Tpetra namespace
1626
1627#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...