Tpetra parallel linear algebra Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
54#include <memory>
55#include <sstream>
56#include <stdexcept>
57#include <string>
58
81
82namespace Tpetra {
83
84//
85// Users must never rely on anything in the Details namespace.
86//
87namespace Details {
88
89namespace PackCrsMatrixImpl {
97template<class OutputOffsetsViewType,
98 class CountsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
102 const bool debug =
103#ifdef HAVE_TPETRA_DEBUG
104 true
105#else
106 false
107#endif // HAVE_TPETRA_DEBUG
108 >
110public:
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
116 // output Views drive where execution happens.
117 typedef typename OutputOffsetsViewType::device_type device_type;
118 static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
141
142 NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
143 const CountsViewType& counts,
144 const InputOffsetsViewType& rowOffsets,
145 const InputLocalRowIndicesViewType& lclRowInds,
146 const InputLocalRowPidsViewType& lclRowPids,
147 const count_type sizeOfLclCount,
148 const count_type sizeOfGblColInd,
149 const count_type sizeOfPid,
150 const count_type sizeOfValue) :
151 outputOffsets_ (outputOffsets),
152 counts_ (counts),
153 rowOffsets_ (rowOffsets),
154 lclRowInds_ (lclRowInds),
155 lclRowPids_ (lclRowPids),
156 sizeOfLclCount_ (sizeOfLclCount),
157 sizeOfGblColInd_ (sizeOfGblColInd),
158 sizeOfPid_ (sizeOfPid),
159 sizeOfValue_ (sizeOfValue),
160 error_ ("error") // don't forget this, or you'll get segfaults!
161 {
162 if (debug) {
163 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
164
165 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream os;
167 os << "lclRowInds.extent(0) = " << numRowsToPack
168 << " != counts.extent(0) = " << counts_.extent (0)
169 << ".";
170 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
171 }
172 if (static_cast<size_t> (numRowsToPack + 1) !=
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream os;
175 os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176 << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
177 << ".";
178 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
179 }
180 }
181 }
182
183 KOKKOS_INLINE_FUNCTION void
184 operator() (const local_row_index_type& curInd,
185 output_offset_type& update,
186 const bool final) const
187 {
188 if (debug) {
189 if (curInd < static_cast<local_row_index_type> (0)) {
190 error_ () = 1;
191 return;
192 }
193 }
194
195 if (final) {
196 if (debug) {
197 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
198 error_ () = 2;
199 return;
200 }
201 }
202 outputOffsets_(curInd) = update;
203 }
204
205 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206 const auto lclRow = lclRowInds_(curInd);
207 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
209 error_ () = 3;
210 return;
211 }
212 // count_type could differ from the type of each row offset.
213 // For example, row offsets might each be 64 bits, but if their
214 // difference always fits in 32 bits, we may then safely use a
215 // 32-bit count_type.
216 const count_type count =
217 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
218
219 // We pack first the number of entries in the row, then that
220 // many global column indices, then that many pids (if any),
221 // then that many values. However, if the number of entries in
222 // the row is zero, we pack nothing.
223 const count_type numBytes = (count == 0) ?
224 static_cast<count_type> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
227 sizeOfValue_);
228
229 if (final) {
230 counts_(curInd) = numBytes;
231 }
232 update += numBytes;
233 }
234 }
235
236 // mfh 31 May 2017: Don't need init or join. If you have join, MUST
237 // have join both with and without volatile! Otherwise intrawarp
238 // joins are really slow on GPUs.
239
241 int getError () const {
242 auto error_h = Kokkos::create_mirror_view (error_);
243 Kokkos::deep_copy (error_h, error_);
244 return error_h ();
245 }
246
247private:
248 OutputOffsetsViewType outputOffsets_;
249 CountsViewType counts_;
250 typename InputOffsetsViewType::const_type rowOffsets_;
251 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252 typename InputLocalRowPidsViewType::const_type lclRowPids_;
253 count_type sizeOfLclCount_;
254 count_type sizeOfGblColInd_;
255 count_type sizeOfPid_;
256 count_type sizeOfValue_;
257 Kokkos::View<int, device_type> error_;
258};
259
269template<class OutputOffsetsViewType,
270 class CountsViewType,
271 class InputOffsetsViewType,
272 class InputLocalRowIndicesViewType,
273 class InputLocalRowPidsViewType>
274typename CountsViewType::non_const_value_type
275computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
276 const CountsViewType& counts,
277 const InputOffsetsViewType& rowOffsets,
278 const InputLocalRowIndicesViewType& lclRowInds,
279 const InputLocalRowPidsViewType& lclRowPids,
280 const typename CountsViewType::non_const_value_type sizeOfLclCount,
281 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
282 const typename CountsViewType::non_const_value_type sizeOfPid,
283 const typename CountsViewType::non_const_value_type sizeOfValue)
284{
285 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
286 CountsViewType, typename InputOffsetsViewType::const_type,
287 typename InputLocalRowIndicesViewType::const_type,
288 typename InputLocalRowPidsViewType::const_type> functor_type;
289 typedef typename CountsViewType::non_const_value_type count_type;
290 typedef typename OutputOffsetsViewType::size_type size_type;
291 typedef typename OutputOffsetsViewType::execution_space execution_space;
292 typedef typename functor_type::local_row_index_type LO;
293 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
294 const char prefix[] = "computeNumPacketsAndOffsets: ";
295
296 count_type count = 0;
297 const count_type numRowsToPack = lclRowInds.extent (0);
298
299 if (numRowsToPack == 0) {
300 return count;
301 }
302 else {
303 TEUCHOS_TEST_FOR_EXCEPTION
304 (rowOffsets.extent (0) <= static_cast<size_type> (1),
305 std::invalid_argument, prefix << "There is at least one row to pack, "
306 "but the matrix has no rows. lclRowInds.extent(0) = " <<
307 numRowsToPack << ", but rowOffsets.extent(0) = " <<
308 rowOffsets.extent (0) << " <= 1.");
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (outputOffsets.extent (0) !=
311 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
312 prefix << "Output dimension does not match number of rows to pack. "
313 << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
314 << " != lclRowInds.extent(0) + 1 = "
315 << static_cast<size_type> (numRowsToPack + 1) << ".");
316 TEUCHOS_TEST_FOR_EXCEPTION
317 (counts.extent (0) != numRowsToPack, std::invalid_argument,
318 prefix << "counts.extent(0) = " << counts.extent (0)
319 << " != numRowsToPack = " << numRowsToPack << ".");
320
321 functor_type f (outputOffsets, counts, rowOffsets,
322 lclRowInds, lclRowPids, sizeOfLclCount,
323 sizeOfGblColInd, sizeOfPid, sizeOfValue);
324 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
325
326 // At least in debug mode, this functor checks for errors.
327 const int errCode = f.getError ();
328 TEUCHOS_TEST_FOR_EXCEPTION
329 (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
330 << errCode << " != 0.");
331
332#if 0
333 size_t total = 0;
334 for (LO k = 0; k < numRowsToPack; ++k) {
335 total += counts[k];
336 }
337 if (outputOffsets(numRowsToPack) != total) {
338 if (errStr.get () == NULL) {
339 errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
340 }
341 std::ostringstream& os = *errStr;
342 os << prefix
343 << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
344 << outputOffsets(numRowsToPack) << " != sum of counts = "
345 << total << "." << std::endl;
346 if (numRowsToPack != 0) {
347 // Only print the array if it's not too long.
348 if (numRowsToPack < static_cast<LO> (10)) {
349 os << "outputOffsets: [";
350 for (LO i = 0; i <= numRowsToPack; ++i) {
351 os << outputOffsets(i);
352 if (static_cast<LO> (i + 1) <= numRowsToPack) {
353 os << ",";
354 }
355 }
356 os << "]" << std::endl;
357 os << "counts: [";
358 for (LO i = 0; i < numRowsToPack; ++i) {
359 os << counts(i);
360 if (static_cast<LO> (i + 1) < numRowsToPack) {
361 os << ",";
362 }
363 }
364 os << "]" << std::endl;
365 }
366 else {
367 os << "outputOffsets(" << (numRowsToPack-1) << ") = "
368 << outputOffsets(numRowsToPack-1) << "." << std::endl;
369 }
370 }
371 count = outputOffsets(numRowsToPack);
372 return {false, errStr};
373 }
374#endif // HAVE_TPETRA_DEBUG
375
376 // Get last entry of outputOffsets, which is the sum of the entries
377 // of counts. Don't assume UVM.
378 using Tpetra::Details::getEntryOnHost;
379 return static_cast<count_type> (getEntryOnHost (outputOffsets,
380 numRowsToPack));
381 }
382}
383
399template<class ST, class ColumnMap, class BufferDeviceType>
400KOKKOS_FUNCTION
401Kokkos::pair<int, size_t>
402packCrsMatrixRow (const ColumnMap& col_map,
403 const Kokkos::View<char*, BufferDeviceType>& exports,
405 const typename PackTraits<int>::input_array_type& pids_in,
406 const typename PackTraits<ST>::input_array_type& vals_in,
407 const size_t offset,
408 const size_t num_ent,
409 const size_t num_bytes_per_value,
410 const bool pack_pids)
411{
412 using Kokkos::subview;
413 using LO = typename ColumnMap::local_ordinal_type;
414 using GO = typename ColumnMap::global_ordinal_type;
415 using return_type = Kokkos::pair<int, size_t>;
416
417 if (num_ent == 0) {
418 // Empty rows always take zero bytes, to ensure sparsity.
419 return return_type (0, 0);
420 }
421
422 const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
423 const size_t num_ent_beg = offset;
424 const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
425
426 const size_t gids_beg = num_ent_beg + num_ent_len;
427 const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
428
429 const size_t pids_beg = gids_beg + gids_len;
430 const size_t pids_len = pack_pids ?
431 num_ent * PackTraits<int>::packValueCount (int (0)) :
432 static_cast<size_t> (0);
433
434 const size_t vals_beg = gids_beg + gids_len + pids_len;
435 const size_t vals_len = num_ent * num_bytes_per_value;
436
437 char* const num_ent_out = exports.data () + num_ent_beg;
438 char* const gids_out = exports.data () + gids_beg;
439 char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
440 char* const vals_out = exports.data () + vals_beg;
441
442 size_t num_bytes_out = 0;
443 int error_code = 0;
444 num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
445
446 {
447 // Copy column indices one at a time, so that we don't need
448 // temporary storage.
449 for (size_t k = 0; k < num_ent; ++k) {
450 const LO lid = lids_in[k];
451 const GO gid = col_map.getGlobalElement (lid);
452 num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
453 }
454 // Copy PIDs one at a time, so that we don't need temporary storage.
455 if (pack_pids) {
456 for (size_t k = 0; k < num_ent; ++k) {
457 const LO lid = lids_in[k];
458 const int pid = pids_in[lid];
459 num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
460 }
461 }
462 const auto p =
463 PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
464 error_code += p.first;
465 num_bytes_out += p.second;
466 }
467
468 if (error_code != 0) {
469 return return_type (10, num_bytes_out);
470 }
471
472 const size_t expected_num_bytes =
473 num_ent_len + gids_len + pids_len + vals_len;
474 if (num_bytes_out != expected_num_bytes) {
475 return return_type (11, num_bytes_out);
476 }
477 return return_type (0, num_bytes_out);
478}
479
480template<class LocalMatrix, class LocalMap, class BufferDeviceType>
481struct PackCrsMatrixFunctor {
482 typedef LocalMatrix local_matrix_device_type;
483 typedef LocalMap local_map_type;
484 typedef typename local_matrix_device_type::value_type ST;
485 typedef typename local_map_type::local_ordinal_type LO;
486 typedef typename local_map_type::global_ordinal_type GO;
487 typedef typename local_matrix_device_type::device_type DT;
488
489 typedef Kokkos::View<const size_t*, BufferDeviceType>
490 num_packets_per_lid_view_type;
491 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
493 using export_lids_view_type = typename PackTraits<LO>::input_array_type;
494 using source_pids_view_type = typename PackTraits<int>::input_array_type;
495
496 typedef typename num_packets_per_lid_view_type::non_const_value_type
497 count_type;
498 typedef typename offsets_view_type::non_const_value_type
499 offset_type;
500 typedef Kokkos::pair<int, LO> value_type;
501
502 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503 "local_map_type::local_ordinal_type and "
504 "local_matrix_device_type::ordinal_type must be the same.");
505
506 local_matrix_device_type local_matrix;
507 local_map_type local_col_map;
508 exports_view_type exports;
509 num_packets_per_lid_view_type num_packets_per_lid;
510 export_lids_view_type export_lids;
511 source_pids_view_type source_pids;
512 offsets_view_type offsets;
513 size_t num_bytes_per_value;
514 bool pack_pids;
515
516 PackCrsMatrixFunctor (const local_matrix_device_type& local_matrix_in,
517 const local_map_type& local_col_map_in,
518 const exports_view_type& exports_in,
519 const num_packets_per_lid_view_type& num_packets_per_lid_in,
520 const export_lids_view_type& export_lids_in,
521 const source_pids_view_type& source_pids_in,
522 const offsets_view_type& offsets_in,
523 const size_t num_bytes_per_value_in,
524 const bool pack_pids_in) :
525 local_matrix (local_matrix_in),
526 local_col_map (local_col_map_in),
527 exports (exports_in),
528 num_packets_per_lid (num_packets_per_lid_in),
529 export_lids (export_lids_in),
530 source_pids (source_pids_in),
531 offsets (offsets_in),
532 num_bytes_per_value (num_bytes_per_value_in),
533 pack_pids (pack_pids_in)
534 {
535 const LO numRows = local_matrix_in.numRows ();
536 const LO rowMapDim =
537 static_cast<LO> (local_matrix.graph.row_map.extent (0));
538 TEUCHOS_TEST_FOR_EXCEPTION
539 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
540 std::logic_error, "local_matrix.graph.row_map.extent(0) = "
541 << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
542 }
543
544 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
545 {
546 using ::Tpetra::Details::OrdinalTraits;
547 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
548 }
549
550 KOKKOS_INLINE_FUNCTION void
551 join (volatile value_type& dst, const volatile value_type& src) const
552 {
553 // `dst` should reflect the first (least) bad index and all other
554 // associated error codes and data, so prefer keeping it.
555 if (src.first != 0 && dst.first == 0) {
556 dst = src;
557 }
558 }
559
560 KOKKOS_INLINE_FUNCTION
561 void operator() (const LO i, value_type& dst) const
562 {
563 const size_t offset = offsets[i];
564 const LO export_lid = export_lids[i];
565 const size_t buf_size = exports.size();
566 const size_t num_bytes = num_packets_per_lid(i);
567 const size_t num_ent =
568 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569 - local_matrix.graph.row_map[export_lid]);
570
571 // Only pack this row's data if it has a nonzero number of
572 // entries. We can do this because receiving processes get the
573 // number of packets, and will know that zero packets means zero
574 // entries.
575 if (num_ent == 0) {
576 return;
577 }
578
579 if (export_lid >= local_matrix.numRows ()) {
580 if (dst.first != 0) { // keep only the first error
581 dst = Kokkos::make_pair (1, i); // invalid row
582 }
583 return;
584 }
585 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586 if (dst.first != 0) { // keep only the first error
587 dst = Kokkos::make_pair (2, i); // out of bounds
588 }
589 return;
590 }
591
592 // We can now pack this row
593
594 // Since the matrix is locally indexed on the calling process, we
595 // have to use its column Map (which it _must_ have in this case)
596 // to convert to global indices.
597 const auto row_beg = local_matrix.graph.row_map[export_lid];
598 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599 auto vals_in = subview (local_matrix.values,
600 Kokkos::make_pair (row_beg, row_end));
601 auto lids_in = subview (local_matrix.graph.entries,
602 Kokkos::make_pair (row_beg, row_end));
603 typedef local_map_type LMT;
604 typedef BufferDeviceType BDT;
605 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606 source_pids, vals_in, offset,
607 num_ent, num_bytes_per_value,
608 pack_pids);
609 int error_code_this_row = p.first;
610 size_t num_bytes_packed_this_row = p.second;
611 if (error_code_this_row != 0) {
612 if (dst.first != 0) { // keep only the first error
613 dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
614 }
615 }
616 else if (num_bytes_packed_this_row != num_bytes) {
617 if (dst.first != 0) { // keep only the first error
618 dst = Kokkos::make_pair (3, i);
619 }
620 }
621 }
622};
623
631template<class LocalMatrix, class LocalMap, class BufferDeviceType>
632void
633do_pack (const LocalMatrix& local_matrix,
634 const LocalMap& local_map,
635 const Kokkos::View<char*, BufferDeviceType>& exports,
636 const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
638 const typename PackTraits<int>::input_array_type& source_pids,
639 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640 const size_t num_bytes_per_value,
641 const bool pack_pids)
642{
643 using LO = typename LocalMap::local_ordinal_type;
644 using DT = typename LocalMatrix::device_type;
645 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
646 const char prefix[] = "Tpetra::Details::do_pack: ";
647
648 if (export_lids.extent (0) != 0) {
649 TEUCHOS_TEST_FOR_EXCEPTION
650 (static_cast<size_t> (offsets.extent (0)) !=
651 static_cast<size_t> (export_lids.extent (0) + 1),
652 std::invalid_argument, prefix << "offsets.extent(0) = "
653 << offsets.extent (0) << " != export_lids.extent(0) (= "
654 << export_lids.extent (0) << ") + 1.");
655 TEUCHOS_TEST_FOR_EXCEPTION
656 (export_lids.extent (0) != num_packets_per_lid.extent (0),
657 std::invalid_argument, prefix << "export_lids.extent(0) = " <<
658 export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
659 << num_packets_per_lid.extent (0) << ".");
660 // If exports has nonzero length at this point, then the matrix
661 // has at least one entry to pack. Thus, if packing process
662 // ranks, we had better have at least one process rank to pack.
663 TEUCHOS_TEST_FOR_EXCEPTION
664 (pack_pids && exports.extent (0) != 0 &&
665 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
666 "pack_pids is true, and exports.extent(0) = " <<
667 exports.extent (0) << " != 0, meaning that we need to pack at "
668 "least one matrix entry, but source_pids.extent(0) = 0.");
669 }
670
671 using pack_functor_type =
672 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
673 pack_functor_type f (local_matrix, local_map, exports,
674 num_packets_per_lid, export_lids,
675 source_pids, offsets, num_bytes_per_value,
676 pack_pids);
677
678 typename pack_functor_type::value_type result;
679 range_type range (0, num_packets_per_lid.extent (0));
680 Kokkos::parallel_reduce (range, f, result);
681
682 if (result.first != 0) {
683 // We can't deep_copy from AnonymousSpace Views, so we can't print
684 // out any information from them in case of error.
685 TEUCHOS_TEST_FOR_EXCEPTION
686 (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
687 "reported error code " << result.first << " for the first "
688 "bad row " << result.second << ".");
689 }
690}
691
// Device-side implementation of packing selected rows of a CrsMatrix.
//
// Steps, as visible in the body below:
//   1. Get the local matrix and the local column Map from sourceMatrix.
//   2. Set constant_num_packets to zero (variable bytes per row).
//   3. Check that export_lids and num_packets_per_lid have the same
//      extent; the check is written to report std::invalid_argument.
//   4. Determine the number of bytes per packed value.
//   5. If there are no rows to pack, replace `exports` with an empty
//      buffer and return.
//   6. Call computeNumPacketsAndOffsets to fill num_packets_per_lid and
//      a freshly allocated offsets View, and get the total byte count.
//   7. Reallocate `exports` if it is smaller than that total.
//   8. Call do_pack to write the rows into the device view of `exports`.
//
// NOTE(review): this listing jumps from line 722 to line 724, so the
// line carrying the function name and the `sourceMatrix` parameter
// (which the body uses) is not shown here.  Confirm against the
// original header.
721template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
722void
724 Kokkos::DualView<char*, BufferDeviceType>& exports,
725 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
728 size_t& constant_num_packets,
729 const bool pack_pids)
730{
731 ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
732 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
733 "Import/Export"
734 );
735 using Kokkos::View;
736 typedef BufferDeviceType DT;
737 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
738 const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch for the diagnostic prints to std::cerr below.
739 constexpr bool debug = false;
740
741 auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
742 auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
743
744 // Setting this to zero tells the caller to expect a possibly
745 // different ("nonconstant") number of packets per local index
746 // (i.e., a possibly different number of entries per row).
747 constant_num_packets = 0;
748
749 const size_t num_export_lids =
750 static_cast<size_t> (export_lids.extent (0));
751 TEUCHOS_TEST_FOR_EXCEPTION
752 (num_export_lids !=
753 static_cast<size_t> (num_packets_per_lid.extent (0)),
754 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
755 << num_export_lids << " != num_packets_per_lid.extent(0) = "
756 << num_packets_per_lid.extent (0) << ".");
757 if (num_export_lids != 0) {
758 TEUCHOS_TEST_FOR_EXCEPTION
759 (num_packets_per_lid.data () == NULL, std::invalid_argument,
760 prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
761 "num_packets_per_lid.data() = "
762 << num_packets_per_lid.data () << " == NULL.");
763 }
764
// Packed size in bytes of one local index, one global index, and one
// process rank, as reported by PackTraits::packValueCount.
765 const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
766 const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
767 const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
768
769 size_t num_bytes_per_value = 0;
// NOTE(review): this listing jumps from line 769 to line 771, so the
// `if (...) {` that pairs with the `else` below is not shown here.
// The comments in the two branches suggest it distinguishes value
// types whose packed size can be had from a default-constructed ST
// from those that need an actual matrix entry -- confirm against the
// original header.
771 // Assume ST is default constructible; packValueCount wants an instance.
772 num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
773 }
774 else {
775 // Since the packed data come from the source matrix, we can use
776 // the source matrix to get the number of bytes per Scalar value
777 // stored in the matrix. This assumes that all Scalar values in
778 // the source matrix require the same number of bytes. If the
779 // source matrix has no entries on the calling process, then we
780 // hope that some process does have some idea how big a Scalar
781 // value is. Of course, if no processes have any entries, then no
782 // values should be packed (though this does assume that in our
783 // packing scheme, rows with zero entries take zero bytes).
784 size_t num_bytes_per_value_l = 0;
785 if (local_matrix.values.extent(0) > 0) {
786 const ST& val = local_matrix.values(0);
787 num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
788 }
// Take the maximum of the local sizes over the matrix's communicator.
789 using Teuchos::reduceAll;
790 reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
791 Teuchos::REDUCE_MAX,
792 num_bytes_per_value_l,
793 Teuchos::outArg (num_bytes_per_value));
794 }
795
// No rows requested: hand back an empty buffer.  Note that this return
// comes after the reduceAll above.
796 if (num_export_lids == 0) {
797 exports = exports_view_type ("exports", 0);
798 return;
799 }
800
801 // Array of offsets into the pack buffer.
802 Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
803
804 // Compute number of packets per LID (row to send), as well as
805 // corresponding offsets (the prefix sum of the packet counts).
806 const size_t count =
807 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808 local_matrix.graph.row_map, export_lids,
809 export_pids,
810 num_bytes_per_lid, num_bytes_per_gid,
811 num_bytes_per_pid, num_bytes_per_value);
812
813 // Resize the output pack buffer if needed.
// The buffer is only ever grown here; a larger existing buffer is kept.
814 if (count > static_cast<size_t> (exports.extent (0))) {
815 exports = exports_view_type ("exports", count);
816 if (debug) {
817 std::ostringstream os;
818 os << "*** exports resized to " << count << std::endl;
819 std::cerr << os.str ();
820 }
821 }
822 if (debug) {
823 std::ostringstream os;
824 os << "*** count: " << count << ", exports.extent(0): "
825 << exports.extent (0) << std::endl;
826 std::cerr << os.str ();
827 }
828
829 // If exports has nonzero length at this point, then the matrix has
830 // at least one entry to pack. Thus, if packing process ranks, we
831 // had better have at least one process rank to pack.
832 TEUCHOS_TEST_FOR_EXCEPTION
833 (pack_pids && exports.extent (0) != 0 &&
834 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
835 "pack_pids is true, and exports.extent(0) = " <<
836 exports.extent (0) << " != 0, meaning that we need to pack at least "
837 "one matrix entry, but export_pids.extent(0) = 0.");
838
839 typedef typename std::decay<decltype (local_matrix)>::type
840 local_matrix_device_type;
841 typedef typename std::decay<decltype (local_col_map)>::type
842 local_map_type;
843
// Flag the DualView as modified on device, then pack into its device
// view.
844 exports.modify_device ();
845 auto exports_d = exports.view_device ();
846 do_pack<local_matrix_device_type, local_map_type, DT>
847 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
848 export_lids, export_pids, offsets, num_bytes_per_value,
849 pack_pids);
850 // If we got this far, we succeeded.
851}
852
853} // namespace PackCrsMatrixImpl
854
855template<typename ST, typename LO, typename GO, typename NT>
856void
858 Teuchos::Array<char>& exports,
859 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
860 const Teuchos::ArrayView<const LO>& exportLIDs,
861 size_t& constantNumPackets)
862{
863 using local_matrix_device_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type;
864 using device_type = typename local_matrix_device_type::device_type;
865 using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
866 using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
868
869 // Convert all Teuchos::Array to Kokkos::View
870
871 // This is an output array, so we don't have to copy to device here.
872 // However, we'll have to remember to copy back to host when done.
873 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
874 create_mirror_view_from_raw_host_array (buffer_device_type (),
875 numPacketsPerLID.getRawPtr (),
876 numPacketsPerLID.size (), false,
877 "num_packets_per_lid");
878 // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
879 // DualView through here, instead of recreating a device View from a
880 // host ArrayView that itself came from a DualView.
881 //
882 // This is an input array, so we have to copy to device here.
883 // However, we never need to copy it back to host.
884 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
885 create_mirror_view_from_raw_host_array (buffer_device_type (),
886 exportLIDs.getRawPtr (),
887 exportLIDs.size (), true,
888 "export_lids");
889
890 Kokkos::View<int*, device_type> export_pids_d; // output arg
891 Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
892 constexpr bool pack_pids = false;
893 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
894 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
895 export_pids_d, constantNumPackets, pack_pids);
896
897 // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
898 // copy them back to host.
899 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
900 (numPacketsPerLID.getRawPtr (),
901 numPacketsPerLID.size ());
902 Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
903
904 // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
905 // exports_dv above, then we have two host copies for exports_h.
906
907 // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
908 // to copy them back to host.
909 if (static_cast<size_t> (exports.size ()) !=
910 static_cast<size_t> (exports_dv.extent (0))) {
911 exports.resize (exports_dv.extent (0));
912 }
913 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
914 exports.size ());
915 Kokkos::deep_copy (exports_h, exports_dv.d_view);
916}
917
918template<typename ST, typename LO, typename GO, typename NT>
919void
921 const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
922 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
923 const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
924 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
925 size_t& constantNumPackets)
926{
927 using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
928 using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
929
930 // Create an empty array of PIDs, since the interface needs it.
931 Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
932 constexpr bool pack_pids = false;
933
934 // Write-only device access
935 auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
936 numPacketsPerLID_nc.clear_sync_state ();
937 numPacketsPerLID_nc.modify_device ();
938 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
939
940 // Read-only device access
941 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
942 auto exportLIDs_d = exportLIDs.view_device ();
943
944 ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
945 "Tpetra::Details::packCrsMatrixNew",
946 "Import/Export"
947 );
948 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
949 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
950 exportPIDs_d, constantNumPackets, pack_pids);
951}
952
953template<typename ST, typename LO, typename GO, typename NT>
954void
956 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
957 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
958 const Teuchos::ArrayView<const LO>& exportLIDs,
959 const Teuchos::ArrayView<const int>& sourcePIDs,
960 size_t& constantNumPackets)
961{
962 typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type local_matrix_device_type;
963 typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
964 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
966
967 typename local_matrix_device_type::device_type outputDevice;
968
969 const bool verbose = ::Tpetra::Details::Behavior::verbose ();
970 std::unique_ptr<std::string> prefix;
971 if (verbose) {
972 const int myRank = [&] () {
973 auto map = sourceMatrix.getMap ();
974 if (map.get () == nullptr) {
975 return -1;
976 }
977 auto comm = map->getComm ();
978 if (comm.get () == nullptr) {
979 return -2;
980 }
981 return comm->getRank ();
982 } ();
983 std::ostringstream os;
984 os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
985 prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
986
987 std::ostringstream os2;
988 os2 << *prefix << "start" << std::endl;
989 std::cerr << os2.str ();
990 }
991
992 // Convert all Teuchos::Array to Kokkos::View
993
994 // This is an output array, so we don't have to copy to device here.
995 // However, we'll have to remember to copy back to host when done.
996 auto num_packets_per_lid_d =
997 create_mirror_view_from_raw_host_array (buffer_device_type (),
998 numPacketsPerLID.getRawPtr (),
999 numPacketsPerLID.size (), false,
1000 "num_packets_per_lid");
1001
1002 // This is an input array, so we have to copy to device here.
1003 // However, we never need to copy it back to host.
1004 auto export_lids_d =
1005 create_mirror_view_from_raw_host_array (buffer_device_type (),
1006 exportLIDs.getRawPtr (),
1007 exportLIDs.size (), true,
1008 "export_lids");
1009 // This is an input array, so we have to copy to device here.
1010 // However, we never need to copy it back to host.
1011 auto export_pids_d =
1013 sourcePIDs.getRawPtr (),
1014 sourcePIDs.size (), true,
1015 "export_pids");
1016 constexpr bool pack_pids = true;
1017 try {
1018 PackCrsMatrixImpl::packCrsMatrix
1019 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1020 export_pids_d, constantNumPackets, pack_pids);
1021 }
1022 catch (std::exception& e) {
1023 if (verbose) {
1024 std::ostringstream os;
1025 os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1026 << e.what () << std::endl;
1027 std::cerr << os.str ();
1028 }
1029 throw;
1030 }
1031 catch (...) {
1032 if (verbose) {
1033 std::ostringstream os;
1034 os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1035 "not a subclass of std::exception" << std::endl;
1036 std::cerr << os.str ();
1037 }
1038 throw;
1039 }
1040
1041 if (numPacketsPerLID.size () != 0) {
1042 try {
1043 // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1044 // so we have to copy them back to host.
1045 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1046 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1047 Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1048 }
1049 catch (std::exception& e) {
1050 if (verbose) {
1051 std::ostringstream os;
1052 os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1053 std::cerr << os.str ();
1054 }
1055 throw;
1056 }
1057 catch (...) {
1058 if (verbose) {
1059 std::ostringstream os;
1060 os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1061 "of std::exception" << std::endl;
1062 std::cerr << os.str ();
1063 }
1064 throw;
1065 }
1066 }
1067
1068 if (verbose) {
1069 std::ostringstream os;
1070 os << *prefix << "done" << std::endl;
1071 std::cerr << os.str ();
1072 }
1073}
1074
1075} // namespace Details
1076} // namespace Tpetra
1077
// Explicit instantiation macro for packCrsMatrix, packCrsMatrixNew, and
// packCrsMatrixWithOwningPIDs with the given Scalar (ST), LocalOrdinal
// (LO), GlobalOrdinal (GO), and Node (NT) types.
//
// The expansion names Details::..., CrsMatrix, and DistObject without
// further qualification, so it must be expanded in a scope where those
// names are visible (e.g. inside namespace Tpetra).
#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
  template void \
  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Teuchos::Array<char>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&); \
  template void \
  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    size_t&); \
  template void \
  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&);
1098
1099#endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.