Tpetra parallel linear algebra Version of the Day
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DISTOBJECT_DEF_HPP
41#define TPETRA_DISTOBJECT_DEF_HPP
42
50
51#include "Tpetra_Distributor.hpp"
54#include "Tpetra_Details_checkGlobalError.hpp"
56#include "Tpetra_Util.hpp" // Details::createPrefix
57#include "Teuchos_CommHelpers.hpp"
58#include "Teuchos_TypeNameTraits.hpp"
59#include <typeinfo>
60#include <memory>
61#include <sstream>
62
63namespace Tpetra {
64
65 namespace { // (anonymous)
66 template<class DeviceType, class IndexType = size_t>
67 struct SumFunctor {
68 SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69 viewToSum_ (viewToSum) {}
70 KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
71 lclSum += viewToSum_(i);
72 }
73 Kokkos::View<const size_t*, DeviceType> viewToSum_;
74 };
75
76 template<class DeviceType, class IndexType = size_t>
77 size_t
78 countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
79 {
80 using Kokkos::parallel_reduce;
81 typedef DeviceType DT;
82 typedef typename DT::execution_space DES;
83 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
84
85 const IndexType numOut = numImportPacketsPerLID.extent (0);
86 size_t totalImportPackets = 0;
87 parallel_reduce ("Count import packets",
88 range_type (0, numOut),
89 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
90 totalImportPackets);
91 return totalImportPackets;
92 }
93 } // namespace (anonymous)
94
95
96 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
98 DistObject (const Teuchos::RCP<const map_type>& map) :
99 map_ (map)
100 {
101#ifdef HAVE_TPETRA_TRANSFER_TIMERS
102 using Teuchos::RCP;
103 using Teuchos::Time;
104 using Teuchos::TimeMonitor;
105
106 RCP<Time> doXferTimer =
107 TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
108 if (doXferTimer.is_null ()) {
109 doXferTimer =
110 TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
111 }
112 doXferTimer_ = doXferTimer;
113
114 RCP<Time> copyAndPermuteTimer =
115 TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
116 if (copyAndPermuteTimer.is_null ()) {
117 copyAndPermuteTimer =
118 TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
119 }
120 copyAndPermuteTimer_ = copyAndPermuteTimer;
121
122 RCP<Time> packAndPrepareTimer =
123 TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
124 if (packAndPrepareTimer.is_null ()) {
125 packAndPrepareTimer =
126 TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
127 }
128 packAndPrepareTimer_ = packAndPrepareTimer;
129
130 RCP<Time> doPostsAndWaitsTimer =
131 TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
132 if (doPostsAndWaitsTimer.is_null ()) {
133 doPostsAndWaitsTimer =
134 TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
135 }
136 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
137
138 RCP<Time> unpackAndCombineTimer =
139 TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
140 if (unpackAndCombineTimer.is_null ()) {
141 unpackAndCombineTimer =
142 TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
143 }
144 unpackAndCombineTimer_ = unpackAndCombineTimer;
145#endif // HAVE_TPETRA_TRANSFER_TIMERS
146 }
147
148 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
149 std::string
151 description () const
152 {
153 using Teuchos::TypeNameTraits;
154
155 std::ostringstream os;
156 os << "\"Tpetra::DistObject\": {"
157 << "Packet: " << TypeNameTraits<packet_type>::name ()
158 << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159 << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160 << ", Node: " << TypeNameTraits<Node>::name ();
161 if (this->getObjectLabel () != "") {
162 os << "Label: \"" << this->getObjectLabel () << "\"";
163 }
164 os << "}";
165 return os.str ();
166 }
167
168 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
169 void
171 describe (Teuchos::FancyOStream &out,
172 const Teuchos::EVerbosityLevel verbLevel) const
173 {
174 using Teuchos::rcpFromRef;
175 using Teuchos::TypeNameTraits;
176 using std::endl;
177 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178 Teuchos::VERB_LOW : verbLevel;
179 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180 const int myRank = comm.is_null () ? 0 : comm->getRank ();
181 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
182
183 if (vl != Teuchos::VERB_NONE) {
184 Teuchos::OSTab tab0 (out);
185 if (myRank == 0) {
186 out << "\"Tpetra::DistObject\":" << endl;
187 }
188 Teuchos::OSTab tab1 (out);
189 if (myRank == 0) {
190 out << "Template parameters:" << endl;
191 {
192 Teuchos::OSTab tab2 (out);
193 out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
194 << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195 << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196 << "Node: " << TypeNameTraits<node_type>::name () << endl;
197 }
198 if (this->getObjectLabel () != "") {
199 out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
200 }
201 } // if myRank == 0
202
203 // Describe the Map.
204 {
205 if (myRank == 0) {
206 out << "Map:" << endl;
207 }
208 Teuchos::OSTab tab2 (out);
209 map_->describe (out, vl);
210 }
211
212 // At verbosity > VERB_LOW, each process prints something.
213 if (vl > Teuchos::VERB_LOW) {
214 for (int p = 0; p < numProcs; ++p) {
215 if (myRank == p) {
216 out << "Process " << myRank << ":" << endl;
217 Teuchos::OSTab tab2 (out);
218 out << "Export buffer size (in packets): "
219 << exports_.extent (0)
220 << endl
221 << "Import buffer size (in packets): "
222 << imports_.extent (0)
223 << endl;
224 }
225 if (! comm.is_null ()) {
226 comm->barrier (); // give output time to finish
227 comm->barrier ();
228 comm->barrier ();
229 }
230 } // for each process rank p
231 } // if vl > VERB_LOW
232 } // if vl != VERB_NONE
233 }
234
235 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
236 void
238 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
239 {
240 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
241 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
242 }
243
244 /* These are provided in base DistObject template
245 template<class DistObjectType>
246 void
247 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
248 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
249 typename DistObjectType::global_ordinal_type,
250 typename DistObjectType::node_type> >& newMap)
251 {
252 input->removeEmptyProcessesInPlace (newMap);
253 if (newMap.is_null ()) { // my process is excluded
254 input = Teuchos::null;
255 }
256 }
257
258 template<class DistObjectType>
259 void
260 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
261 {
262 using Teuchos::RCP;
263 typedef typename DistObjectType::local_ordinal_type LO;
264 typedef typename DistObjectType::global_ordinal_type GO;
265 typedef typename DistObjectType::node_type NT;
266 typedef Map<LO, GO, NT> map_type;
267
268 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
269 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
270 }
271 */
272
273 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
274 void
276 doImport (const SrcDistObject& source,
278 const CombineMode CM,
279 const bool restrictedMode)
280 {
281 using Details::Behavior;
282 using std::endl;
283 const char modeString[] = "doImport (forward mode)";
284
285 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
286 // output to std::cerr on every MPI process. This is unwise for
287 // runs with large numbers of MPI processes.
288 const bool verbose = Behavior::verbose("DistObject");
289 std::unique_ptr<std::string> prefix;
290 if (verbose) {
291 prefix = this->createPrefix("DistObject", modeString);
292 std::ostringstream os;
293 os << *prefix << "Start" << endl;
294 std::cerr << os.str ();
295 }
296 this->beginImport(source, importer, CM, restrictedMode);
297 this->endImport(source, importer, CM, restrictedMode);
298 if (verbose) {
299 std::ostringstream os;
300 os << *prefix << "Done" << endl;
301 std::cerr << os.str ();
302 }
303 }
304
305 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
306 void
308 doExport (const SrcDistObject& source,
310 const CombineMode CM,
311 const bool restrictedMode)
312 {
313 using Details::Behavior;
314 using std::endl;
315 const char modeString[] = "doExport (forward mode)";
316
317 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
318 // output to std::cerr on every MPI process. This is unwise for
319 // runs with large numbers of MPI processes.
320 const bool verbose = Behavior::verbose("DistObject");
321 std::unique_ptr<std::string> prefix;
322 if (verbose) {
323 prefix = this->createPrefix("DistObject", modeString);
324 std::ostringstream os;
325 os << *prefix << "Start" << endl;
326 std::cerr << os.str ();
327 }
328 this->beginExport(source, exporter, CM, restrictedMode);
329 this->endExport(source, exporter, CM, restrictedMode);
330 if (verbose) {
331 std::ostringstream os;
332 os << *prefix << "Done" << endl;
333 std::cerr << os.str ();
334 }
335 }
336
337 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338 void
340 doImport (const SrcDistObject& source,
342 const CombineMode CM,
343 const bool restrictedMode)
344 {
345 using Details::Behavior;
346 using std::endl;
347 const char modeString[] = "doImport (reverse mode)";
348
349 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
350 // output to std::cerr on every MPI process. This is unwise for
351 // runs with large numbers of MPI processes.
352 const bool verbose = Behavior::verbose("DistObject");
353 std::unique_ptr<std::string> prefix;
354 if (verbose) {
355 prefix = this->createPrefix("DistObject", modeString);
356 std::ostringstream os;
357 os << *prefix << "Start" << endl;
358 std::cerr << os.str ();
359 }
360 this->beginImport(source, exporter, CM, restrictedMode);
361 this->endImport(source, exporter, CM, restrictedMode);
362 if (verbose) {
363 std::ostringstream os;
364 os << *prefix << "Done" << endl;
365 std::cerr << os.str ();
366 }
367 }
368
369 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
370 void
372 doExport (const SrcDistObject& source,
374 const CombineMode CM,
375 const bool restrictedMode)
376 {
377 using Details::Behavior;
378 using std::endl;
379 const char modeString[] = "doExport (reverse mode)";
380
381 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
382 // output to std::cerr on every MPI process. This is unwise for
383 // runs with large numbers of MPI processes.
384 const bool verbose = Behavior::verbose("DistObject");
385 std::unique_ptr<std::string> prefix;
386 if (verbose) {
387 prefix = this->createPrefix("DistObject", modeString);
388 std::ostringstream os;
389 os << *prefix << "Start" << endl;
390 std::cerr << os.str ();
391 }
392 this->beginExport(source, importer, CM, restrictedMode);
393 this->endExport(source, importer, CM, restrictedMode);
394 if (verbose) {
395 std::ostringstream os;
396 os << *prefix << "Done" << endl;
397 std::cerr << os.str ();
398 }
399 }
400
401 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
402 void
404 beginImport(const SrcDistObject& source,
406 const CombineMode CM,
407 const bool restrictedMode)
408 {
409 using Details::Behavior;
410 using std::endl;
411 const char modeString[] = "doImport (forward mode)";
412
413 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
414 // output to std::cerr on every MPI process. This is unwise for
415 // runs with large numbers of MPI processes.
416 const bool verbose = Behavior::verbose("DistObject");
417 std::unique_ptr<std::string> prefix;
418 if (verbose) {
419 prefix = this->createPrefix("DistObject", modeString);
420 std::ostringstream os;
421 os << *prefix << "Start" << endl;
422 std::cerr << os.str ();
423 }
424 this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
425 if (verbose) {
426 std::ostringstream os;
427 os << *prefix << "Done" << endl;
428 std::cerr << os.str ();
429 }
430 }
431
432 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
433 void
434 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
435 beginExport(const SrcDistObject& source,
436 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
437 const CombineMode CM,
438 const bool restrictedMode)
439 {
440 using Details::Behavior;
441 using std::endl;
442 const char modeString[] = "doExport (forward mode)";
443
444 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
445 // output to std::cerr on every MPI process. This is unwise for
446 // runs with large numbers of MPI processes.
447 const bool verbose = Behavior::verbose("DistObject");
448 std::unique_ptr<std::string> prefix;
449 if (verbose) {
450 prefix = this->createPrefix("DistObject", modeString);
451 std::ostringstream os;
452 os << *prefix << "Start" << endl;
453 std::cerr << os.str ();
454 }
455 this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
456 if (verbose) {
457 std::ostringstream os;
458 os << *prefix << "Done" << endl;
459 std::cerr << os.str ();
460 }
461 }
462
463 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
464 void
465 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
466 beginImport(const SrcDistObject& source,
467 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
468 const CombineMode CM,
469 const bool restrictedMode)
470 {
471 using Details::Behavior;
472 using std::endl;
473 const char modeString[] = "doImport (reverse mode)";
474
475 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
476 // output to std::cerr on every MPI process. This is unwise for
477 // runs with large numbers of MPI processes.
478 const bool verbose = Behavior::verbose("DistObject");
479 std::unique_ptr<std::string> prefix;
480 if (verbose) {
481 prefix = this->createPrefix("DistObject", modeString);
482 std::ostringstream os;
483 os << *prefix << "Start" << endl;
484 std::cerr << os.str ();
485 }
486 this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
487 if (verbose) {
488 std::ostringstream os;
489 os << *prefix << "Done" << endl;
490 std::cerr << os.str ();
491 }
492 }
493
494 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
495 void
496 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
497 beginExport(const SrcDistObject& source,
498 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
499 const CombineMode CM,
500 const bool restrictedMode)
501 {
502 using Details::Behavior;
503 using std::endl;
504 const char modeString[] = "doExport (reverse mode)";
505
506 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
507 // output to std::cerr on every MPI process. This is unwise for
508 // runs with large numbers of MPI processes.
509 const bool verbose = Behavior::verbose("DistObject");
510 std::unique_ptr<std::string> prefix;
511 if (verbose) {
512 prefix = this->createPrefix("DistObject", modeString);
513 std::ostringstream os;
514 os << *prefix << "Start" << endl;
515 std::cerr << os.str ();
516 }
517 this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
518 if (verbose) {
519 std::ostringstream os;
520 os << *prefix << "Done" << endl;
521 std::cerr << os.str ();
522 }
523 }
524
525 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
526 void
527 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
528 endImport(const SrcDistObject& source,
529 const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
530 const CombineMode CM,
531 const bool restrictedMode)
532 {
533 using Details::Behavior;
534 using std::endl;
535 const char modeString[] = "doImport (forward mode)";
536
537 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
538 // output to std::cerr on every MPI process. This is unwise for
539 // runs with large numbers of MPI processes.
540 const bool verbose = Behavior::verbose("DistObject");
541 std::unique_ptr<std::string> prefix;
542 if (verbose) {
543 prefix = this->createPrefix("DistObject", modeString);
544 std::ostringstream os;
545 os << *prefix << "Start" << endl;
546 std::cerr << os.str ();
547 }
548 this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
549 if (verbose) {
550 std::ostringstream os;
551 os << *prefix << "Done" << endl;
552 std::cerr << os.str ();
553 }
554 }
555
556 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
557 void
558 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
559 endExport(const SrcDistObject& source,
560 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
561 const CombineMode CM,
562 const bool restrictedMode)
563 {
564 using Details::Behavior;
565 using std::endl;
566 const char modeString[] = "doExport (forward mode)";
567
568 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
569 // output to std::cerr on every MPI process. This is unwise for
570 // runs with large numbers of MPI processes.
571 const bool verbose = Behavior::verbose("DistObject");
572 std::unique_ptr<std::string> prefix;
573 if (verbose) {
574 prefix = this->createPrefix("DistObject", modeString);
575 std::ostringstream os;
576 os << *prefix << "Start" << endl;
577 std::cerr << os.str ();
578 }
579 this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
580 if (verbose) {
581 std::ostringstream os;
582 os << *prefix << "Done" << endl;
583 std::cerr << os.str ();
584 }
585 }
586
587 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
588 void
589 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
590 endImport(const SrcDistObject& source,
591 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
592 const CombineMode CM,
593 const bool restrictedMode)
595 using Details::Behavior;
596 using std::endl;
597 const char modeString[] = "doImport (reverse mode)";
598
599 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
600 // output to std::cerr on every MPI process. This is unwise for
601 // runs with large numbers of MPI processes.
602 const bool verbose = Behavior::verbose("DistObject");
603 std::unique_ptr<std::string> prefix;
604 if (verbose) {
605 prefix = this->createPrefix("DistObject", modeString);
606 std::ostringstream os;
607 os << *prefix << "Start" << endl;
608 std::cerr << os.str ();
609 }
610 this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
611 if (verbose) {
612 std::ostringstream os;
613 os << *prefix << "Done" << endl;
614 std::cerr << os.str ();
615 }
616 }
617
618 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
619 void
620 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
621 endExport(const SrcDistObject& source,
622 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
623 const CombineMode CM,
624 const bool restrictedMode)
625 {
626 using Details::Behavior;
627 using std::endl;
628 const char modeString[] = "doExport (reverse mode)";
629
630 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
631 // output to std::cerr on every MPI process. This is unwise for
632 // runs with large numbers of MPI processes.
633 const bool verbose = Behavior::verbose("DistObject");
634 std::unique_ptr<std::string> prefix;
635 if (verbose) {
636 prefix = this->createPrefix("DistObject", modeString);
637 std::ostringstream os;
638 os << *prefix << "Start" << endl;
639 std::cerr << os.str ();
640 }
641 this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
642 if (verbose) {
643 std::ostringstream os;
644 os << *prefix << "Done" << endl;
645 std::cerr << os.str ();
646 }
647 }
648
649 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
650 bool
652 isDistributed () const {
653 return map_->isDistributed ();
654 }
655
// NOTE(review): the embedded listing numbers jump from 657 to 660
// here, so the lines that carry this member function's qualified name
// and opening brace are absent from this listing.  Restore them from
// the original file before compiling.  The part that is visible is a
// member returning size_t whose default implementation returns 0.
656 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
657 size_t
660 return 0; // default implementation; subclasses may override
661 }
662
663 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
664 void
667 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
668 const char modeString[],
669 const ReverseOption revOp,
670 const CombineMode CM,
671 bool restrictedMode)
672 {
673 beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
674 endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
675 }
676
677 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
678 bool
680 reallocImportsIfNeeded (const size_t newSize,
681 const bool verbose,
682 const std::string* prefix,
683 const bool /*remoteLIDsContiguous*/,
684 const CombineMode /*CM*/)
685 {
686 if (verbose) {
687 std::ostringstream os;
688 os << *prefix << "Realloc (if needed) imports_ from "
689 << imports_.extent (0) << " to " << newSize << std::endl;
690 std::cerr << os.str ();
691 }
693 const bool reallocated =
694 reallocDualViewIfNeeded (this->imports_, newSize, "imports");
695 if (verbose) {
696 std::ostringstream os;
697 os << *prefix << "Finished realloc'ing imports_" << std::endl;
698 std::cerr << os.str ();
699 }
700 return reallocated;
701 }
702
703 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
704 bool
706 reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
707 const size_t numImportLIDs)
708 {
709 using Details::Behavior;
712 using std::endl;
713 // If an array is already allocated, and if is at least
714 // tooBigFactor times bigger than it needs to be, free it and
715 // reallocate to the size we need, in order to save space.
716 // Otherwise, take subviews to reduce allocation size.
717 constexpr size_t tooBigFactor = 10;
718
719 const bool verbose = Behavior::verbose("DistObject");
720 std::unique_ptr<std::string> prefix;
721 if (verbose) {
722 prefix = this->createPrefix("DistObject",
723 "reallocArraysForNumPacketsPerLid");
724 std::ostringstream os;
725 os << *prefix
726 << "numExportLIDs: " << numExportLIDs
727 << ", numImportLIDs: " << numImportLIDs
728 << endl;
729 os << *prefix << "DualView status before:" << endl
730 << *prefix
731 << dualViewStatusToString (this->numExportPacketsPerLID_,
732 "numExportPacketsPerLID_")
733 << endl
734 << *prefix
735 << dualViewStatusToString (this->numImportPacketsPerLID_,
736 "numImportPacketsPerLID_")
737 << endl;
738 std::cerr << os.str ();
739 }
740
741 // Reallocate numExportPacketsPerLID_ if needed.
742 const bool firstReallocated =
743 reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
744 numExportLIDs,
745 "numExportPacketsPerLID",
746 tooBigFactor,
747 true); // need fence before, if realloc'ing
748
749 // If we reallocated above, then we fenced after that
750 // reallocation. This means that we don't need to fence again,
751 // before the next reallocation.
752 const bool needFenceBeforeNextAlloc = ! firstReallocated;
753 const bool secondReallocated =
754 reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
755 numImportLIDs,
756 "numImportPacketsPerLID",
757 tooBigFactor,
758 needFenceBeforeNextAlloc);
759
760 if (verbose) {
761 std::ostringstream os;
762 os << *prefix << "DualView status after:" << endl
763 << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
764 "numExportPacketsPerLID_")
765 << endl
766 << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
767 "numImportPacketsPerLID_")
768 << endl;
769 std::cerr << os.str ();
770 }
771
772 return firstReallocated || secondReallocated;
774
775 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
776 void
779 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
780 const char modeString[],
781 const ReverseOption revOp,
782 const CombineMode CM,
783 bool restrictedMode)
784 {
785 using Details::Behavior;
789 using Kokkos::Compat::getArrayView;
790 using Kokkos::Compat::getConstArrayView;
791 using Kokkos::Compat::getKokkosViewDeepCopy;
792 using Kokkos::Compat::create_const_view;
793 using std::endl;
796 const char funcName[] = "Tpetra::DistObject::doTransfer";
797
798 ProfilingRegion region_doTransfer(funcName);
799 const bool verbose = Behavior::verbose("DistObject");
800 std::shared_ptr<std::string> prefix;
801 if (verbose) {
802 std::ostringstream os;
803 prefix = this->createPrefix("DistObject", "doTransfer");
804 os << *prefix << "Source type: " << Teuchos::typeName(src)
805 << ", Target type: " << Teuchos::typeName(*this) << endl;
806 std::cerr << os.str();
807 }
808
809 // "Restricted Mode" does two things:
810 // 1) Skips copyAndPermute
811 // 2) Allows the "target" Map of the transfer to be a subset of
812 // the Map of *this, in a "locallyFitted" sense.
813 //
814 // This cannot be used if #2 is not true, OR there are permutes.
815 // Source Maps still need to match
816
817 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
818 // checks. These may communicate more.
819 const bool debug = Behavior::debug("DistObject");
820 if (debug) {
821 if (! restrictedMode && revOp == DoForward) {
822 const bool myMapSameAsTransferTgtMap =
823 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
824 TEUCHOS_TEST_FOR_EXCEPTION
825 (! myMapSameAsTransferTgtMap, std::invalid_argument,
826 "Tpetra::DistObject::" << modeString << ": For forward-mode "
827 "communication, the target DistObject's Map must be the same "
828 "(in the sense of Tpetra::Map::isSameAs) as the input "
829 "Export/Import object's target Map.");
830 }
831 else if (! restrictedMode && revOp == DoReverse) {
832 const bool myMapSameAsTransferSrcMap =
833 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
834 TEUCHOS_TEST_FOR_EXCEPTION
835 (! myMapSameAsTransferSrcMap, std::invalid_argument,
836 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
837 "communication, the target DistObject's Map must be the same "
838 "(in the sense of Tpetra::Map::isSameAs) as the input "
839 "Export/Import object's source Map.");
840 }
841 else if (restrictedMode && revOp == DoForward) {
842 const bool myMapLocallyFittedTransferTgtMap =
843 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
844 TEUCHOS_TEST_FOR_EXCEPTION
845 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
846 "Tpetra::DistObject::" << modeString << ": For forward-mode "
847 "communication using restricted mode, Export/Import object's "
848 "target Map must be locally fitted (in the sense of "
849 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
851 else { // if (restrictedMode && revOp == DoReverse)
852 const bool myMapLocallyFittedTransferSrcMap =
853 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
854 TEUCHOS_TEST_FOR_EXCEPTION
855 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
856 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
857 "communication using restricted mode, Export/Import object's "
858 "source Map must be locally fitted (in the sense of "
859 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
860 }
861
862 // SrcDistObject need not even _have_ Maps. However, if the
863 // source object is a DistObject, it has a Map, and we may
864 // compare that Map with the Transfer's Maps.
865 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
866 if (srcDistObj != nullptr) {
867 if (revOp == DoForward) {
868 const bool srcMapSameAsImportSrcMap =
869 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
870 TEUCHOS_TEST_FOR_EXCEPTION
871 (! srcMapSameAsImportSrcMap, std::invalid_argument,
872 "Tpetra::DistObject::" << modeString << ": For forward-mode "
873 "communication, the source DistObject's Map must be the same "
874 "as the input Export/Import object's source Map.");
875 }
876 else { // revOp == DoReverse
877 const bool srcMapSameAsImportTgtMap =
878 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
879 TEUCHOS_TEST_FOR_EXCEPTION
880 (! srcMapSameAsImportTgtMap, std::invalid_argument,
881 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
882 "communication, the source DistObject's Map must be the same "
883 "as the input Export/Import object's target Map.");
884 }
885 }
886 }
887
888 const size_t numSameIDs = transfer.getNumSameIDs ();
889 Distributor& distor = transfer.getDistributor ();
890 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
891
892 TEUCHOS_TEST_FOR_EXCEPTION
893 (debug && restrictedMode &&
894 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
895 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
896 std::invalid_argument,
897 "Tpetra::DistObject::" << modeString << ": Transfer object "
898 "cannot have permutes in restricted mode.");
899
900 // Do we need all communication buffers to live on host?
901 const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
902 if (verbose) {
903 std::ostringstream os;
904 os << *prefix << "doTransfer: Use new interface; "
905 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
906 std::cerr << os.str ();
907 }
908
909 using const_lo_dv_type =
910 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
911 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
912 transfer.getPermuteToLIDs_dv () :
913 transfer.getPermuteFromLIDs_dv ();
914 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
915 transfer.getPermuteFromLIDs_dv () :
916 transfer.getPermuteToLIDs_dv ();
917 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
918 transfer.getRemoteLIDs_dv () :
919 transfer.getExportLIDs_dv ();
920 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
921 transfer.getExportLIDs_dv () :
922 transfer.getRemoteLIDs_dv ();
923 const bool canTryAliasing = (revOp == DoForward) ?
924 transfer.areRemoteLIDsContiguous() :
925 transfer.areExportLIDsContiguous();
926 // const bool canTryAliasing = false;
927
928 ProfilingRegion region_dTN(funcName);
929#ifdef HAVE_TPETRA_TRANSFER_TIMERS
930 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
931 // of Kokkos profiling.
932 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
933#endif // HAVE_TPETRA_TRANSFER_TIMERS
934
935 if (verbose) {
936 std::ostringstream os;
937 os << *prefix << "Input arguments:" << endl
938 << *prefix << " combineMode: " << combineModeToString (CM) << endl
939 << *prefix << " numSameIDs: " << numSameIDs << endl
940 << *prefix << " "
941 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
942 << *prefix << " "
943 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
944 << *prefix << " "
945 << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
946 << *prefix << " "
947 << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
948 << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
949 << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
950 std::cerr << os.str ();
951 }
952
953 {
954 ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
955 if (verbose) {
956 std::ostringstream os;
957 os << *prefix << "1. checkSizes" << endl;
958 std::cerr << os.str ();
959 }
960 const bool checkSizesResult = this->checkSizes (src);
961 TEUCHOS_TEST_FOR_EXCEPTION
962 (! checkSizesResult, std::invalid_argument,
963 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
964 "destination object is not a legal target for redistribution from the "
965 "source object. This probably means that they do not have the same "
966 "dimensions. For example, MultiVectors must have the same number of "
967 "rows and columns.");
968 }
969
970 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
971 // that if CM == INSERT || CM == REPLACE, the target object could
972 // be write only. We don't optimize for that here.
973
974 if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
975 // There is at least one GID to copy or permute.
976 if (verbose) {
977 std::ostringstream os;
978 os << *prefix << "2. copyAndPermute" << endl;
979 std::cerr << os.str ();
980 }
981 ProfilingRegion region_cp
982 ("Tpetra::DistObject::doTransferNew::copyAndPermute");
983#ifdef HAVE_TPETRA_TRANSFER_TIMERS
984 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
985 // of Kokkos profiling.
986 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
987#endif // HAVE_TPETRA_TRANSFER_TIMERS
988
989 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
990 // There is at least one GID to copy or permute.
991 if (verbose) {
992 std::ostringstream os;
993 os << *prefix << "2. copyAndPermute" << endl;
994 std::cerr << os.str ();
995 }
996 this->copyAndPermute (src, numSameIDs, permuteToLIDs,
997 permuteFromLIDs, CM);
998 if (verbose) {
999 std::ostringstream os;
1000 os << *prefix << "After copyAndPermute:" << endl
1001 << *prefix << " "
1002 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1003 << endl
1004 << *prefix << " "
1005 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1006 << endl;
1007 std::cerr << os.str ();
1008 }
1009 }
1010 }
1011
1012 // The method may return zero even if the implementation actually
1013 // does have a constant number of packets per LID. However, if it
1014 // returns nonzero, we may use this information to avoid
1015 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1016 // will set this to its final value.
1017 //
1018 // We only need this if CM != ZERO, but it has to be lifted out of
1019 // that scope because there are multiple tests for CM != ZERO.
1020 size_t constantNumPackets = this->constantNumberOfPackets ();
1021 if (verbose) {
1022 std::ostringstream os;
1023 os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1024 std::cerr << os.str ();
1025 }
1026
1027 // We only need to pack communication buffers if the combine mode
1028 // is not ZERO. A "ZERO combine mode" means that the results are
1029 // the same as if we had received all zeros, and added them to the
1030 // existing values. That means we don't need to communicate.
1031 if (CM != ZERO) {
1032 if (constantNumPackets == 0) {
1033 if (verbose) {
1034 std::ostringstream os;
1035 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1036 << endl;
1037 std::cerr << os.str ();
1038 }
1039 // This only reallocates if necessary, that is, if the sizes
1040 // don't match.
1041 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1042 remoteLIDs.extent (0));
1043 }
1044
1045 if (verbose) {
1046 std::ostringstream os;
1047 os << *prefix << "4. packAndPrepare: before, "
1048 << dualViewStatusToString (this->exports_, "exports_")
1049 << endl;
1050 std::cerr << os.str ();
1051 }
1052
1053 doPackAndPrepare(src, exportLIDs, constantNumPackets);
1054 if (commOnHost) {
1055 this->exports_.sync_host();
1056 }
1057 else {
1058 this->exports_.sync_device();
1059 }
1060
1061 if (verbose) {
1062 std::ostringstream os;
1063 os << *prefix << "5.1. After packAndPrepare, "
1064 << dualViewStatusToString (this->exports_, "exports_")
1065 << endl;
1066 std::cerr << os.str ();
1067 }
1068 } // if (CM != ZERO)
1069
1070 // We only need to send data if the combine mode is not ZERO.
1071 if (CM != ZERO) {
1072 if (constantNumPackets != 0) {
1073 // There are a constant number of packets per element. We
1074 // already know (from the number of "remote" (incoming)
1075 // elements) how many incoming elements we expect, so we can
1076 // resize the buffer accordingly.
1077 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1078 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1079 }
1080
1081 // Do we need to do communication (via doPostsAndWaits)?
1082 bool needCommunication = true;
1083
1084 // This may be NULL. It will be used below.
1085 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1086
1087 if (revOp == DoReverse && ! this->isDistributed ()) {
1088 needCommunication = false;
1089 }
1090 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1091 // is distributed requires a cast to DistObject. If it's not a
1092 // DistObject, then I'm not quite sure what to do. Perhaps it
1093 // would be more appropriate for SrcDistObject to have an
1094 // isDistributed() method. For now, I'll just assume that we
1095 // need to do communication unless the cast succeeds and the
1096 // source is not distributed.
1097 else if (revOp == DoForward && srcDistObj != NULL &&
1098 ! srcDistObj->isDistributed ()) {
1099 needCommunication = false;
1100 }
1101
1102 if (! needCommunication) {
1103 if (verbose) {
1104 std::ostringstream os;
1105 os << *prefix << "Comm not needed; skipping" << endl;
1106 std::cerr << os.str ();
1107 }
1108 }
1109 else {
1110 ProfilingRegion region_dpw
1111 ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1112#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1113 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1114 // favor of Kokkos profiling.
1115 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1116#endif // HAVE_TPETRA_TRANSFER_TIMERS
1117
1118 if (verbose) {
1119 std::ostringstream os;
1120 os << *prefix << "7.0. "
1121 << (revOp == DoReverse ? "Reverse" : "Forward")
1122 << " mode" << endl;
1123 std::cerr << os.str ();
1124 }
1125
1126 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1127 } // if (needCommunication)
1128 } // if (CM != ZERO)
1129 }
1130
1131 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1132 void
1133 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1134 endTransfer(const SrcDistObject& src,
1135 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1136 const char modeString[],
1137 const ReverseOption revOp,
1138 const CombineMode CM,
1139 bool restrictedMode)
1140 {
1141 using Details::Behavior;
1144 using Details::ProfilingRegion;
1145 using Kokkos::Compat::getArrayView;
1146 using Kokkos::Compat::getConstArrayView;
1147 using Kokkos::Compat::getKokkosViewDeepCopy;
1148 using Kokkos::Compat::create_const_view;
1149 using std::endl;
1151 using Details::ProfilingRegion;
1152 const char funcName[] = "Tpetra::DistObject::doTransfer";
1153
1154 ProfilingRegion region_doTransfer(funcName);
1155 const bool verbose = Behavior::verbose("DistObject");
1156 std::shared_ptr<std::string> prefix;
1157 if (verbose) {
1158 std::ostringstream os;
1159 prefix = this->createPrefix("DistObject", "doTransfer");
1160 os << *prefix << "Source type: " << Teuchos::typeName(src)
1161 << ", Target type: " << Teuchos::typeName(*this) << endl;
1162 std::cerr << os.str();
1163 }
1164
1165 // "Restricted Mode" does two things:
1166 // 1) Skips copyAndPermute
1167 // 2) Allows the "target" Map of the transfer to be a subset of
1168 // the Map of *this, in a "locallyFitted" sense.
1169 //
1170 // This cannot be used if #2 is not true, OR there are permutes.
1171 // Source Maps still need to match
1172
1173 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1174 // checks. These may communicate more.
1175 const bool debug = Behavior::debug("DistObject");
1176 if (debug) {
1177 if (! restrictedMode && revOp == DoForward) {
1178 const bool myMapSameAsTransferTgtMap =
1179 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1180 TEUCHOS_TEST_FOR_EXCEPTION
1181 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1182 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1183 "communication, the target DistObject's Map must be the same "
1184 "(in the sense of Tpetra::Map::isSameAs) as the input "
1185 "Export/Import object's target Map.");
1186 }
1187 else if (! restrictedMode && revOp == DoReverse) {
1188 const bool myMapSameAsTransferSrcMap =
1189 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1190 TEUCHOS_TEST_FOR_EXCEPTION
1191 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1192 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1193 "communication, the target DistObject's Map must be the same "
1194 "(in the sense of Tpetra::Map::isSameAs) as the input "
1195 "Export/Import object's source Map.");
1196 }
1197 else if (restrictedMode && revOp == DoForward) {
1198 const bool myMapLocallyFittedTransferTgtMap =
1199 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1200 TEUCHOS_TEST_FOR_EXCEPTION
1201 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1202 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1203 "communication using restricted mode, Export/Import object's "
1204 "target Map must be locally fitted (in the sense of "
1205 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1206 }
1207 else { // if (restrictedMode && revOp == DoReverse)
1208 const bool myMapLocallyFittedTransferSrcMap =
1209 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1210 TEUCHOS_TEST_FOR_EXCEPTION
1211 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1212 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1213 "communication using restricted mode, Export/Import object's "
1214 "source Map must be locally fitted (in the sense of "
1215 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1216 }
1217
1218 // SrcDistObject need not even _have_ Maps. However, if the
1219 // source object is a DistObject, it has a Map, and we may
1220 // compare that Map with the Transfer's Maps.
1221 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1222 if (srcDistObj != nullptr) {
1223 if (revOp == DoForward) {
1224 const bool srcMapSameAsImportSrcMap =
1225 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1226 TEUCHOS_TEST_FOR_EXCEPTION
1227 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1228 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1229 "communication, the source DistObject's Map must be the same "
1230 "as the input Export/Import object's source Map.");
1231 }
1232 else { // revOp == DoReverse
1233 const bool srcMapSameAsImportTgtMap =
1234 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1235 TEUCHOS_TEST_FOR_EXCEPTION
1236 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1237 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1238 "communication, the source DistObject's Map must be the same "
1239 "as the input Export/Import object's target Map.");
1240 }
1241 }
1242 }
1243
1244 Distributor& distor = transfer.getDistributor ();
1245 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1246
1247 TEUCHOS_TEST_FOR_EXCEPTION
1248 (debug && restrictedMode &&
1249 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1250 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1251 std::invalid_argument,
1252 "Tpetra::DistObject::" << modeString << ": Transfer object "
1253 "cannot have permutes in restricted mode.");
1254
1255 // Do we need all communication buffers to live on host?
1256 const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
1257 if (verbose) {
1258 std::ostringstream os;
1259 os << *prefix << "doTransfer: Use new interface; "
1260 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1261 std::cerr << os.str ();
1262 }
1263
1264 using const_lo_dv_type =
1265 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1266 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1267 transfer.getPermuteToLIDs_dv () :
1268 transfer.getPermuteFromLIDs_dv ();
1269 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1270 transfer.getPermuteFromLIDs_dv () :
1271 transfer.getPermuteToLIDs_dv ();
1272 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1273 transfer.getRemoteLIDs_dv () :
1274 transfer.getExportLIDs_dv ();
1275 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1276 transfer.getExportLIDs_dv () :
1277 transfer.getRemoteLIDs_dv ();
1278 const bool canTryAliasing = (revOp == DoForward) ?
1279 transfer.areRemoteLIDsContiguous() :
1280 transfer.areExportLIDsContiguous();
1281
1282 size_t constantNumPackets = this->constantNumberOfPackets ();
1283
1284 // We only need to send data if the combine mode is not ZERO.
1285 if (CM != ZERO) {
1286 if (constantNumPackets != 0) {
1287 // There are a constant number of packets per element. We
1288 // already know (from the number of "remote" (incoming)
1289 // elements) how many incoming elements we expect, so we can
1290 // resize the buffer accordingly.
1291 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1292 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1293 }
1294
1295 // Do we need to do communication (via doPostsAndWaits)?
1296 bool needCommunication = true;
1297
1298 // This may be NULL. It will be used below.
1299 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1300
1301 if (revOp == DoReverse && ! this->isDistributed ()) {
1302 needCommunication = false;
1303 }
1304 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1305 // is distributed requires a cast to DistObject. If it's not a
1306 // DistObject, then I'm not quite sure what to do. Perhaps it
1307 // would be more appropriate for SrcDistObject to have an
1308 // isDistributed() method. For now, I'll just assume that we
1309 // need to do communication unless the cast succeeds and the
1310 // source is not distributed.
1311 else if (revOp == DoForward && srcDistObj != NULL &&
1312 ! srcDistObj->isDistributed ()) {
1313 needCommunication = false;
1314 }
1315
1316 if (! needCommunication) {
1317 if (verbose) {
1318 std::ostringstream os;
1319 os << *prefix << "Comm not needed; skipping" << endl;
1320 std::cerr << os.str ();
1321 }
1322 }
1323 else {
1324 distributorActor_.doWaits(distributorPlan);
1325
1326 if (verbose) {
1327 std::ostringstream os;
1328 os << *prefix << "8. unpackAndCombine" << endl;
1329 std::cerr << os.str ();
1330 }
1331 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM);
1332 } // if (needCommunication)
1333 } // if (CM != ZERO)
1334
1335 if (verbose) {
1336 std::ostringstream os;
1337 os << *prefix << "9. Done!" << endl;
1338 std::cerr << os.str ();
1339 }
1340
1341 if (verbose) {
1342 std::ostringstream os;
1343 os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1344 std::cerr << os.str ();
1345 }
1346 }
1347
1348 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1349 void
1350 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1351 doPosts(const Details::DistributorPlan& distributorPlan,
1352 size_t constantNumPackets,
1353 bool commOnHost,
1354 std::shared_ptr<std::string> prefix,
1355 const bool canTryAliasing,
1356 const CombineMode CM)
1357 {
1360 using Kokkos::Compat::create_const_view;
1361 using std::endl;
1362
1363 const bool verbose = Details::Behavior::verbose("DistObject");
1364
1365 if (constantNumPackets == 0) { // variable num packets per LID
1366 if (verbose) {
1367 std::ostringstream os;
1368 os << *prefix << "7.1. Variable # packets / LID: first comm "
1369 << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1370 << endl;
1371 std::cerr << os.str ();
1372 }
1373 size_t totalImportPackets = 0;
1374 if (commOnHost) {
1375 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1376 this->numExportPacketsPerLID_.sync_host ();
1377 }
1378 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1379 this->numImportPacketsPerLID_.sync_host ();
1380 }
1381 this->numImportPacketsPerLID_.modify_host (); // out arg
1382 auto numExp_h =
1383 create_const_view (this->numExportPacketsPerLID_.view_host ());
1384 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1385
1386 // MPI communication happens here.
1387 if (verbose) {
1388 std::ostringstream os;
1389 os << *prefix << "Call doPostsAndWaits"
1390 << endl;
1391 std::cerr << os.str ();
1392 }
1393 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1394
1395 if (verbose) {
1396 std::ostringstream os;
1397 os << *prefix << "Count totalImportPackets" << std::endl;
1398 std::cerr << os.str ();
1399 }
1400 using the_dev_type = typename decltype (numImp_h)::device_type;
1401 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1402 }
1403 else { // ! commOnHost
1404 this->numExportPacketsPerLID_.sync_device ();
1405 this->numImportPacketsPerLID_.sync_device ();
1406 this->numImportPacketsPerLID_.modify_device (); // out arg
1407 auto numExp_d = create_const_view
1408 (this->numExportPacketsPerLID_.view_device ());
1409 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1410
1411 // MPI communication happens here.
1412 if (verbose) {
1413 std::ostringstream os;
1414 os << *prefix << "Call doPostsAndWaits"
1415 << endl;
1416 std::cerr << os.str ();
1417 }
1418 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1419
1420 if (verbose) {
1421 std::ostringstream os;
1422 os << *prefix << "Count totalImportPackets" << std::endl;
1423 std::cerr << os.str ();
1424 }
1425 using the_dev_type = typename decltype (numImp_d)::device_type;
1426 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1427 }
1428
1429 if (verbose) {
1430 std::ostringstream os;
1431 os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1432 std::cerr << os.str ();
1433 }
1434 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1435 prefix.get (), canTryAliasing, CM);
1436 if (verbose) {
1437 std::ostringstream os;
1438 os << *prefix << "7.3. Second comm" << std::endl;
1439 std::cerr << os.str ();
1440 }
1441
1442 // mfh 04 Feb 2019: Distributor expects the "num packets per
1443 // LID" arrays on host, so that it can issue MPI sends and
1444 // receives correctly.
1445 this->numExportPacketsPerLID_.sync_host ();
1446 this->numImportPacketsPerLID_.sync_host ();
1447
1448 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1449 // doReversePostsAndWaits currently want
1450 // numExportPacketsPerLID and numImportPacketsPerLID as
1451 // Teuchos::ArrayView, rather than as Kokkos::View.
1452 //
1453 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1454 // device. The above syncs might.
1455 auto numExportPacketsPerLID_av =
1456 getArrayViewFromDualView (this->numExportPacketsPerLID_);
1457 auto numImportPacketsPerLID_av =
1458 getArrayViewFromDualView (this->numImportPacketsPerLID_);
1459
1460 // imports_ is for output only, so we don't need to sync it
1461 // before marking it as modified. However, in order to
1462 // prevent spurious debug-mode errors (e.g., "modified on
1463 // both device and host"), we first need to clear its
1464 // "modified" flags.
1465 this->imports_.clear_sync_state ();
1466
1467 if (verbose) {
1468 std::ostringstream os;
1469 os << *prefix << "Comm on "
1470 << (commOnHost ? "host" : "device")
1471 << "; call doPosts" << endl;
1472 std::cerr << os.str ();
1473 }
1474
1475 if (commOnHost) {
1476 this->imports_.modify_host ();
1477 distributorActor_.doPosts
1478 (distributorPlan,
1479 create_const_view (this->exports_.view_host ()),
1480 numExportPacketsPerLID_av,
1481 this->imports_.view_host (),
1482 numImportPacketsPerLID_av);
1483 }
1484 else { // pack on device
1485 Kokkos::fence(); // for UVM
1486 this->imports_.modify_device ();
1487 distributorActor_.doPosts
1488 (distributorPlan,
1489 create_const_view (this->exports_.view_device ()),
1490 numExportPacketsPerLID_av,
1491 this->imports_.view_device (),
1492 numImportPacketsPerLID_av);
1493 }
1494 }
1495 else { // constant number of packets per LID
1496 if (verbose) {
1497 std::ostringstream os;
1498 os << *prefix << "7.1. Const # packets per LID: " << endl
1499 << *prefix << " "
1500 << dualViewStatusToString (this->exports_, "exports_")
1501 << endl
1502 << *prefix << " "
1503 << dualViewStatusToString (this->exports_, "imports_")
1504 << endl;
1505 std::cerr << os.str ();
1506 }
1507 // imports_ is for output only, so we don't need to sync it
1508 // before marking it as modified. However, in order to
1509 // prevent spurious debug-mode errors (e.g., "modified on
1510 // both device and host"), we first need to clear its
1511 // "modified" flags.
1512 this->imports_.clear_sync_state ();
1513
1514 if (verbose) {
1515 std::ostringstream os;
1516 os << *prefix << "7.2. Comm on "
1517 << (commOnHost ? "host" : "device")
1518 << "; call doPosts" << endl;
1519 std::cerr << os.str ();
1520 }
1521 if (commOnHost) {
1522 this->imports_.modify_host ();
1523 distributorActor_.doPosts
1524 (distributorPlan,
1525 create_const_view (this->exports_.view_host ()),
1526 constantNumPackets,
1527 this->imports_.view_host ());
1528 }
1529 else { // pack on device
1530 Kokkos::fence(); // for UVM
1531 this->imports_.modify_device ();
1532 distributorActor_.doPosts
1533 (distributorPlan,
1534 create_const_view (this->exports_.view_device ()),
1535 constantNumPackets,
1536 this->imports_.view_device ());
1537 } // commOnHost
1538 } // constant or variable num packets per LID
1539 }
1540
1541 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1542 void
1543 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1544 doPackAndPrepare(const SrcDistObject& src,
1545 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1546 size_t& constantNumPackets)
1547 {
1548 using Details::ProfilingRegion;
1549 using std::endl;
1550 const bool debug = Details::Behavior::debug("DistObject");
1551
1552 ProfilingRegion region_pp
1553 ("Tpetra::DistObject::doTransferNew::packAndPrepare");
1554#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1555 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1556 // favor of Kokkos profiling.
1557 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1558#endif // HAVE_TPETRA_TRANSFER_TIMERS
1559
1560 // Ask the source to pack data. Also ask it whether there are
1561 // a constant number of packets per element
1562 // (constantNumPackets is an output argument). If there are,
1563 // constantNumPackets will come back nonzero. Otherwise, the
1564 // source will fill the numExportPacketsPerLID_ array.
1565
1566 // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1567 // Alternately, make packAndPrepare take a "commOnHost"
1568 // argument to tell it where to leave the data?
1569 //
1570 // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1571 // the freedom to pack and unpack either on host or device.
1572 // We should prefer sync'ing only on demand. Thus, we can
1573 // answer the above question: packAndPrepare should not
1574 // take a commOnHost argument, and doTransferNew should sync
1575 // where needed, if needed.
1576 if (debug) {
1577 std::ostringstream lclErrStrm;
1578 bool lclSuccess = false;
1579 try {
1580 this->packAndPrepare (src, exportLIDs, this->exports_,
1581 this->numExportPacketsPerLID_,
1582 constantNumPackets);
1583 lclSuccess = true;
1584 }
1585 catch (std::exception& e) {
1586 lclErrStrm << "packAndPrepare threw an exception: "
1587 << endl << e.what();
1588 }
1589 catch (...) {
1590 lclErrStrm << "packAndPrepare threw an exception "
1591 "not a subclass of std::exception.";
1592 }
1593 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1594 "threw an exception in packAndPrepare on "
1595 "one or more processes in the DistObject's communicator.";
1596 auto comm = getMap()->getComm();
1597 Details::checkGlobalError(std::cerr, lclSuccess,
1598 lclErrStrm.str().c_str(),
1599 gblErrMsgHeader, *comm);
1600 }
1601 else {
1602 this->packAndPrepare (src, exportLIDs, this->exports_,
1603 this->numExportPacketsPerLID_,
1604 constantNumPackets);
1605 }
1606 }
1607
1608 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1609 void
1610 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1611 doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1612 size_t constantNumPackets,
1613 CombineMode CM)
1614 {
1615 using Details::ProfilingRegion;
1616 using std::endl;
1617 const bool debug = Details::Behavior::debug("DistObject");
1618
1619 ProfilingRegion region_uc
1620 ("Tpetra::DistObject::doTransferNew::unpackAndCombine");
1621#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1622 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1623 // favor of Kokkos profiling.
1624 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1625#endif // HAVE_TPETRA_TRANSFER_TIMERS
1626
1627 if (debug) {
1628 std::ostringstream lclErrStrm;
1629 bool lclSuccess = false;
1630 try {
1631 this->unpackAndCombine (remoteLIDs, this->imports_,
1632 this->numImportPacketsPerLID_,
1633 constantNumPackets, CM);
1634 lclSuccess = true;
1635 }
1636 catch (std::exception& e) {
1637 lclErrStrm << "unpackAndCombine threw an exception: "
1638 << endl << e.what();
1639 }
1640 catch (...) {
1641 lclErrStrm << "unpackAndCombine threw an exception "
1642 "not a subclass of std::exception.";
1643 }
1644 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1645 "threw an exception in unpackAndCombine on "
1646 "one or more processes in the DistObject's communicator.";
1647 auto comm = getMap()->getComm();
1648 Details::checkGlobalError(std::cerr, lclSuccess,
1649 lclErrStrm.str().c_str(),
1650 gblErrMsgHeader, *comm);
1651 }
1652 else {
1653 this->unpackAndCombine (remoteLIDs, this->imports_,
1654 this->numImportPacketsPerLID_,
1655 constantNumPackets, CM);
1656 }
1657 }
1658
// Default implementation with an empty body: it ignores all of its
// arguments and does nothing.
//
// NOTE(review): the lines carrying the class qualifier, the function
// name, and the closing "buffer_device_type>&" of each DualView
// parameter are absent from this listing.  The parameter list (source
// object, a size_t count, two LID DualViews, a CombineMode) matches the
// copyAndPermute(src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM)
// call made elsewhere in this file -- presumably this is
// DistObject::copyAndPermute; confirm against the original header.
1659 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1660 void
1663 (const SrcDistObject&,
1664 const size_t,
1665 const Kokkos::DualView<
1666 const local_ordinal_type*,
1668 const Kokkos::DualView<
1669 const local_ordinal_type*,
1671 const CombineMode CM)
1672 {}
1673
// Default implementation with an empty body: it ignores all of its
// arguments, packs nothing, and leaves the trailing size_t& output
// argument untouched.
//
// NOTE(review): the lines carrying the class qualifier, the function
// name, and the closing "buffer_device_type>" of each DualView
// parameter are absent from this listing.  The parameter list (source
// object, export LIDs, packet buffer, per-LID packet counts, size_t&)
// matches the packAndPrepare(src, exportLIDs, exports_,
// numExportPacketsPerLID_, constantNumPackets) calls made elsewhere in
// this file -- presumably this is DistObject::packAndPrepare; confirm
// against the original header.
1674 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1675 void
1678 (const SrcDistObject&,
1679 const Kokkos::DualView<
1680 const local_ordinal_type*,
1682 Kokkos::DualView<
1683 packet_type*,
1685 Kokkos::DualView<
1686 size_t*,
1688 size_t&)
1689 {}
1690
// Default implementation with an empty body: it ignores all of its
// arguments (their names are kept only as comments) and does nothing.
//
// NOTE(review): the lines carrying the class qualifier and the function
// name are absent from this listing.  The commented parameter names
// (importLIDs, imports, numPacketsPerLID, constantNumPackets,
// combineMode) match the unpackAndCombine(remoteLIDs, imports_,
// numImportPacketsPerLID_, constantNumPackets, CM) calls made elsewhere
// in this file -- presumably this is DistObject::unpackAndCombine;
// confirm against the original header.
1691 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1692 void
1695 (const Kokkos::DualView<
1696 const local_ordinal_type*,
1697 buffer_device_type>& /* importLIDs */,
1698 Kokkos::DualView<
1699 packet_type*,
1700 buffer_device_type> /* imports */,
1701 Kokkos::DualView<
1702 size_t*,
1703 buffer_device_type> /* numPacketsPerLID */,
1704 const size_t /* constantNumPackets */,
1705 const CombineMode /* combineMode */)
1706 {}
1707
1708
// Print this object to the given std::ostream by wrapping the stream in
// a Teuchos::FancyOStream and calling describe() with
// Teuchos::VERB_DEFAULT.
//
// os [out]: stream to print to.  It is wrapped via rcpFromRef.
//
// NOTE(review): the class-qualifier line that normally precedes the
// function name is absent from this listing.
1709 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1710 void
1712 print (std::ostream& os) const
1713 {
1714 using Teuchos::FancyOStream;
1715 using Teuchos::getFancyOStream;
1716 using Teuchos::RCP;
1717 using Teuchos::rcpFromRef;
1718 using std::endl;
1719
// Wrap the caller's stream, then delegate to describe().
1720 RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1721 this->describe (*out, Teuchos::VERB_DEFAULT);
1722 }
1723
// Build the prefix string used for verbose output by forwarding to
// Details::createPrefix() with this object's communicator.
//
// className [in]: class name to pass to Details::createPrefix().
// methodName [in]: method name to pass to Details::createPrefix().
//
// Returns the std::unique_ptr<std::string> that Details::createPrefix()
// produces.
//
// NOTE(review): the class-qualifier line that normally precedes the
// function name is absent from this listing.
1724 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1725 std::unique_ptr<std::string>
1727 createPrefix(const char className[],
1728 const char methodName[]) const
1729 {
1730 auto map = this->getMap();
// If this object has no Map, pass a null communicator instead of
// dereferencing the null Map.
1731 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1732 return Details::createPrefix(
1733 comm.getRawPtr(), className, methodName);
1734 }
1735
// Nonmember helper: call input->removeEmptyProcessesInPlace(newMap),
// then set input to null if newMap is null.
//
// input [in/out]: the object to modify.  It is dereferenced
//   unconditionally, so it must be nonnull on entry.
// newMap [in]: forwarded to the member function; null means the calling
//   process is excluded.
//
// NOTE(review): the line carrying the function name and opening
// parenthesis is absent from this listing; the one-argument overload
// in this file calls this function as
// removeEmptyProcessesInPlace<DistObjectType>(input, newMap).
1736 template<class DistObjectType>
1737 void
1739 Teuchos::RCP<DistObjectType>& input,
1740 const Teuchos::RCP<const Map<
1741 typename DistObjectType::local_ordinal_type,
1742 typename DistObjectType::global_ordinal_type,
1743 typename DistObjectType::node_type>>& newMap)
1744 {
1745 input->removeEmptyProcessesInPlace (newMap);
1746 if (newMap.is_null ()) { // my process is excluded
1747 input = Teuchos::null;
1748 }
1749 }
1750
1751 template<class DistObjectType>
1752 void
1753 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1754 {
1755 auto newMap = input->getMap ()->removeEmptyProcesses ();
1756 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
1757 }
1758
// The two macros below each expand to a single explicit instantiation
// of the DistObject class template for the given template arguments.
1759// Explicit instantiation macro for general DistObject.
1760#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1761 template class DistObject< SCALAR , LO , GO , NODE >;
1762
1763// Explicit instantiation macro for DistObject<char, ...>.
1764// The "SLGN" stuff above doesn't work for Packet=char.
// This variant fixes the first template argument (Packet) to char and
// takes only the LO, GO, and NODE arguments.
1765#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1766 template class DistObject< char , LO , GO , NODE >;
1767
1768} // namespace Tpetra
1769
1770#endif // TPETRA_DISTOBJECT_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
LocalOrdinal local_ordinal_type
The type of local indices.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
typename ::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (local index), and if so, how many packets per LID there are.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
A parallel distribution of indices over processes.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.