Tpetra parallel linear algebra Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_CRSGRAPH_DEF_HPP
41#define TPETRA_CRSGRAPH_DEF_HPP
42
45
50#include "Tpetra_Details_getGraphDiagOffsets.hpp"
51#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52#include "Tpetra_Details_makeColMap.hpp"
56#include "Tpetra_Distributor.hpp"
57#include "Teuchos_SerialDenseMatrix.hpp"
58#include "Tpetra_Vector.hpp"
61#include "Tpetra_Details_packCrsGraph.hpp"
62#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63#include "Tpetra_Details_CrsPadding.hpp"
64#include "Tpetra_Util.hpp"
65#include <algorithm>
66#include <limits>
67#include <map>
68#include <sstream>
69#include <string>
70#include <type_traits>
71#include <utility>
72#include <vector>
73
74namespace Tpetra {
75 namespace Details {
76 namespace Impl {
77
78 template<class MapIter>
79 void
80 verbosePrintMap(std::ostream& out,
81 MapIter beg,
82 MapIter end,
83 const size_t numEnt,
84 const char mapName[])
85 {
86 using ::Tpetra::Details::Behavior;
88
89 out << mapName << ": {";
90 const size_t maxNumToPrint =
92 if (maxNumToPrint == 0) {
93 if (numEnt != 0) {
94 out << "...";
95 }
96 }
97 else {
98 const size_t numToPrint = numEnt > maxNumToPrint ?
99 maxNumToPrint : numEnt;
100 size_t count = 0;
101 for (MapIter it = beg; it != end; ++it) {
102 out << "(" << (*it).first << ", ";
103 verbosePrintArray(out, (*it).second, "gblColInds",
104 maxNumToPrint);
105 out << ")";
106 if (count + size_t(1) < numToPrint) {
107 out << ", ";
108 }
109 ++count;
110 }
111 if (count < numEnt) {
112 out << ", ...";
113 }
114 }
115 out << "}";
116 }
117
118 template<class LO, class GO, class Node>
119 Teuchos::ArrayView<GO>
120 getRowGraphGlobalRow(
121 std::vector<GO>& gblColIndsStorage,
122 const RowGraph<LO, GO, Node>& graph,
123 const GO gblRowInd)
124 {
125 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126 if (gblColIndsStorage.size() < origNumEnt) {
127 gblColIndsStorage.resize(origNumEnt);
128 }
129 typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130 origNumEnt);
131 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133 return retval;
134 }
135
136 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137 class ConvertColumnIndicesFromGlobalToLocal {
138 public:
139 ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140 const ::Kokkos::View<const GO*, DT>& gblColInds,
141 const ::Kokkos::View<const OffsetType*, DT>& ptr,
142 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144 lclColInds_ (lclColInds),
145 gblColInds_ (gblColInds),
146 ptr_ (ptr),
147 lclColMap_ (lclColMap),
148 numRowEnt_ (numRowEnt)
149 {}
150
151 KOKKOS_FUNCTION void
152 operator () (const LO& lclRow, OffsetType& curNumBad) const
153 {
154 const OffsetType offset = ptr_(lclRow);
155 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156 // of entries in a row to LO, as long as the row doesn't have
157 // too many duplicate entries.
158 const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159 for (LO j = 0; j < numEnt; ++j) {
160 const GO gid = gblColInds_(offset + j);
161 const LO lid = lclColMap_.getLocalElement (gid);
162 lclColInds_(offset + j) = lid;
163 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164 ++curNumBad;
165 }
166 }
167 }
168
169 static OffsetType
170 run (const ::Kokkos::View<LO*, DT>& lclColInds,
171 const ::Kokkos::View<const GO*, DT>& gblColInds,
172 const ::Kokkos::View<const OffsetType*, DT>& ptr,
173 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175 {
176 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178
179 const LO lclNumRows = ptr.extent (0) == 0 ?
180 static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181 OffsetType numBad = 0;
182 // Count of "bad" column indices is a reduction over rows.
183 ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184 functor_type (lclColInds, gblColInds, ptr,
185 lclColMap, numRowEnt),
186 numBad);
187 return numBad;
188 }
189
190 private:
191 ::Kokkos::View<LO*, DT> lclColInds_;
192 ::Kokkos::View<const GO*, DT> gblColInds_;
193 ::Kokkos::View<const OffsetType*, DT> ptr_;
195 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196 };
197
198 } // namespace Impl
199
214 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215 OffsetType
216 convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217 const Kokkos::View<const GO*, DT>& gblColInds,
218 const Kokkos::View<const OffsetType*, DT>& ptr,
219 const LocalMap<LO, GO, DT>& lclColMap,
220 const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221 {
222 using Impl::ConvertColumnIndicesFromGlobalToLocal;
223 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224 return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225 }
226
227 template<class ViewType, class LO>
228 class MaxDifference {
229 public:
230 MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231
232 KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233 dst = 0;
234 }
235
236 KOKKOS_INLINE_FUNCTION void
237 join (volatile LO& dst, const volatile LO& src) const
238 {
239 dst = (src > dst) ? src : dst;
240 }
241
242 KOKKOS_INLINE_FUNCTION void
243 operator () (const LO lclRow, LO& maxNumEnt) const
244 {
245 const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247 }
248 private:
249 typename ViewType::const_type ptr_;
250 };
251
252 template<class ViewType, class LO>
253 typename ViewType::non_const_value_type
254 maxDifference (const char kernelLabel[],
255 const ViewType& ptr,
256 const LO lclNumRows)
257 {
258 if (lclNumRows == 0) {
259 // mfh 07 May 2018: Weirdly, I need this special case,
260 // otherwise I get the wrong answer.
261 return static_cast<LO> (0);
262 }
263 else {
264 using execution_space = typename ViewType::execution_space;
265 using range_type = Kokkos::RangePolicy<execution_space, LO>;
266 LO theMaxNumEnt {0};
267 Kokkos::parallel_reduce (kernelLabel,
268 range_type (0, lclNumRows),
269 MaxDifference<ViewType, LO> (ptr),
270 theMaxNumEnt);
271 return theMaxNumEnt;
272 }
273 }
274
275 } // namespace Details
276
277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
278 bool
279 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
280 getDebug() {
281 return Details::Behavior::debug("CrsGraph");
282 }
283
284 template <class LocalOrdinal, class GlobalOrdinal, class Node>
285 bool
286 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
287 getVerbose() {
288 return Details::Behavior::verbose("CrsGraph");
289 }
290
291 template <class LocalOrdinal, class GlobalOrdinal, class Node>
293 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294 const size_t maxNumEntriesPerRow,
295 const ProfileType /* pftype */,
296 const Teuchos::RCP<Teuchos::ParameterList>& params) :
297 dist_object_type (rowMap)
298 , rowMap_ (rowMap)
299 , numAllocForAllRows_ (maxNumEntriesPerRow)
300 {
301 const char tfecfFuncName[] =
302 "CrsGraph(rowMap,maxNumEntriesPerRow,pftype,params): ";
303 staticAssertions ();
304 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
305 (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
306 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
307 "a valid size_t value, which in this case means it must not be "
308 "Teuchos::OrdinalTraits<size_t>::invalid().");
309 resumeFill (params);
311 }
312
313 template <class LocalOrdinal, class GlobalOrdinal, class Node>
315 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
316 const Teuchos::RCP<const map_type>& colMap,
317 const size_t maxNumEntriesPerRow,
318 const ProfileType /* pftype */,
319 const Teuchos::RCP<Teuchos::ParameterList>& params) :
320 dist_object_type (rowMap)
321 , rowMap_ (rowMap)
322 , colMap_ (colMap)
323 , numAllocForAllRows_ (maxNumEntriesPerRow)
324 {
325 const char tfecfFuncName[] =
326 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,pftype,params): ";
327 staticAssertions ();
328 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
329 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
330 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
331 "a valid size_t value, which in this case means it must not be "
332 "Teuchos::OrdinalTraits<size_t>::invalid().");
333 resumeFill (params);
335 }
336
337 template <class LocalOrdinal, class GlobalOrdinal, class Node>
339 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
340 const Teuchos::ArrayView<const size_t>& numEntPerRow,
341 const ProfileType /* pftype */,
342 const Teuchos::RCP<Teuchos::ParameterList>& params) :
343 dist_object_type (rowMap)
344 , rowMap_ (rowMap)
345 , numAllocForAllRows_ (0)
346 {
347 const char tfecfFuncName[] =
348 "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
349 staticAssertions ();
350
351 const size_t lclNumRows = rowMap.is_null () ?
352 static_cast<size_t> (0) : rowMap->getNodeNumElements ();
353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
354 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
355 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
356 << " != the local number of rows " << lclNumRows << " as specified by "
357 "the input row Map.");
358
359 if (debug_) {
360 for (size_t r = 0; r < lclNumRows; ++r) {
361 const size_t curRowCount = numEntPerRow[r];
362 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
363 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
364 std::invalid_argument, "numEntPerRow(" << r << ") "
365 "specifies an invalid number of entries "
366 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
367 }
368 }
369
370 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
371 // The latter is a const View, so we have to copy into a nonconst
372 // View first, then assign.
373 typedef decltype (k_numAllocPerRow_) out_view_type;
374 typedef typename out_view_type::non_const_type nc_view_type;
375 typedef Kokkos::View<const size_t*,
376 typename nc_view_type::array_layout,
377 Kokkos::HostSpace,
378 Kokkos::MemoryUnmanaged> in_view_type;
379 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
380 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
381 lclNumRows);
382 Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
383 k_numAllocPerRow_ = numAllocPerRowOut;
384
385 resumeFill (params);
386 checkInternalState ();
387 }
388
389
390
391 template <class LocalOrdinal, class GlobalOrdinal, class Node>
393 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395 const ProfileType /* pftype */,
396 const Teuchos::RCP<Teuchos::ParameterList>& params) :
397 dist_object_type (rowMap)
398 , rowMap_ (rowMap)
399 , k_numAllocPerRow_ (numEntPerRow.h_view)
400 , numAllocForAllRows_ (0)
401 {
402 const char tfecfFuncName[] =
403 "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
404 staticAssertions ();
405
406 const size_t lclNumRows = rowMap.is_null () ?
407 static_cast<size_t> (0) : rowMap->getNodeNumElements ();
408 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
409 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
410 std::invalid_argument, "numEntPerRow has length " <<
411 numEntPerRow.extent (0) << " != the local number of rows " <<
412 lclNumRows << " as specified by " "the input row Map.");
413
414 if (debug_) {
415 for (size_t r = 0; r < lclNumRows; ++r) {
416 const size_t curRowCount = numEntPerRow.h_view(r);
417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
418 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
419 std::invalid_argument, "numEntPerRow(" << r << ") "
420 "specifies an invalid number of entries "
421 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
422 }
423 }
424
425 resumeFill (params);
427 }
428
429
430 template <class LocalOrdinal, class GlobalOrdinal, class Node>
432 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
433 const Teuchos::RCP<const map_type>& colMap,
434 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
435 const ProfileType /* pftype */,
436 const Teuchos::RCP<Teuchos::ParameterList>& params) :
437 dist_object_type (rowMap)
438 , rowMap_ (rowMap)
439 , colMap_ (colMap)
440 , k_numAllocPerRow_ (numEntPerRow.h_view)
441 , numAllocForAllRows_ (0)
442 {
443 const char tfecfFuncName[] =
444 "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
445 staticAssertions ();
446
447 const size_t lclNumRows = rowMap.is_null () ?
448 static_cast<size_t> (0) : rowMap->getNodeNumElements ();
449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
450 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
451 std::invalid_argument, "numEntPerRow has length " <<
452 numEntPerRow.extent (0) << " != the local number of rows " <<
453 lclNumRows << " as specified by " "the input row Map.");
454
455 if (debug_) {
456 for (size_t r = 0; r < lclNumRows; ++r) {
457 const size_t curRowCount = numEntPerRow.h_view(r);
458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
459 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
460 std::invalid_argument, "numEntPerRow(" << r << ") "
461 "specifies an invalid number of entries "
462 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
463 }
464 }
465
466 resumeFill (params);
468 }
469
470
471 template <class LocalOrdinal, class GlobalOrdinal, class Node>
473 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
474 const Teuchos::RCP<const map_type>& colMap,
475 const Teuchos::ArrayView<const size_t>& numEntPerRow,
476 const ProfileType /* pftype */,
477 const Teuchos::RCP<Teuchos::ParameterList>& params) :
478 dist_object_type (rowMap)
479 , rowMap_ (rowMap)
480 , colMap_ (colMap)
481 , numAllocForAllRows_ (0)
482 {
483 const char tfecfFuncName[] =
484 "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
485 staticAssertions ();
486
487 const size_t lclNumRows = rowMap.is_null () ?
488 static_cast<size_t> (0) : rowMap->getNodeNumElements ();
489 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
490 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
491 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
492 << " != the local number of rows " << lclNumRows << " as specified by "
493 "the input row Map.");
494
495 if (debug_) {
496 for (size_t r = 0; r < lclNumRows; ++r) {
497 const size_t curRowCount = numEntPerRow[r];
498 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
499 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
500 std::invalid_argument, "numEntPerRow(" << r << ") "
501 "specifies an invalid number of entries "
502 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
503 }
504 }
505
506 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
507 // The latter is a const View, so we have to copy into a nonconst
508 // View first, then assign.
509 typedef decltype (k_numAllocPerRow_) out_view_type;
510 typedef typename out_view_type::non_const_type nc_view_type;
511 typedef Kokkos::View<const size_t*,
512 typename nc_view_type::array_layout,
513 Kokkos::HostSpace,
514 Kokkos::MemoryUnmanaged> in_view_type;
515 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
516 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
517 lclNumRows);
518 Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
519 k_numAllocPerRow_ = numAllocPerRowOut;
520
521 resumeFill (params);
523 }
524
525
// Constructor that builds this graph from another graph
// ("originalGraph") and a row Map.  The column Map, allocation hint,
// storage status and index-state flags are copied from originalGraph.
//
// NOTE(review): the lines carrying the class qualifier, the
// constructor name and the declaration of the originalGraph parameter
// are missing from this listing, as is one line just before the
// closing brace.  Restore them from the upstream file before relying
// on this definition.
526 template <class LocalOrdinal, class GlobalOrdinal, class Node>
529 const Teuchos::RCP<const map_type>& rowMap,
530 const Teuchos::RCP<Teuchos::ParameterList>& params) :
531 dist_object_type (rowMap)
532 , rowMap_(rowMap)
533 , colMap_(originalGraph.colMap_)
534 , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
535 , storageStatus_(originalGraph.storageStatus_)
536 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
537 , indicesAreLocal_(originalGraph.indicesAreLocal_)
538 , indicesAreSorted_(originalGraph.indicesAreSorted_)
539 {
540 staticAssertions();
541
// NOTE(review): the row count is stored in an int here; confirm that
// getNodeNumElements() cannot exceed the range of int.
542 int numRows = rowMap->getNodeNumElements();
// Entry count = the original graph's packed row offset at index numRows.
543 size_t numNonZeros = originalGraph.rowPtrsPacked_host_(numRows);
// Row offsets need numRows+1 entries, hence the range [0, numRows+1).
544 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
545
// The row-offset arrays are subviews of the original graph's arrays.
546 rowPtrsUnpacked_dev_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_dev_, rowsToUse);
547 rowPtrsUnpacked_host_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_host_, rowsToUse);
549 rowPtrsPacked_dev_ = Kokkos::subview(originalGraph.rowPtrsPacked_dev_, rowsToUse);
550 rowPtrsPacked_host_ = Kokkos::subview(originalGraph.rowPtrsPacked_host_, rowsToUse);
551
// Take the range [0, numNonZeros) of whichever index storage
// (local or global) the original graph uses.
552 if (indicesAreLocal_) {
553 lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
554 lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
555 }
556 else {
557 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
558 }
559
561 }
562
563 template <class LocalOrdinal, class GlobalOrdinal, class Node>
565 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
566 const Teuchos::RCP<const map_type>& colMap,
567 const typename local_graph_device_type::row_map_type& rowPointers,
568 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
569 const Teuchos::RCP<Teuchos::ParameterList>& params) :
570 dist_object_type (rowMap)
571 , rowMap_(rowMap)
572 , colMap_(colMap)
573 , numAllocForAllRows_(0)
574 , storageStatus_(Details::STORAGE_1D_PACKED)
575 , indicesAreAllocated_(true)
576 , indicesAreLocal_(true)
577 {
578 staticAssertions ();
579 if (! params.is_null() && params->isParameter("sorted") &&
580 ! params->get<bool>("sorted")) {
581 indicesAreSorted_ = false;
582 }
583 else {
584 indicesAreSorted_ = true;
585 }
586 setAllIndices (rowPointers, columnIndices);
588 }
589
590 template <class LocalOrdinal, class GlobalOrdinal, class Node>
592 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
593 const Teuchos::RCP<const map_type>& colMap,
594 const Teuchos::ArrayRCP<size_t>& rowPointers,
595 const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
596 const Teuchos::RCP<Teuchos::ParameterList>& params) :
597 dist_object_type (rowMap)
598 , rowMap_ (rowMap)
599 , colMap_ (colMap)
600 , numAllocForAllRows_ (0)
601 , storageStatus_ (Details::STORAGE_1D_PACKED)
602 , indicesAreAllocated_ (true)
603 , indicesAreLocal_ (true)
604 {
605 staticAssertions ();
606 if (! params.is_null() && params->isParameter("sorted") &&
607 ! params->get<bool>("sorted")) {
608 indicesAreSorted_ = false;
609 }
610 else {
611 indicesAreSorted_ = true;
612 }
613 setAllIndices (rowPointers, columnIndices);
615 }
616
617 template <class LocalOrdinal, class GlobalOrdinal, class Node>
619 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
620 const Teuchos::RCP<const map_type>& colMap,
621 const local_graph_device_type& k_local_graph_,
622 const Teuchos::RCP<Teuchos::ParameterList>& params)
623 : CrsGraph (k_local_graph_,
624 rowMap,
625 colMap,
626 Teuchos::null,
627 Teuchos::null,
628 params)
629 {}
630
631 template <class LocalOrdinal, class GlobalOrdinal, class Node>
633 CrsGraph (const local_graph_device_type& k_local_graph_,
634 const Teuchos::RCP<const map_type>& rowMap,
635 const Teuchos::RCP<const map_type>& colMap,
636 const Teuchos::RCP<const map_type>& domainMap,
637 const Teuchos::RCP<const map_type>& rangeMap,
638 const Teuchos::RCP<Teuchos::ParameterList>& params)
639 : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
640 , rowMap_ (rowMap)
641 , colMap_ (colMap)
642 , numAllocForAllRows_ (0)
643 , storageStatus_ (Details::STORAGE_1D_PACKED)
644 , indicesAreAllocated_ (true)
645 , indicesAreLocal_ (true)
646 {
647 staticAssertions();
648 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
649
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
651 colMap.is_null (), std::runtime_error,
652 ": The input column Map must be nonnull.");
653 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
654 k_local_graph_.numRows () != rowMap->getNodeNumElements (),
655 std::runtime_error,
656 ": The input row Map and the input local graph need to have the same "
657 "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
658 << " row(s), but the local graph claims " << k_local_graph_.numRows ()
659 << " row(s).");
660
661 // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
662 // rowMap_->getNodeNumElements(), but it doesn't have to.
663 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
664 // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
665 // ": The input row Map and the input local graph need to have the same "
666 // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
667 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
668 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
669 lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
670 ": cannot have 1D data structures allocated.");
671
672 if(! params.is_null() && params->isParameter("sorted") &&
673 ! params->get<bool>("sorted")) {
674 indicesAreSorted_ = false;
676 else {
677 indicesAreSorted_ = true;
678 }
680 setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
681 rangeMap .is_null() ? rowMap_ : rangeMap);
682 Teuchos::Array<int> remotePIDs (0); // unused output argument
683 this->makeImportExport (remotePIDs, false);
684
685 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
687 this->setRowPtrsUnpacked(k_local_graph_.row_map);
688 this->setRowPtrsPacked(k_local_graph_.row_map);
689
690 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
691
692 const bool callComputeGlobalConstants = params.get () == nullptr ||
693 params->get ("compute global constants", true);
694
695 if (callComputeGlobalConstants) {
696 this->computeGlobalConstants ();
697 }
698 this->fillComplete_ = true;
699 this->checkInternalState ();
700 }
701
702 template <class LocalOrdinal, class GlobalOrdinal, class Node>
704 CrsGraph (const local_graph_device_type& lclGraph,
705 const Teuchos::RCP<const map_type>& rowMap,
706 const Teuchos::RCP<const map_type>& colMap,
707 const Teuchos::RCP<const map_type>& domainMap,
708 const Teuchos::RCP<const map_type>& rangeMap,
709 const Teuchos::RCP<const import_type>& importer,
710 const Teuchos::RCP<const export_type>& exporter,
711 const Teuchos::RCP<Teuchos::ParameterList>& params) :
712 DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
713 rowMap_ (rowMap),
714 colMap_ (colMap),
715 rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
716 domainMap_ (domainMap.is_null () ? rowMap : domainMap),
717 importer_ (importer),
718 exporter_ (exporter),
719 numAllocForAllRows_ (0),
720 storageStatus_ (Details::STORAGE_1D_PACKED),
721 indicesAreAllocated_ (true),
722 indicesAreLocal_ (true)
723 {
724 staticAssertions();
725 const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
726 "Map,Map,Map,Map,Import,Export,params): ";
727
728 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
729 (colMap.is_null (), std::runtime_error,
730 "The input column Map must be nonnull.");
731
732 lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
733 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
734 setRowPtrsUnpacked(lclGraph.row_map);
735 setRowPtrsPacked(lclGraph.row_map);
736
737 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
738
739 if (! params.is_null() && params->isParameter("sorted") &&
740 ! params->get<bool>("sorted")) {
741 indicesAreSorted_ = false;
742 }
743 else {
744 indicesAreSorted_ = true;
745 }
746
747 const bool callComputeGlobalConstants =
748 params.get () == nullptr ||
749 params->get ("compute global constants", true);
750 if (callComputeGlobalConstants) {
751 this->computeGlobalConstants ();
752 }
753 fillComplete_ = true;
754 checkInternalState ();
755 }
756
757 template <class LocalOrdinal, class GlobalOrdinal, class Node>
758 Teuchos::RCP<const Teuchos::ParameterList>
760 getValidParameters () const
762 using Teuchos::RCP;
763 using Teuchos::ParameterList;
764 using Teuchos::parameterList;
765
766 RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
767
768 // Make a sublist for the Import.
769 RCP<ParameterList> importSublist = parameterList ("Import");
770
771 // FIXME (mfh 02 Apr 2012) We should really have the Import and
772 // Export objects fill in these lists. However, we don't want to
773 // create an Import or Export unless we need them. For now, we
774 // know that the Import and Export just pass the list directly to
775 // their Distributor, so we can create a Distributor here
776 // (Distributor's constructor is a lightweight operation) and have
777 // it fill in the list.
778
779 // Fill in Distributor default parameters by creating a
780 // Distributor and asking it to do the work.
781 Distributor distributor (rowMap_->getComm (), importSublist);
782 params->set ("Import", *importSublist, "How the Import performs communication.");
783
784 // Make a sublist for the Export. For now, it's a clone of the
785 // Import sublist. It's not a shallow copy, though, since we
786 // might like the Import to do communication differently than the
787 // Export.
788 params->set ("Export", *importSublist, "How the Export performs communication.");
789
790 return params;
791 }
792
793 template <class LocalOrdinal, class GlobalOrdinal, class Node>
794 void
796 setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
797 {
798 Teuchos::RCP<const Teuchos::ParameterList> validParams =
799 getValidParameters ();
800 params->validateParametersAndSetDefaults (*validParams);
801 this->setMyParamList (params);
802 }
803
// Returns the row Map's global number of elements.
// NOTE(review): the return-type line and the class-qualifier line are
// missing from this listing; restore them from the upstream file.
804 template <class LocalOrdinal, class GlobalOrdinal, class Node>
807 getGlobalNumRows () const
808 {
809 return rowMap_->getGlobalNumElements ();
810 }
811
// Returns the domain Map's global number of elements.
// Throws std::runtime_error if the graph is not fill complete or has
// no domain Map.
// NOTE(review): the return-type line and the class-qualifier line are
// missing from this listing; restore them from the upstream file.
812 template <class LocalOrdinal, class GlobalOrdinal, class Node>
815 getGlobalNumCols () const
816 {
817 const char tfecfFuncName[] = "getGlobalNumCols: ";
818 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
819 ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
820 "The graph does not have a domain Map. You may not call this method in "
821 "that case.");
822 return getDomainMap ()->getGlobalNumElements ();
823 }
824
825 template <class LocalOrdinal, class GlobalOrdinal, class Node>
826 size_t
828 getNodeNumRows () const
829 {
830 return this->rowMap_.is_null () ?
831 static_cast<size_t> (0) :
832 this->rowMap_->getNodeNumElements ();
833 }
834
835 template <class LocalOrdinal, class GlobalOrdinal, class Node>
836 size_t
838 getNodeNumCols () const
839 {
840 const char tfecfFuncName[] = "getNodeNumCols: ";
841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
842 ! hasColMap (), std::runtime_error,
843 "The graph does not have a column Map. You may not call this method "
844 "unless the graph has a column Map. This requires either that a custom "
845 "column Map was given to the constructor, or that fillComplete() has "
846 "been called.");
847 return colMap_.is_null () ? static_cast<size_t> (0) :
848 colMap_->getNodeNumElements ();
849 }
850
851
852
853 template <class LocalOrdinal, class GlobalOrdinal, class Node>
854 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
856 getRowMap () const
857 {
858 return rowMap_;
859 }
860
861 template <class LocalOrdinal, class GlobalOrdinal, class Node>
862 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
864 getColMap () const
865 {
866 return colMap_;
867 }
869 template <class LocalOrdinal, class GlobalOrdinal, class Node>
870 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
872 getDomainMap () const
873 {
874 return domainMap_;
875 }
876
877 template <class LocalOrdinal, class GlobalOrdinal, class Node>
878 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
880 getRangeMap () const
881 {
882 return rangeMap_;
883 }
884
885 template <class LocalOrdinal, class GlobalOrdinal, class Node>
886 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
888 getImporter () const
889 {
890 return importer_;
891 }
892
893 template <class LocalOrdinal, class GlobalOrdinal, class Node>
894 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
896 getExporter () const
897 {
898 return exporter_;
900
901 template <class LocalOrdinal, class GlobalOrdinal, class Node>
902 bool
904 hasColMap () const
905 {
906 return ! colMap_.is_null ();
907 }
908
// Returns true exactly when indices are allocated, k_numRowEntries_
// has zero length, and the process owns at least one row.
// NOTE(review): the class-qualifier line, the function-name line and
// the closing brace are missing from this listing (presumably this is
// isStorageOptimized() — confirm against the upstream file).
909 template <class LocalOrdinal, class GlobalOrdinal, class Node>
910 bool
913 {
914 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
915 // getNodeNumRows() is zero?
917 const bool isOpt = indicesAreAllocated_ &&
918 k_numRowEntries_.extent (0) == 0 &&
919 getNodeNumRows () > 0;
920
921 return isOpt;
923
// Always returns StaticProfile.
// NOTE(review): the return-type line, the class-qualifier line and
// the opening brace are missing from this listing; restore them from
// the upstream file.
924 template <class LocalOrdinal, class GlobalOrdinal, class Node>
927 getProfileType () const
929 return StaticProfile;
930 }
// Returns globalNumEntries_.
// Throws std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): the whole signature (return type, class qualifier,
// function name) and the opening brace are missing from this listing.
// The name is taken from the tfecfFuncName string below; restore the
// rest from the upstream file.
932 template <class LocalOrdinal, class GlobalOrdinal, class Node>
937 const char tfecfFuncName[] = "getGlobalNumEntries: ";
938 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
939 (! this->haveGlobalConstants_, std::logic_error,
940 "The graph does not have global constants computed, "
941 "but the user has requested them.");
943 return globalNumEntries_;
944 }
946 template <class LocalOrdinal, class GlobalOrdinal, class Node>
947 size_t
949 getNodeNumEntries () const
951 typedef LocalOrdinal LO;
952
953 if (this->indicesAreAllocated_) {
954 const LO lclNumRows = this->getNodeNumRows ();
955 if (lclNumRows == 0) {
956 return static_cast<size_t> (0);
957 }
958 else {
959 // Avoid the "*this capture" issue by creating a local Kokkos::View.
960 auto numEntPerRow = this->k_numRowEntries_;
961 const LO numNumEntPerRow = numEntPerRow.extent (0);
962 if (numNumEntPerRow == 0) {
963 if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
964 static_cast<LO> (lclNumRows + 1)) {
965 return static_cast<size_t> (0);
966 }
967 else {
968 return this->rowPtrsPacked_host_(lclNumRows);
970 }
971 else { // k_numRowEntries_ is populated
972 // k_numRowEntries_ is actually be a host View, so we run
973 // the sum in its native execution space. This also means
974 // that we can use explicit capture (which could perhaps
975 // improve build time) instead of KOKKOS_LAMBDA, and avoid
976 // any CUDA build issues with trying to run a __device__ -
977 // only function on host.
978 typedef typename num_row_entries_type::execution_space
979 host_exec_space;
980 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
981
982 const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
983 lclNumRows :
984 numNumEntPerRow;
985 size_t nodeNumEnt = 0;
986 Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
987 range_type (0, upperLoopBound),
988 [=] (const LO& k, size_t& lclSum) {
989 lclSum += numEntPerRow(k);
990 }, nodeNumEnt);
991 return nodeNumEnt;
992 }
993 }
994 }
995 else { // nothing allocated on this process, so no entries
996 return static_cast<size_t> (0);
997 }
998 }
999
1000 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1005 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1007 (! this->haveGlobalConstants_, std::logic_error,
1008 "The graph does not have global constants computed, "
1009 "but the user has requested them.");
1010
1011 return globalMaxNumRowEntries_;
1012 }
1014 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1015 size_t
1018 {
1019 return nodeMaxNumRowEntries_;
1020 }
1021
1022 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1023 bool
1025 isFillComplete () const
1026 {
1027 return fillComplete_;
1028 }
1029
1030 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031 bool
1033 isFillActive () const
1034 {
1035 return ! fillComplete_;
1036 }
1038
1039 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040 bool
1042 isLocallyIndexed () const
1044 return indicesAreLocal_;
1045 }
1046
1047 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048 bool
1050 isGloballyIndexed () const
1051 {
1052 return indicesAreGlobal_;
1053 }
1054
1055 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056 size_t
1059 {
1060 typedef LocalOrdinal LO;
1061
1062 if (this->indicesAreAllocated_) {
1063 const LO lclNumRows = this->getNodeNumRows ();
1064 if (lclNumRows == 0) {
1065 return static_cast<size_t> (0);
1066 }
1067 else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1068 if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
1069 static_cast<LO> (lclNumRows + 1)) {
1070 return static_cast<size_t> (0);
1071 }
1072 else {
1073 return this->rowPtrsPacked_host_(lclNumRows);
1074 }
1075 }
1076 else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1077 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1078 return static_cast<size_t> (0);
1080 else {
1081 return rowPtrsUnpacked_host_(lclNumRows);
1083 }
1084 else {
1085 return static_cast<size_t> (0);
1086 }
1087 }
1088 else {
1089 return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1090 }
1091 }
1092
1093 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1094 Teuchos::RCP<const Teuchos::Comm<int> >
1096 getComm () const
1097 {
1098 return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1099 }
1100
1101 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1102 GlobalOrdinal
1104 getIndexBase () const
1105 {
1106 return rowMap_->getIndexBase ();
1107 }
1108
1109 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1110 bool
1112 indicesAreAllocated () const
1113 {
1114 return indicesAreAllocated_;
1115 }
1116
1117 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1118 bool
1120 isSorted () const
1121 {
1122 return indicesAreSorted_;
1123 }
1124
1125 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126 bool
1128 isMerged () const
1129 {
1130 return noRedundancies_;
1132
1133 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1134 void
1137 {
1138 // FIXME (mfh 07 May 2013) How do we know that the change
1139 // introduced a redundancy, or even that it invalidated the sorted
1140 // order of indices? CrsGraph has always made this conservative
1141 // guess. It could be a bit costly to check at insertion time,
1142 // though.
1143 indicesAreSorted_ = false;
1144 noRedundancies_ = false;
1145
1146 // We've modified the graph, so we'll have to recompute local
1147 // constants like the number of diagonal entries on this process.
1148 haveLocalConstants_ = false;
1149 }
1150
1151 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1152 void
1154 allocateIndices (const ELocalGlobal lg, const bool verbose)
1155 {
1157 using Teuchos::arcp;
1158 using Teuchos::Array;
1159 using Teuchos::ArrayRCP;
1160 using std::endl;
1161 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1162 typedef typename local_graph_device_type::row_map_type::non_const_type
1163 non_const_row_map_type;
1164 const char tfecfFuncName[] = "allocateIndices: ";
1165 const char suffix[] =
1166 " Please report this bug to the Tpetra developers.";
1167 ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1169 std::unique_ptr<std::string> prefix;
1170 if (verbose) {
1171 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1172 std::ostringstream os;
1173 os << *prefix << "Start: lg="
1174 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1175 << ", numRows: " << this->getNodeNumRows() << endl;
1176 std::cerr << os.str();
1177 }
1178
1179 // This is a protected function, only callable by us. If it was
1180 // called incorrectly, it is our fault. That's why the tests
1181 // below throw std::logic_error instead of std::invalid_argument.
1182 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1183 (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1184 ": The graph is locally indexed, but Tpetra code is calling "
1185 "this method with lg=GlobalIndices." << suffix);
1186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1187 (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1188 ": The graph is globally indexed, but Tpetra code is calling "
1189 "this method with lg=LocalIndices." << suffix);
1190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1191 (indicesAreAllocated (), std::logic_error, ": The graph's "
1192 "indices are already allocated, but Tpetra is calling "
1193 "allocateIndices again." << suffix);
1194 const size_t numRows = this->getNodeNumRows ();
1195
1196 //
1197 // STATIC ALLOCATION PROFILE
1198 //
1199 {
1200 if (verbose) {
1201 std::ostringstream os;
1202 os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1203 std::cerr << os.str();
1205 non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1206
1207 if (this->k_numAllocPerRow_.extent (0) != 0) {
1208 // It's OK to throw std::invalid_argument here, because we
1209 // haven't incurred any side effects yet. Throwing that
1210 // exception (and not, say, std::logic_error) implies that the
1211 // instance can recover.
1212 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1213 (this->k_numAllocPerRow_.extent (0) != numRows,
1214 std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1215 "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1216 << ", but its length != numRows = " << numRows << ".");
1217
1218 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1219 // we want to compute here) lives on device. That's OK;
1220 // computeOffsetsFromCounts can handle this case.
1222
1223 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1224 // doesn't attempt to check its input for "invalid" flag
1225 // values. For now, we omit that feature of the sequential
1226 // code disabled below.
1227 computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1228 }
1229 else {
1230 // It's OK to throw std::invalid_argument here, because we
1231 // haven't incurred any side effects yet. Throwing that
1232 // exception (and not, say, std::logic_error) implies that the
1233 // instance can recover.
1234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1235 (this->numAllocForAllRows_ ==
1236 Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1237 std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1238 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1239 Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1240
1242 computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1243 }
1244
1245 // "Commit" the resulting row offsets.
1246 setRowPtrsUnpacked(k_rowPtrs);
1247 }
1248
1249 const size_type numInds = rowPtrsUnpacked_host_(numRows);
1250 if (lg == LocalIndices) {
1251 if (verbose) {
1252 std::ostringstream os;
1253 os << *prefix << "Allocate local column indices "
1254 "lclIndsUnpacked_wdv: " << numInds << endl;
1255 std::cerr << os.str();
1256 }
1257 lclIndsUnpacked_wdv = local_inds_wdv_type (
1258 local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1259 }
1260 else {
1261 if (verbose) {
1262 std::ostringstream os;
1263 os << *prefix << "Allocate global column indices "
1264 "gblInds_wdv: " << numInds << endl;
1265 std::cerr << os.str();
1266 }
1267 gblInds_wdv = global_inds_wdv_type (
1268 global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1269 }
1270 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1271
1272 this->indicesAreLocal_ = (lg == LocalIndices);
1273 this->indicesAreGlobal_ = (lg == GlobalIndices);
1274
1275 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1276 using Kokkos::ViewAllocateWithoutInitializing;
1277 typedef decltype (k_numRowEntries_) row_ent_type;
1278 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1279 if (verbose) {
1280 std::ostringstream os;
1281 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1282 << endl;
1283 std::cerr << os.str();
1284 }
1285 row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1286 Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1287 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1288 }
1289
1290 // Once indices are allocated, CrsGraph needs to free this information.
1291 this->numAllocForAllRows_ = 0;
1292 this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1293 this->indicesAreAllocated_ = true;
1294
1295 try {
1296 this->checkInternalState ();
1297 }
1298 catch (std::logic_error& e) {
1299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1300 (true, std::logic_error, "At end of allocateIndices, "
1301 "checkInternalState threw std::logic_error: "
1302 << e.what ());
1303 }
1304 catch (std::exception& e) {
1305 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1306 (true, std::runtime_error, "At end of allocateIndices, "
1307 "checkInternalState threw std::exception: "
1308 << e.what ());
1310 catch (...) {
1311 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1312 (true, std::runtime_error, "At end of allocateIndices, "
1313 "checkInternalState threw an exception "
1314 "not a subclass of std::exception.");
1315 }
1316
1317 if (verbose) {
1318 std::ostringstream os;
1319 os << *prefix << "Done" << endl;
1320 std::cerr << os.str();
1321 }
1322 }
1323
1324 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1325 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1326 local_inds_dualv_type::t_host::const_type
1328 getLocalIndsViewHost (const RowInfo& rowinfo) const
1329 {
1330 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1331 return typename local_inds_dualv_type::t_host::const_type ();
1332 else
1333 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1334 rowinfo.allocSize,
1335 Access::ReadOnly);
1336 }
1337
1338 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1340 local_inds_dualv_type::t_host
1342 getLocalIndsViewHostNonConst (const RowInfo& rowinfo)
1343 {
1344 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1345 return typename local_inds_dualv_type::t_host ();
1346 else
1347 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1348 rowinfo.allocSize,
1349 Access::ReadWrite);
1350 }
1351
1352 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1354 global_inds_dualv_type::t_host::const_type
1356 getGlobalIndsViewHost (const RowInfo& rowinfo) const
1357 {
1358 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1359 return typename global_inds_dualv_type::t_host::const_type ();
1360 else
1361 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1362 rowinfo.allocSize,
1363 Access::ReadOnly);
1364 }
1365
1366 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1368 local_inds_dualv_type::t_dev::const_type
1370 getLocalIndsViewDevice (const RowInfo& rowinfo) const
1371 {
1372 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1373 return typename local_inds_dualv_type::t_dev::const_type ();
1374 else
1375 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1376 rowinfo.allocSize,
1377 Access::ReadOnly);
1378 }
1379
1380 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1382 global_inds_dualv_type::t_dev::const_type
1384 getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1385 {
1386 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1387 return typename global_inds_dualv_type::t_dev::const_type ();
1388 else
1389 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1390 rowinfo.allocSize,
1391 Access::ReadOnly);
1392 }
1393
1394#ifdef TPETRA_ENABLE_DEPRECATED_CODE
1395 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1396 Teuchos::ArrayView<const LocalOrdinal>
1398 getLocalView (const RowInfo& rowinfo) const
1399 {
1400 using Kokkos::subview;
1401 typedef LocalOrdinal LO;
1402
1403 if (rowinfo.allocSize == 0) {
1404 return Teuchos::ArrayView<const LO> ();
1405 }
1406 else {
1407 if (lclIndsUnpacked_wdv.extent (0) != 0) { // 1-D storage
1408 const size_t start = rowinfo.offset1D;
1409 const size_t len = rowinfo.allocSize;
1410 const std::pair<size_t, size_t> rng (start, start + len);
1411 // mfh 23 Nov 2015: Don't just create a subview of
1412 // lclIndsUnpacked_wdv directly, because that first creates a
1413 // _managed_ subview, then returns an unmanaged version of
1414 // that. That touches the reference count, which costs
1415 // performance in a measurable way.
1416 // KDDKDD Function is deprecated; we ignore the unmanaged bit above.
1417 // KDDKDD Breaks the reference counting paradigm; reference to
1418 // KDDKDD host view is lost.
1419 auto rowViewHost = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
1420 auto rowView = subview(rowViewHost, rng);
1421 const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1422 return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1423 }
1424 else {
1425 return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1426 }
1427 }
1428 }
1430
1431 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1432 Teuchos::ArrayView<const GlobalOrdinal>
1434 getGlobalView (const RowInfo& rowinfo) const
1435 {
1436 using GO = global_ordinal_type;
1437
1438 Teuchos::ArrayView<const GO> view;
1439 if (rowinfo.allocSize > 0 && gblInds_wdv.extent (0) != 0) {
1440 const auto rng =
1441 std::make_pair (rowinfo.offset1D,
1442 rowinfo.offset1D + rowinfo.allocSize);
1443 // mfh 23 Nov 2015: Don't just create a subview of
1444 // gblInds_wdv directly, because that first creates a
1445 // _managed_ subview, then returns an unmanaged version of
1446 // that. That touches the reference count, which costs
1447 // performance in a measurable way.
1448 // KDDKDD This method is deprecated; we ignore the unmanaged bit above
1449 // KDDKDD Breaks the reference counting paradigm; unmanaged
1450 // KDDKDD memory does not do reference counting
1451 auto gblInds = gblInds_wdv.getHostView(Access::ReadOnly);
1452 using Kokkos::Compat::getConstArrayView;
1453 using Kokkos::subview;
1454 view = getConstArrayView (subview (gblInds, rng));
1455 }
1456 return view;
1457 }
1458#endif // TPETRA_ENABLE_DEPRECATED_CODE
1459
1460 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1461 RowInfo
1463 getRowInfo (const LocalOrdinal myRow) const
1464 {
1465 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1467 if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1468 ret.localRow = STINV;
1469 ret.allocSize = 0;
1470 ret.numEntries = 0;
1471 ret.offset1D = STINV;
1472 return ret;
1473 }
1474
1475 ret.localRow = static_cast<size_t> (myRow);
1476 if (this->indicesAreAllocated ()) {
1477 // Offsets tell us the allocation size in this case.
1478 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1479 ret.offset1D = 0;
1480 ret.allocSize = 0;
1481 }
1482 else {
1483 ret.offset1D = rowPtrsUnpacked_host_(myRow);
1484 ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1485 }
1486
1487 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1488 ret.allocSize :
1489 this->k_numRowEntries_(myRow);
1490 }
1491 else { // haven't performed allocation yet; probably won't hit this code
1492 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1493 // allocate, rather than doing lazy allocation at first insert.
1494 // This will make k_numAllocPerRow_ obsolete.
1495 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1496 this->k_numAllocPerRow_(myRow) : // this is a host View
1497 this->numAllocForAllRows_;
1498 ret.numEntries = 0;
1499 ret.offset1D = STINV;
1500 }
1501
1502 return ret;
1503 }
1504
1505
1506 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1507 RowInfo
1509 getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1510 {
1511 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1512 RowInfo ret;
1513 if (this->rowMap_.is_null ()) {
1514 ret.localRow = STINV;
1515 ret.allocSize = 0;
1516 ret.numEntries = 0;
1517 ret.offset1D = STINV;
1518 return ret;
1519 }
1520 const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1521 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1522 ret.localRow = STINV;
1523 ret.allocSize = 0;
1524 ret.numEntries = 0;
1525 ret.offset1D = STINV;
1526 return ret;
1527 }
1528
1529 ret.localRow = static_cast<size_t> (myRow);
1530 if (this->indicesAreAllocated ()) {
1531 // graph data structures have the info that we need
1532 //
1533 // if static graph, offsets tell us the allocation size
1534 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1535 ret.offset1D = 0;
1536 ret.allocSize = 0;
1537 }
1538 else {
1539 ret.offset1D = rowPtrsUnpacked_host_(myRow);
1540 ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1541 }
1542
1543 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1544 ret.allocSize :
1545 this->k_numRowEntries_(myRow);
1546 }
1547 else { // haven't performed allocation yet; probably won't hit this code
1548 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1549 // allocate, rather than doing lazy allocation at first insert.
1550 // This will make k_numAllocPerRow_ obsolete.
1551 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1552 this->k_numAllocPerRow_(myRow) : // this is a host View
1553 this->numAllocForAllRows_;
1554 ret.numEntries = 0;
1555 ret.offset1D = STINV;
1556 }
1557
1558 return ret;
1559 }
1560
1561
1562 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1563 void
1564 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1565 staticAssertions () const
1567 using Teuchos::OrdinalTraits;
1568 typedef LocalOrdinal LO;
1569 typedef GlobalOrdinal GO;
1570 typedef global_size_t GST;
1571
1572 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1573 // This is so that we can store local indices in the memory
1574 // formerly occupied by global indices.
1575 static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1576 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1577 // Assumption: max(size_t) >= max(LocalOrdinal)
1578 // This is so that we can represent any LocalOrdinal as a size_t.
1579 static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1580 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1581 static_assert (sizeof(GST) >= sizeof(size_t),
1582 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1583
1584 // FIXME (mfh 30 Sep 2015) We're not using
1585 // Teuchos::CompileTimeAssert any more. Can we do these checks
1586 // with static_assert?
1587
1588 // can't call max() with CompileTimeAssert, because it isn't a
1589 // constant expression; will need to make this a runtime check
1590 const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1591 "given template arguments: size assumptions are not valid.";
1592 TEUCHOS_TEST_FOR_EXCEPTION(
1593 static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1594 std::runtime_error, msg);
1595 TEUCHOS_TEST_FOR_EXCEPTION(
1596 static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1597 std::runtime_error, msg);
1598 TEUCHOS_TEST_FOR_EXCEPTION(
1599 static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1600 std::runtime_error, msg);
1601 TEUCHOS_TEST_FOR_EXCEPTION(
1602 Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1603 std::runtime_error, msg);
1604 }
1605
1606
1607 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1608 size_t
1610 insertIndices (RowInfo& rowinfo,
1611 const SLocalGlobalViews &newInds,
1612 const ELocalGlobal lg,
1613 const ELocalGlobal I)
1614 {
1615 using Teuchos::ArrayView;
1616 typedef LocalOrdinal LO;
1617 typedef GlobalOrdinal GO;
1618 const char tfecfFuncName[] = "insertIndices: ";
1619
1620 size_t oldNumEnt = 0;
1621 if (debug_) {
1622 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1623 (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1624 "lg must be either GlobalIndices or LocalIndices.");
1625 oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1626 }
1627
1628 size_t numNewInds = 0;
1629 if (lg == GlobalIndices) { // input indices are global
1630 ArrayView<const GO> new_ginds = newInds.ginds;
1631 numNewInds = new_ginds.size();
1632 if (I == GlobalIndices) { // store global indices
1633 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1634 if (debug_) {
1635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1636 (static_cast<size_t> (gind_view.size ()) <
1637 rowinfo.numEntries + numNewInds, std::logic_error,
1638 "gind_view.size() = " << gind_view.size ()
1639 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1640 << ") + numNewInds (= " << numNewInds << ").");
1641 }
1642 GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1643 + rowinfo.numEntries;
1644 for (size_t k = 0; k < numNewInds; ++k) {
1645 gblColInds_out[k] = new_ginds[k];
1646 }
1647 }
1648 else if (I == LocalIndices) { // store local indices
1649 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1650 if (debug_) {
1651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1652 (static_cast<size_t> (lind_view.size ()) <
1653 rowinfo.numEntries + numNewInds, std::logic_error,
1654 "lind_view.size() = " << lind_view.size ()
1655 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1656 << ") + numNewInds (= " << numNewInds << ").");
1657 }
1658 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1659 + rowinfo.numEntries;
1660 for (size_t k = 0; k < numNewInds; ++k) {
1661 lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1662 }
1663 }
1664 }
1665 else if (lg == LocalIndices) { // input indices are local
1666 ArrayView<const LO> new_linds = newInds.linds;
1667 numNewInds = new_linds.size();
1668 if (I == LocalIndices) { // store local indices
1669 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1670 if (debug_) {
1671 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1672 (static_cast<size_t> (lind_view.size ()) <
1673 rowinfo.numEntries + numNewInds, std::logic_error,
1674 "lind_view.size() = " << lind_view.size ()
1675 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1676 << ") + numNewInds (= " << numNewInds << ").");
1677 }
1678 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1679 + rowinfo.numEntries;
1680 for (size_t k = 0; k < numNewInds; ++k) {
1681 lclColInds_out[k] = new_linds[k];
1682 }
1683 }
1684 else if (I == GlobalIndices) {
1685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1686 (true, std::logic_error, "The case where the input indices are local "
1687 "and the indices to write are global (lg=LocalIndices, I="
1688 "GlobalIndices) is not implemented, because it does not make sense."
1689 << std::endl << "If you have correct local column indices, that "
1690 "means the graph has a column Map. In that case, you should be "
1691 "storing local indices.");
1692 }
1693 }
1694
1695 rowinfo.numEntries += numNewInds;
1696 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1697 this->setLocallyModified ();
1698
1699 if (debug_) {
1700 const size_t chkNewNumEnt =
1701 this->getNumEntriesInLocalRow (rowinfo.localRow);
1702 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1703 (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1704 "chkNewNumEnt = " << chkNewNumEnt
1705 << " != oldNumEnt (= " << oldNumEnt
1706 << ") + numNewInds (= " << numNewInds << ").");
1707 }
1708
1709 return numNewInds;
1710 }
1711
1712 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1713 size_t
1715 insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1716 const GlobalOrdinal inputGblColInds[],
1717 const size_t numInputInds)
1718 {
1719 return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1720 inputGblColInds, numInputInds);
1721 }
1722
1723 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1724 size_t
1726 insertGlobalIndicesImpl (const RowInfo& rowInfo,
1727 const GlobalOrdinal inputGblColInds[],
1728 const size_t numInputInds,
1729 std::function<void(const size_t, const size_t, const size_t)> fun)
1730 {
1732 using Kokkos::View;
1733 using Kokkos::subview;
1734 using Kokkos::MemoryUnmanaged;
1735 using Teuchos::ArrayView;
1736 using LO = LocalOrdinal;
1737 using GO = GlobalOrdinal;
1738 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1739 const LO lclRow = static_cast<LO> (rowInfo.localRow);
1740
1741 auto numEntries = rowInfo.numEntries;
1742 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1743 inp_view_type inputInds(inputGblColInds, numInputInds);
1744 size_t numInserted;
1745 {
1746 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1747 numInserted = Details::insertCrsIndices(lclRow, this->rowPtrsUnpacked_host_,
1748 gblIndsHostView,
1749 numEntries, inputInds, fun);
1750 }
1751
1752 const bool insertFailed =
1753 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1754 if(insertFailed) {
1755 constexpr size_t ONE (1);
1756 const int myRank = this->getComm()->getRank();
1757 std::ostringstream os;
1758
1759 os << "Proc " << myRank << ": Not enough capacity to insert "
1760 << numInputInds
1761 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1762 << " into local row " << lclRow << ", which currently has "
1763 << rowInfo.numEntries
1764 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1765 << " and total allocation size " << rowInfo.allocSize
1766 << ". ";
1767 const size_t maxNumToPrint =
1769 ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1770 numInputInds);
1771 verbosePrintArray(os, inputGblColIndsView, "Input global "
1772 "column indices", maxNumToPrint);
1773 os << ", ";
1774 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1775 ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1776 rowInfo.numEntries);
1777 verbosePrintArray(os, curGblColIndsView, "Current global "
1778 "column indices", maxNumToPrint);
1779 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1780 (true, std::runtime_error, os.str());
1781 }
1783 this->k_numRowEntries_(lclRow) += numInserted;
1784
1785 this->setLocallyModified();
1786 return numInserted;
1787 }
1788
1789
1790 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1791 void
1793 insertLocalIndicesImpl (const LocalOrdinal myRow,
1794 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1795 std::function<void(const size_t, const size_t, const size_t)> fun)
1796 {
1797 using Kokkos::MemoryUnmanaged;
1798 using Kokkos::subview;
1799 using Kokkos::View;
1800 using LO = LocalOrdinal;
1801 const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1802
1803 const RowInfo rowInfo = this->getRowInfo(myRow);
1804
1805 size_t numNewInds = 0;
1806 size_t newNumEntries = 0;
1807
1808 auto numEntries = rowInfo.numEntries;
1809 // Note: Teuchos::ArrayViews are in HostSpace
1810 using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1811 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1812 size_t numInserted = 0;
1813 {
1814 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1815 numInserted = Details::insertCrsIndices(myRow, rowPtrsUnpacked_host_, lclInds,
1816 numEntries, inputInds, fun);
1817 }
1818
1819 const bool insertFailed =
1820 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1821 if(insertFailed) {
1822 constexpr size_t ONE (1);
1823 const size_t numInputInds(indices.size());
1824 const int myRank = this->getComm()->getRank();
1825 std::ostringstream os;
1826 os << "On MPI Process " << myRank << ": Not enough capacity to "
1827 "insert " << numInputInds
1828 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1829 << " into local row " << myRow << ", which currently has "
1830 << rowInfo.numEntries
1831 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1832 << " and total allocation size " << rowInfo.allocSize << ".";
1833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1834 (true, std::runtime_error, os.str());
1835 }
1836 numNewInds = numInserted;
1837 newNumEntries = rowInfo.numEntries + numNewInds;
1838
1839 this->k_numRowEntries_(myRow) += numNewInds;
1840 this->setLocallyModified ();
1841
1842 if (debug_) {
1843 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1844 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1845 (chkNewNumEntries != newNumEntries, std::logic_error,
1846 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1847 << " != newNumEntries = " << newNumEntries
1848 << ". Please report this bug to the Tpetra developers.");
1849 }
1850 }
1851
1852 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1853 size_t
1855 findGlobalIndices(const RowInfo& rowInfo,
1856 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1857 std::function<void(const size_t, const size_t, const size_t)> fun) const
1858 {
1859 using GO = GlobalOrdinal;
1860 using Kokkos::View;
1861 using Kokkos::MemoryUnmanaged;
1862 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1863
1864 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1865 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1866
1867 size_t numFound = 0;
1868 LocalOrdinal lclRow = rowInfo.localRow;
1869 if (this->isLocallyIndexed())
1870 {
1871 if (this->colMap_.is_null())
1872 return invalidCount;
1873 const auto& colMap = *(this->colMap_);
1874 auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1875 numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1876 rowInfo.numEntries,
1877 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1878 }
1879 else if (this->isGloballyIndexed())
1880 {
1881 numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1882 rowInfo.numEntries,
1883 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1884 }
1885 return numFound;
1886 }
1887
1888
1889 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1890 size_t
1892 sortAndMergeRowIndices (const RowInfo& rowInfo,
1893 const bool sorted,
1894 const bool merged)
1895 {
1896 const size_t origNumEnt = rowInfo.numEntries;
1897 if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1898 origNumEnt != 0) {
1899 auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1900
1901 LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1902 if (! sorted) {
1903 std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1904 }
1905
1906 if (! merged) {
1907 LocalOrdinal* const beg = lclColIndsRaw;
1908 LocalOrdinal* const end = beg + rowInfo.numEntries;
1909 LocalOrdinal* const newend = std::unique (beg, end);
1910 const size_t newNumEnt = newend - beg;
1911
1912 // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1913 this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1914 return origNumEnt - newNumEnt; // the number of duplicates in the row
1915 }
1916 else {
1917 return static_cast<size_t> (0); // assume no duplicates
1918 }
1919 }
1920 else {
1921 return static_cast<size_t> (0); // no entries in the row
1922 }
1923 }
1925
1926 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1927 void
1929 setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1930 const Teuchos::RCP<const map_type>& rangeMap)
1931 {
1932 // simple pointer comparison for equality
1933 if (domainMap_ != domainMap) {
1934 domainMap_ = domainMap;
1935 importer_ = Teuchos::null;
1936 }
1937 if (rangeMap_ != rangeMap) {
1938 rangeMap_ = rangeMap;
1939 exporter_ = Teuchos::null;
1940 }
1941 }
1942
1943
1944 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1945 void
1946 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1947 clearGlobalConstants ()
1948 {
1949 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1950
1951 globalNumEntries_ = INV;
1952 globalMaxNumRowEntries_ = INV;
1953 haveGlobalConstants_ = false;
1954 }
1955
1956
1957 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1958 void
1960 checkInternalState () const
1961 {
1962 if (debug_) {
1963 using std::endl;
1964 const char tfecfFuncName[] = "checkInternalState: ";
1965 const char suffix[] = " Please report this bug to the Tpetra developers.";
1966
1967 std::unique_ptr<std::string> prefix;
1968 if (verbose_) {
1969 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1970 std::ostringstream os;
1971 os << *prefix << "Start" << endl;
1972 std::cerr << os.str();
1973 }
1974
1975 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1976 //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1977 // check the internal state of this data structure
1978 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1979 // always remains in a valid state
1980
1981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1982 (this->rowMap_.is_null (), std::logic_error,
1983 "Row Map is null." << suffix);
1984 // This may access the row Map, so we need to check first (above)
1985 // whether the row Map is null.
1986 const LocalOrdinal lclNumRows =
1987 static_cast<LocalOrdinal> (this->getNodeNumRows ());
1988
1989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1990 (this->isFillActive () == this->isFillComplete (), std::logic_error,
1991 "Graph cannot be both fill active and fill complete." << suffix);
1992 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1993 (this->isFillComplete () &&
1994 (this->colMap_.is_null () ||
1995 this->rangeMap_.is_null () ||
1996 this->domainMap_.is_null ()),
1997 std::logic_error,
1998 "Graph is full complete, but at least one of {column, range, domain} "
1999 "Map is null." << suffix);
2000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2001 (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2002 std::logic_error, "Storage is optimized, but indices are not "
2003 "allocated, not even trivially." << suffix);
2004
2005 size_t nodeAllocSize = 0;
2006 try {
2007 nodeAllocSize = this->getNodeAllocationSize ();
2008 }
2009 catch (std::logic_error& e) {
2010 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2011 (true, std::runtime_error, "getNodeAllocationSize threw "
2012 "std::logic_error: " << e.what ());
2014 catch (std::exception& e) {
2015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2016 (true, std::runtime_error, "getNodeAllocationSize threw an "
2017 "std::exception: " << e.what ());
2018 }
2019 catch (...) {
2020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2021 (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2022 "not a subclass of std::exception.");
2023 }
2024
2025 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2026 (this->isStorageOptimized () &&
2027 nodeAllocSize != this->getNodeNumEntries (),
2028 std::logic_error, "Storage is optimized, but "
2029 "this->getNodeAllocationSize() = " << nodeAllocSize
2030 << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2031 << "." << suffix);
2032 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2033 (! this->haveGlobalConstants_ &&
2034 (this->globalNumEntries_ != GSTI ||
2035 this->globalMaxNumRowEntries_ != GSTI),
2036 std::logic_error, "Graph claims not to have global constants, but "
2037 "some of the global constants are not marked as invalid." << suffix);
2038 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2039 (this->haveGlobalConstants_ &&
2040 (this->globalNumEntries_ == GSTI ||
2041 this->globalMaxNumRowEntries_ == GSTI),
2042 std::logic_error, "Graph claims to have global constants, but "
2043 "some of them are marked as invalid." << suffix);
2044 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2045 (this->haveGlobalConstants_ &&
2046 (this->globalNumEntries_ < this->getNodeNumEntries () ||
2047 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2048 std::logic_error, "Graph claims to have global constants, and "
2049 "all of the values of the global constants are valid, but "
2050 "some of the local constants are greater than "
2051 "their corresponding global constants." << suffix);
2052 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2053 (this->indicesAreAllocated () &&
2054 (this->numAllocForAllRows_ != 0 ||
2055 this->k_numAllocPerRow_.extent (0) != 0),
2056 std::logic_error, "The graph claims that its indices are allocated, but "
2057 "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2058 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2059 "the graph is supposed to release its \"allocation specifications\" "
2060 "when it allocates its indices." << suffix);
2061 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2062 (rowPtrsUnpacked_host_.extent(0) != rowPtrsUnpacked_dev_.extent(0),
2063 std::logic_error, "The host and device views of k_rowPtrs_ have "
2064 "different sizes; rowPtrsUnpacked_host_ has size "
2065 << rowPtrsUnpacked_host_.extent(0)
2066 << ", but rowPtrsUnpacked_dev_ has size "
2067 << rowPtrsUnpacked_dev_.extent(0)
2068 << "." << suffix);
2069 if (isGloballyIndexed() && rowPtrsUnpacked_host_.extent(0) != 0) {
2070 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2071 (size_t(rowPtrsUnpacked_host_.extent(0)) != size_t(lclNumRows + 1),
2072 std::logic_error, "The graph is globally indexed and "
2073 "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2074 << ", but that size does not equal lclNumRows+1 = "
2075 << (lclNumRows+1) << "." << suffix);
2076 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2077 (rowPtrsUnpacked_host_(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2078 std::logic_error, "The graph is globally indexed and "
2079 "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2080 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2081 << rowPtrsUnpacked_host_(lclNumRows)
2082 << " != gblInds_wdv.extent(0)="
2083 << gblInds_wdv.extent(0) << "." << suffix);
2084 }
2085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2086 (this->isLocallyIndexed () &&
2087 this->rowPtrsUnpacked_host_.extent (0) != 0 &&
2088 (static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) !=
2089 static_cast<size_t> (lclNumRows + 1) ||
2090 this->rowPtrsUnpacked_host_(lclNumRows) !=
2091 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2092 std::logic_error, "If k_rowPtrs_ has nonzero size and "
2093 "the graph is locally indexed, then "
2094 "k_rowPtrs_ must have N+1 rows, and "
2095 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2096
2097 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2098 (this->indicesAreAllocated () &&
2099 nodeAllocSize > 0 &&
2100 this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2101 this->gblInds_wdv.extent (0) == 0,
2102 std::logic_error, "Graph is allocated nontrivially, but "
2103 "but 1-D allocations are not present." << suffix);
2104
2105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2106 (! this->indicesAreAllocated () &&
2107 ((this->rowPtrsUnpacked_host_.extent (0) != 0 ||
2108 this->k_numRowEntries_.extent (0) != 0) ||
2109 this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2110 this->gblInds_wdv.extent (0) != 0),
2111 std::logic_error, "If indices are not allocated, "
2112 "then none of the buffers should be." << suffix);
2113 // indices may be local or global only if they are allocated
2114 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2115 // indicesAreGlobal_)
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2118 ! this->indicesAreAllocated_,
2119 std::logic_error, "Indices may be local or global only if they are "
2120 "allocated." << suffix);
2121 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2122 (this->indicesAreLocal_ && this->indicesAreGlobal_,
2123 std::logic_error, "Indices may not be both local and global." << suffix);
2124 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2125 (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2126 std::logic_error, "Indices are local, but "
2127 "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2128 << ") != 0. In other words, if indices are local, then "
2129 "allocations of global indices should not be present."
2130 << suffix);
2131 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2132 (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2133 std::logic_error, "Indices are global, but "
2134 "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2135 << ") != 0. In other words, if indices are global, "
2136 "then allocations for local indices should not be present."
2137 << suffix);
2138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2139 (indicesAreLocal_ && nodeAllocSize > 0 &&
2140 lclIndsUnpacked_wdv.extent (0) == 0 && getNodeNumRows () > 0,
2141 std::logic_error, "Indices are local and "
2142 "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2143 "lclIndsUnpacked_wdv.extent(0) = 0 and getNodeNumRows() = "
2144 << getNodeNumRows () << " > 0." << suffix);
2145 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2146 (indicesAreGlobal_ && nodeAllocSize > 0 &&
2147 gblInds_wdv.extent (0) == 0 && getNodeNumRows () > 0,
2148 std::logic_error, "Indices are global and "
2149 "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2150 "gblInds_wdv.extent(0) = 0 and getNodeNumRows() = "
2151 << getNodeNumRows () << " > 0." << suffix);
2152 // check the actual allocations
2153 if (this->indicesAreAllocated () &&
2154 this->rowPtrsUnpacked_host_.extent (0) != 0) {
2155 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2156 (static_cast<size_t> (this->rowPtrsUnpacked_host_.extent (0)) !=
2157 this->getNodeNumRows () + 1,
2158 std::logic_error, "Indices are allocated and "
2159 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2160 << this->rowPtrsUnpacked_host_.extent (0) << " != getNodeNumRows()+1 = "
2161 << (this->getNodeNumRows () + 1) << "." << suffix);
2162 const size_t actualNumAllocated =
2163 this->rowPtrsUnpacked_host_(this->getNodeNumRows());
2164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2165 (this->isLocallyIndexed () &&
2166 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2167 std::logic_error, "Graph is locally indexed, indices are "
2168 "are allocated, and k_rowPtrs_ has nonzero length, but "
2169 "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2170 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2172 (this->isGloballyIndexed () &&
2173 static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2174 std::logic_error, "Graph is globally indexed, indices "
2175 "are allocated, and k_rowPtrs_ has nonzero length, but "
2176 "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2177 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2178 }
2179
2180 if (verbose_) {
2181 std::ostringstream os;
2182 os << *prefix << "Done" << endl;
2183 std::cerr << os.str();
2184 }
2185 }
2186 }
2187
2189 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2190 size_t
2192 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2194 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2195 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2196 return Teuchos::OrdinalTraits<size_t>::invalid ();
2197 }
2198 else {
2199 return rowInfo.numEntries;
2200 }
2201 }
2202
2203
2204 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2205 size_t
2207 getNumEntriesInLocalRow (LocalOrdinal localRow) const
2208 {
2209 const RowInfo rowInfo = this->getRowInfo (localRow);
2210 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2211 return Teuchos::OrdinalTraits<size_t>::invalid ();
2212 }
2213 else {
2214 return rowInfo.numEntries;
2215 }
2216 }
2217
2218
2219 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2220 size_t
2222 getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2223 {
2224 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2225 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2226 return Teuchos::OrdinalTraits<size_t>::invalid ();
2227 }
2228 else {
2229 return rowInfo.allocSize;
2230 }
2231 }
2232
2233
2234 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2235 size_t
2237 getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2238 {
2239 const RowInfo rowInfo = this->getRowInfo (localRow);
2240 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2241 return Teuchos::OrdinalTraits<size_t>::invalid ();
2242 }
2243 else {
2244 return rowInfo.allocSize;
2245 }
2246 }
2247
2248
2249 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2250 Teuchos::ArrayRCP<const size_t>
2252 getNodeRowPtrs () const
2253 {
2254 using Kokkos::ViewAllocateWithoutInitializing;
2255 using Teuchos::ArrayRCP;
2256 typedef typename local_graph_device_type::row_map_type row_map_type;
2257 typedef typename row_map_type::non_const_value_type row_offset_type;
2258 const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2259 const char suffix[] = " Please report this bug to the Tpetra developers.";
2260
2261 const size_t size = rowPtrsUnpacked_host_.extent (0);
2262 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2263
2264 if (size == 0) {
2265 return ArrayRCP<const size_t> ();
2266 }
2267
2268 ArrayRCP<const row_offset_type> ptr_rot;
2269 ArrayRCP<const size_t> ptr_st;
2270 if (same) { // size_t == row_offset_type
2271 ptr_rot = Kokkos::Compat::persistingView (rowPtrsUnpacked_host_);
2272 }
2273 else { // size_t != row_offset_type
2274 typedef Kokkos::View<size_t*, device_type> ret_view_type;
2275 ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2276
2277 ::Tpetra::Details::copyOffsets (ptr_d, rowPtrsUnpacked_dev_);
2278
2279 typename ret_view_type::HostMirror ptr_h =
2280 Kokkos::create_mirror_view (ptr_d);
2281 Kokkos::deep_copy(ptr_h, ptr_d);
2282 ptr_st = Kokkos::Compat::persistingView (ptr_h);
2283 }
2284 if (debug_) {
2285 TEUCHOS_TEST_FOR_EXCEPTION
2286 (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2287 prefix << "size_t == row_offset_type and size = " << size
2288 << " != 0, but ptr_rot is null." << suffix);
2289 TEUCHOS_TEST_FOR_EXCEPTION
2290 (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2291 prefix << "size_t != row_offset_type and size = " << size
2292 << " != 0, but ptr_st is null." << suffix);
2293 }
2294
2295 // If size_t == row_offset_type, return a persisting host view of
2296 // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2297 ArrayRCP<const size_t> retval =
2298 Kokkos::Impl::if_c<same,
2299 ArrayRCP<const row_offset_type>,
2300 ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2301 if (debug_) {
2302 TEUCHOS_TEST_FOR_EXCEPTION
2303 (size != 0 && retval.is_null (), std::logic_error,
2304 prefix << "size = " << size << " != 0, but retval is null." << suffix);
2305 }
2306 return retval;
2307 }
2308
2309
2310 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2311 Teuchos::ArrayRCP<const LocalOrdinal>
2313 getNodePackedIndices () const
2314 {
2315 // KDDKDD UVM REMOVAL: 3/21
2316 // KDDKDD This function used to return k_lclInds1D_.
2317 // KDDKDD I retain its behavior by return lclIndsUnpacked_wdv.getHostView.
2318 // KDDKDD However, k_lclInds1D_ was not necessarily PACKED;
2319 // KDDKDD PACKED indices are in the static graph.
2320 // KDDKDD However, with OptimizeStorage, k_lclInds1D_ was PACKED.
2321 // return Kokkos::Compat::persistingView (k_lclInds1D_);
2322 return Kokkos::Compat::persistingView (
2323 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly));
2324 }
2325
2326
2327 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2328 void
2330 getLocalRowCopy (LocalOrdinal localRow,
2331 nonconst_local_inds_host_view_type & indices,
2332 size_t& numEntries) const
2333 {
2334 using Teuchos::ArrayView;
2335 const char tfecfFuncName[] = "getLocalRowCopy: ";
2336
2337 TEUCHOS_TEST_FOR_EXCEPTION(
2338 isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2339 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2340 "does not have a column Map yet. That means we don't have local indices "
2341 "for columns yet, so it doesn't make sense to call this method. If the "
2342 "graph doesn't have a column Map yet, you should call fillComplete on "
2343 "it first.");
2344
2345 // This does the right thing (reports an empty row) if the input
2346 // row is invalid.
2347 const RowInfo rowinfo = this->getRowInfo (localRow);
2348 // No side effects on error.
2349 const size_t theNumEntries = rowinfo.numEntries;
2350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2351 (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2352 "Specified storage (size==" << indices.size () << ") does not suffice "
2353 "to hold all " << theNumEntries << " entry/ies for this row.");
2354 numEntries = theNumEntries;
2355
2356 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2357 if (isLocallyIndexed ()) {
2358 auto lclInds = getLocalIndsViewHost(rowinfo);
2359 for (size_t j = 0; j < theNumEntries; ++j) {
2360 indices[j] = lclInds(j);
2361 }
2362 }
2363 else if (isGloballyIndexed ()) {
2364 auto gblInds = getGlobalIndsViewHost(rowinfo);
2365 for (size_t j = 0; j < theNumEntries; ++j) {
2366 indices[j] = colMap_->getLocalElement (gblInds(j));
2367 }
2368 }
2369 }
2370 }
2371
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
/// \brief Copy the local column indices of a local row into an ArrayView
///   (deprecated-code overload).
///
/// \param localRow [in] Local index of the row.
/// \param indices [out] Destination; must have room for all of the row's
///   entries.
/// \param numEntries [out] Number of entries in the row.  Not modified
///   if an exception is thrown.
///
/// If the graph is globally indexed, each stored global index is
/// converted to a local index through the column Map.  Nothing is copied
/// for a row whose lookup reports an invalid local row.
///
/// Reports std::runtime_error if the graph is globally indexed without a
/// column Map, or if \c indices is too small.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getLocalRowCopy (LocalOrdinal localRow,
                 const Teuchos::ArrayView<LocalOrdinal>&indices,
                 size_t& numEntries) const
{
  const char tfecfFuncName[] = "getLocalRowCopy: ";

  TEUCHOS_TEST_FOR_EXCEPTION(
    isGloballyIndexed () && ! hasColMap (), std::runtime_error,
    "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
    "does not have a column Map yet. That means we don't have local indices "
    "for columns yet, so it doesn't make sense to call this method. If the "
    "graph doesn't have a column Map yet, you should call fillComplete on "
    "it first.");

  // This does the right thing (reports an empty row) if the input
  // row is invalid.
  const RowInfo rowinfo = this->getRowInfo (localRow);
  // No side effects on error.
  const size_t theNumEntries = rowinfo.numEntries;
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
    (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
     "Specified storage (size==" << indices.size () << ") does not suffice "
     "to hold all " << theNumEntries << " entry/ies for this row.");
  numEntries = theNumEntries;

  if (rowinfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
    return; // invalid row: nothing to copy
  }

  if (isLocallyIndexed ()) {
    auto lclInds = getLocalIndsViewHost(rowinfo);
    for (size_t j = 0; j < theNumEntries; ++j) {
      indices[j] = lclInds(j);
    }
  }
  else if (isGloballyIndexed ()) {
    // Stored indices are global; translate each through the column Map.
    auto gblInds = getGlobalIndsViewHost(rowinfo);
    for (size_t j = 0; j < theNumEntries; ++j) {
      indices[j] = colMap_->getLocalElement (gblInds(j));
    }
  }
}
#endif
2418
2419
2420 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2421 void
2423 getGlobalRowCopy (GlobalOrdinal globalRow,
2424 nonconst_global_inds_host_view_type &indices,
2425 size_t& numEntries) const
2426 {
2427 using Teuchos::ArrayView;
2428 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2429
2430 // This does the right thing (reports an empty row) if the input
2431 // row is invalid.
2432 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2433 const size_t theNumEntries = rowinfo.numEntries;
2434 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2435 static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2436 "Specified storage (size==" << indices.size () << ") does not suffice "
2437 "to hold all " << theNumEntries << " entry/ies for this row.");
2438 numEntries = theNumEntries; // first side effect
2439
2440 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2441 if (isLocallyIndexed ()) {
2442 auto lclInds = getLocalIndsViewHost(rowinfo);
2443 for (size_t j = 0; j < theNumEntries; ++j) {
2444 indices[j] = colMap_->getGlobalElement (lclInds(j));
2445 }
2446 }
2447 else if (isGloballyIndexed ()) {
2448 auto gblInds = getGlobalIndsViewHost(rowinfo);
2449 for (size_t j = 0; j < theNumEntries; ++j) {
2450 indices[j] = gblInds(j);
2451 }
2452 }
2453 }
2454 }
2455
2456
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
/// \brief Copy the global column indices of a global row into an
///   ArrayView (deprecated-code overload).
///
/// \param globalRow [in] Global index of the row.
/// \param indices [out] Destination; must have room for all of the row's
///   entries.
/// \param numEntries [out] Number of entries in the row.  Not modified
///   if an exception is thrown.
///
/// If the graph is locally indexed, each stored local index is converted
/// to a global index through the column Map.  Nothing is copied for a
/// row whose lookup reports an invalid local row.
///
/// Reports std::runtime_error if \c indices is too small.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getGlobalRowCopy (GlobalOrdinal globalRow,
                  const Teuchos::ArrayView<GlobalOrdinal>& indices,
                  size_t& numEntries) const
{
  const char tfecfFuncName[] = "getGlobalRowCopy: ";

  // This does the right thing (reports an empty row) if the input
  // row is invalid.
  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
  const size_t theNumEntries = rowinfo.numEntries;
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
    "Specified storage (size==" << indices.size () << ") does not suffice "
    "to hold all " << theNumEntries << " entry/ies for this row.");
  numEntries = theNumEntries; // first side effect

  if (rowinfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
    return; // invalid row: nothing to copy
  }

  if (isLocallyIndexed ()) {
    // Stored indices are local; translate each through the column Map.
    auto lclInds = getLocalIndsViewHost(rowinfo);
    for (size_t j = 0; j < theNumEntries; ++j) {
      indices[j] = colMap_->getGlobalElement (lclInds(j));
    }
  }
  else if (isGloballyIndexed ()) {
    auto gblInds = getGlobalIndsViewHost(rowinfo);
    for (size_t j = 0; j < theNumEntries; ++j) {
      indices[j] = gblInds(j);
    }
  }
}
#endif
2494
2495
2496 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2497 void
2500 const LocalOrdinal localRow,
2501 local_inds_host_view_type &indices) const
2502 {
2503 const char tfecfFuncName[] = "getLocalRowView: ";
2504
2505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2506 (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2507 "currently stored as global indices, so we cannot return a view with "
2508 "local column indices, whether or not the graph has a column Map. If "
2509 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2510
2511 const RowInfo rowInfo = getRowInfo (localRow);
2512 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2513 rowInfo.numEntries > 0) {
2514 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2515 rowInfo.numEntries,
2516 Access::ReadOnly);
2517 }
2518 else {
2519 // This does the right thing (reports an empty row) if the input
2520 // row is invalid.
2521 indices = local_inds_host_view_type();
2522 }
2523
2524 if (debug_) {
2525 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2526 (static_cast<size_t> (indices.size ()) !=
2527 getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2528 "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2529 localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2530 ". Please report this bug to the Tpetra developers.");
2531 }
2532 }
2533
2534
2535 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2536 void
2539 const GlobalOrdinal globalRow,
2540 global_inds_host_view_type &indices) const
2541 {
2542 const char tfecfFuncName[] = "getGlobalRowView: ";
2543
2544 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2545 (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2546 "currently stored as local indices, so we cannot return a view with "
2547 "global column indices. Use getGlobalRowCopy() instead.");
2548
2549 // This does the right thing (reports an empty row) if the input
2550 // row is invalid.
2551 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2552 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2553 rowInfo.numEntries > 0) {
2554 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2555 rowInfo.numEntries,
2556 Access::ReadOnly);
2557 }
2558 else {
2559 indices = typename global_inds_dualv_type::t_host::const_type();
2560 }
2561 if (debug_) {
2562 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2563 (static_cast<size_t> (indices.size ()) !=
2564 getNumEntriesInGlobalRow (globalRow),
2565 std::logic_error, "indices.size() = " << indices.extent(0)
2566 << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2567 << getNumEntriesInGlobalRow (globalRow)
2568 << ". Please report this bug to the Tpetra developers.");
2569 }
2570 }
2571
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
/// \brief Get an ArrayView of the local column indices in a local row
///   (deprecated-code overload).
///
/// \param localRow [in] Local index of the row.
/// \param indices [out] On return, a view of the row's entries; a null
///   view if the row is invalid or has no entries.
///
/// Reports std::runtime_error if the graph is globally indexed.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void
// TPETRA_DEPRECATED
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getLocalRowView (const LocalOrdinal localRow,
                 Teuchos::ArrayView<const LocalOrdinal>& indices) const
{
  const char tfecfFuncName[] = "getLocalRowView: ";
  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
    (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
     "currently stored as global indices, so we cannot return a view with "
     "local column indices, whether or not the graph has a column Map. If "
     "the graph _does_ have a column Map, use getLocalRowCopy() instead.");

  // This does the right thing (reports an empty row) if the input
  // row is invalid.
  const RowInfo rowInfo = getRowInfo (localRow);
  indices = Teuchos::null;
  if (rowInfo.localRow != STINV && rowInfo.numEntries > 0) {
    indices = this->getLocalView (rowInfo);
    // getLocalView returns a view of the _entire_ row, including
    // any extra space at the end (which 1-D unpacked storage
    // might have, for example).  That's why we have to take a
    // subview of the returned view.
    indices = indices (0, rowInfo.numEntries);
  }

  if (debug_) {
    // getNumEntriesInLocalRow reports invalid() for an invalid row,
    // whereas the view returned above is empty in that case.  Compare
    // against zero then, so that an invalid row does not trip this
    // internal consistency check.
    const size_t numEnt = getNumEntriesInLocalRow (localRow);
    const size_t expectedSize = (numEnt == STINV) ? size_t (0) : numEnt;
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
      (static_cast<size_t> (indices.size ()) != expectedSize,
       std::logic_error, "indices.size() "
       "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
       localRow << ") = " << numEnt <<
       ". Please report this bug to the Tpetra developers.");
  }
}

#endif
2613
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
/// \brief Get an ArrayView of the global column indices in a global row
///   (deprecated-code overload).
///
/// \param globalRow [in] Global index of the row.
/// \param indices [out] On return, a view of the row's entries; a null
///   view if the row is invalid or has no entries.
///
/// Reports std::runtime_error if the graph is locally indexed.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void
// TPETRA_DEPRECATED
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getGlobalRowView (const GlobalOrdinal globalRow,
                  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
{
  const char tfecfFuncName[] = "getGlobalRowView: ";
  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
    (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
     "currently stored as local indices, so we cannot return a view with "
     "global column indices. Use getGlobalRowCopy() instead.");

  // This does the right thing (reports an empty row) if the input
  // row is invalid.
  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
  indices = Teuchos::null;
  if (rowInfo.localRow != STINV && rowInfo.numEntries > 0) {
    // Trim the view of the whole row down to the entries actually in use.
    indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
  }

  if (debug_) {
    // getNumEntriesInGlobalRow reports invalid() for an invalid row,
    // whereas the view returned above is empty in that case.  Compare
    // against zero then, so that an invalid row does not trip this
    // internal consistency check.
    const size_t numEnt = getNumEntriesInGlobalRow (globalRow);
    const size_t expectedSize = (numEnt == STINV) ? size_t (0) : numEnt;
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
      (static_cast<size_t> (indices.size ()) != expectedSize,
       std::logic_error, "indices.size() = " << indices.size ()
       << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
       << numEnt
       << ". Please report this bug to the Tpetra developers.");
  }
}
#endif
2649
2650
2651 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2652 void
2654 insertLocalIndices (const LocalOrdinal localRow,
2655 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2656 {
2657 const char tfecfFuncName[] = "insertLocalIndices: ";
2658
2659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2660 (! isFillActive (), std::runtime_error, "Fill must be active.");
2661 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2662 (isGloballyIndexed (), std::runtime_error,
2663 "Graph indices are global; use insertGlobalIndices().");
2664 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2665 (! hasColMap (), std::runtime_error,
2666 "Cannot insert local indices without a column Map.");
2667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2668 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2669 "Local row index " << localRow << " is not in the row Map "
2670 "on the calling process.");
2671 if (! indicesAreAllocated ()) {
2672 allocateIndices (LocalIndices, verbose_);
2673 }
2674
2675 if (debug_) {
2676 // In debug mode, if the graph has a column Map, test whether any
2677 // of the given column indices are not in the column Map. Keep
2678 // track of the invalid column indices so we can tell the user
2679 // about them.
2680 if (hasColMap ()) {
2681 using Teuchos::Array;
2682 using Teuchos::toString;
2683 using std::endl;
2684 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2685
2686 const map_type& colMap = *colMap_;
2687 Array<LocalOrdinal> badColInds;
2688 bool allInColMap = true;
2689 for (size_type k = 0; k < indices.size (); ++k) {
2690 if (! colMap.isNodeLocalElement (indices[k])) {
2691 allInColMap = false;
2692 badColInds.push_back (indices[k]);
2693 }
2694 }
2695 if (! allInColMap) {
2696 std::ostringstream os;
2697 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2698 "entries in owned row " << localRow << ", at the following column "
2699 "indices: " << toString (indices) << "." << endl;
2700 os << "Of those, the following indices are not in the column Map on "
2701 "this process: " << toString (badColInds) << "." << endl << "Since "
2702 "the graph has a column Map already, it is invalid to insert entries "
2703 "at those locations.";
2704 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2705 }
2706 }
2707 }
2708
2709 insertLocalIndicesImpl (localRow, indices);
2710
2711 if (debug_) {
2712 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2713 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2714 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2715 "! isLocallyIndexed() is true. Please report this bug to the "
2716 "Tpetra developers.");
2717 }
2718 }
2719
2720 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2721 void
2723 insertLocalIndices (const LocalOrdinal localRow,
2724 const LocalOrdinal numEnt,
2725 const LocalOrdinal inds[])
2726 {
2727 Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2728 this->insertLocalIndices (localRow, indsT);
2729 }
2730
2731
2732 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2733 void
2735 insertGlobalIndices (const GlobalOrdinal gblRow,
2736 const LocalOrdinal numInputInds,
2737 const GlobalOrdinal inputGblColInds[])
2738 {
2739 typedef LocalOrdinal LO;
2740 const char tfecfFuncName[] = "insertGlobalIndices: ";
2741
2742 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2743 (this->isLocallyIndexed (), std::runtime_error,
2744 "graph indices are local; use insertLocalIndices().");
2745 // This can't really be satisfied for now, because if we are
2746 // fillComplete(), then we are local. In the future, this may
2747 // change. However, the rule that modification require active
2748 // fill will not change.
2749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2750 (! this->isFillActive (), std::runtime_error,
2751 "You are not allowed to call this method if fill is not active. "
2752 "If fillComplete has been called, you must first call resumeFill "
2753 "before you may insert indices.");
2754 if (! indicesAreAllocated ()) {
2755 allocateIndices (GlobalIndices, verbose_);
2756 }
2757 const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2758 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2759 if (debug_) {
2760 if (this->hasColMap ()) {
2761 using std::endl;
2762 const map_type& colMap = * (this->colMap_);
2763 // In a debug build, keep track of the nonowned ("bad") column
2764 // indices, so that we can display them in the exception
2765 // message. In a release build, just ditch the loop early if
2766 // we encounter a nonowned column index.
2767 std::vector<GlobalOrdinal> badColInds;
2768 bool allInColMap = true;
2769 for (LO k = 0; k < numInputInds; ++k) {
2770 if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2771 allInColMap = false;
2772 badColInds.push_back (inputGblColInds[k]);
2773 }
2774 }
2775 if (! allInColMap) {
2776 std::ostringstream os;
2777 os << "You attempted to insert entries in owned row " << gblRow
2778 << ", at the following column indices: [";
2779 for (LO k = 0; k < numInputInds; ++k) {
2780 os << inputGblColInds[k];
2781 if (k + static_cast<LO> (1) < numInputInds) {
2782 os << ",";
2783 }
2784 }
2785 os << "]." << endl << "Of those, the following indices are not in "
2786 "the column Map on this process: [";
2787 for (size_t k = 0; k < badColInds.size (); ++k) {
2788 os << badColInds[k];
2789 if (k + size_t (1) < badColInds.size ()) {
2790 os << ",";
2791 }
2792 }
2793 os << "]." << endl << "Since the matrix has a column Map already, "
2794 "it is invalid to insert entries at those locations.";
2795 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2796 (true, std::invalid_argument, os.str ());
2797 }
2798 }
2799 } // debug_
2800 this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2801 }
2802 else { // a nonlocal row
2803 this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2804 numInputInds);
2805 }
2806 }
2807
2808
2809 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2810 void
2812 insertGlobalIndices (const GlobalOrdinal gblRow,
2813 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2814 {
2815 this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2816 inputGblColInds.getRawPtr ());
2817 }
2818
2819
2820 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2821 void
2823 insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2824 const GlobalOrdinal gblColInds[],
2825 const LocalOrdinal numGblColInds)
2826 {
2827 typedef LocalOrdinal LO;
2828 typedef GlobalOrdinal GO;
2829 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2830
2831 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2832 (this->isLocallyIndexed (), std::runtime_error,
2833 "Graph indices are local; use insertLocalIndices().");
2834 // This can't really be satisfied for now, because if we are
2835 // fillComplete(), then we are local. In the future, this may
2836 // change. However, the rule that modification require active
2837 // fill will not change.
2838 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2839 (! this->isFillActive (), std::runtime_error,
2840 "You are not allowed to call this method if fill is not active. "
2841 "If fillComplete has been called, you must first call resumeFill "
2842 "before you may insert indices.");
2843 if (! indicesAreAllocated ()) {
2844 allocateIndices (GlobalIndices, verbose_);
2845 }
2846
2847 Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2848 // If we have a column Map, use it to filter the entries.
2849 if (! colMap_.is_null ()) {
2850 const map_type& colMap = * (this->colMap_);
2851
2852 LO curOffset = 0;
2853 while (curOffset < numGblColInds) {
2854 // Find a sequence of input indices that are in the column Map
2855 // on the calling process. Doing a sequence at a time,
2856 // instead of one at a time, amortizes some overhead.
2857 LO endOffset = curOffset;
2858 for ( ; endOffset < numGblColInds; ++endOffset) {
2859 const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2860 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2861 break; // first entry, in current sequence, not in the column Map
2862 }
2863 }
2864 // curOffset, endOffset: half-exclusive range of indices in
2865 // the column Map on the calling process. If endOffset ==
2866 // curOffset, the range is empty.
2867 const LO numIndInSeq = (endOffset - curOffset);
2868 if (numIndInSeq != 0) {
2869 this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2870 numIndInSeq);
2871 }
2872 // Invariant before this line: Either endOffset ==
2873 // numGblColInds, or gblColInds[endOffset] is not in the
2874 // column Map on the calling process.
2875 curOffset = endOffset + 1;
2876 }
2877 }
2878 else {
2879 this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2880 gblColInds_av.size ());
2881 }
2882 }
2883
2884 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2885 void
2887 insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2888 const GlobalOrdinal gblColInds[],
2889 const LocalOrdinal numGblColInds)
2890 {
2891 // This creates the std::vector if it doesn't exist yet.
2892 // std::map's operator[] does a lookup each time, so it's better
2893 // to pull nonlocals_[grow] out of the loop.
2894 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2895 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2896 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2897 // order to avoid duplicates. globalAssemble() sorts these
2898 // anyway.
2899 nonlocalRow.push_back (gblColInds[k]);
2900 }
2901 }
2902
2903 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2904 void
2906 removeLocalIndices (LocalOrdinal lrow)
2907 {
2908 const char tfecfFuncName[] = "removeLocalIndices: ";
2909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2910 ! isFillActive (), std::runtime_error, "requires that fill is active.");
2911 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2912 isStorageOptimized (), std::runtime_error,
2913 "cannot remove indices after optimizeStorage() has been called.");
2914 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2915 isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2916 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2917 ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2918 "Local row " << lrow << " is not in the row Map on the calling process.");
2919 if (! indicesAreAllocated ()) {
2920 allocateIndices (LocalIndices, verbose_);
2921 }
2922
2923 // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
2924 // all processes?
2925 clearGlobalConstants ();
2926
2927 if (k_numRowEntries_.extent (0) != 0) {
2928 this->k_numRowEntries_(lrow) = 0;
2929 }
2930
2931 if (debug_) {
2932 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2933 (getNumEntriesInLocalRow (lrow) != 0 ||
2934 ! indicesAreAllocated () ||
2935 ! isLocallyIndexed (), std::logic_error,
2936 "Violated stated post-conditions. Please contact Tpetra team.");
2937 }
2938 }
2939
2940
2941 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2942 void
2944 setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2945 const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2946 {
2947 const char tfecfFuncName[] = "setAllIndices: ";
2948 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2949 ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2950 "The graph must have a column Map before you may call this method.");
2951 LocalOrdinal numLocalRows = this->getNodeNumRows ();
2952 {
2953 LocalOrdinal rowPtrLen = rowPointers.size();
2954 if(numLocalRows == 0) {
2955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2956 rowPtrLen != 0 && rowPtrLen != 1,
2957 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2958 }
2959 else {
2960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2961 rowPtrLen != numLocalRows + 1,
2962 std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2963 " != this->getNodeNumRows()+1 = " << (numLocalRows + 1) << ".");
2964 }
2965 }
2966
2967 if (debug_ && this->isSorted()) {
2968 // Verify that the local indices are actually sorted
2969 int notSorted = 0;
2970 using exec_space = typename local_graph_device_type::execution_space;
2971 using size_type = typename local_graph_device_type::size_type;
2972 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2973 KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2974 {
2975 size_type rowBegin = rowPointers(i);
2976 size_type rowEnd = rowPointers(i + 1);
2977 for(size_type j = rowBegin + 1; j < rowEnd; j++)
2978 {
2979 if(columnIndices(j - 1) > columnIndices(j))
2980 {
2981 lNotSorted = 1;
2982 }
2983 }
2984 }, notSorted);
2985 //All-reduce notSorted to avoid rank divergence
2986 int globalNotSorted = 0;
2987 auto comm = this->getComm();
2988 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2989 Teuchos::outArg (globalNotSorted));
2990 if (globalNotSorted)
2991 {
2992 std::string message;
2993 if (notSorted)
2994 {
2995 //Only print message from ranks with the problem
2996 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2997 }
2998 Details::gathervPrint(std::cout, message, *comm);
2999 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
3000 }
3001 }
3002
3003 // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3004 // since the future model will be allocation at construction, not
3005 // lazy allocation on first insert.
3006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3007 ((this->lclIndsUnpacked_wdv.extent (0) != 0 || this->gblInds_wdv.extent (0) != 0),
3008 std::runtime_error, "You may not call this method if 1-D data "
3009 "structures are already allocated.");
3010
3011 indicesAreAllocated_ = true;
3012 indicesAreLocal_ = true;
3013 indicesAreSorted_ = true;
3014 noRedundancies_ = true;
3015 lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
3016 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3017 setRowPtrsUnpacked(rowPointers);
3018 setRowPtrsPacked(rowPointers);
3019
3020 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
3021
3022 // Storage MUST be packed, since the interface doesn't give any
3023 // way to indicate any extra space at the end of each row.
3024 storageStatus_ = Details::STORAGE_1D_PACKED;
3025
3026 // These normally get cleared out at the end of allocateIndices.
3027 // It makes sense to clear them out here, because at the end of
3028 // this method, the graph is allocated on the calling process.
3029 numAllocForAllRows_ = 0;
3030 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3031
3032 checkInternalState ();
3033 }
3034
3035
3036 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3037 void
3039 setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3040 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3041 {
3042 using Kokkos::View;
3043 typedef typename local_graph_device_type::row_map_type row_map_type;
3044 typedef typename row_map_type::array_layout layout_type;
3045 typedef typename row_map_type::non_const_value_type row_offset_type;
3046 typedef View<size_t*, layout_type , Kokkos::HostSpace,
3047 Kokkos::MemoryUnmanaged> input_view_type;
3048 typedef typename row_map_type::non_const_type nc_row_map_type;
3049
3050 const size_t size = static_cast<size_t> (rowPointers.size ());
3051 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3052 input_view_type ptr_in (rowPointers.getRawPtr (), size);
3053
3054 nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3055
3056 if (same) { // size_t == row_offset_type
3057 // This compile-time logic ensures that the compiler never sees
3058 // an assignment of View<row_offset_type*, ...> to View<size_t*,
3059 // ...> unless size_t == row_offset_type.
3060 input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3061 Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3062 nc_row_map_type,
3063 input_view_type>::select (ptr_rot, ptr_decoy),
3064 ptr_in);
3065 }
3066 else { // size_t != row_offset_type
3067 // CudaUvmSpace != HostSpace, so this will be false in that case.
3068 constexpr bool inHostMemory =
3069 std::is_same<typename row_map_type::memory_space,
3070 Kokkos::HostSpace>::value;
3071 if (inHostMemory) {
3072 // Copy (with cast from size_t to row_offset_type, with bounds
3073 // checking if necessary) to ptr_rot.
3074 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3075 }
3076 else { // Copy input row offsets to device first.
3077 //
3078 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3079 // execution space would avoid the double copy.
3080 //
3081 View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
3082 Kokkos::deep_copy (ptr_st, ptr_in);
3083 // Copy on device (casting from size_t to row_offset_type,
3084 // with bounds checking if necessary) to ptr_rot. This
3085 // executes in the output View's execution space, which is the
3086 // same as execution_space.
3087 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3088 }
3089 }
3090
3091 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
3092 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3093 setAllIndices (ptr_rot, k_ind);
3094 }
3095
3096
3097 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3098 TPETRA_DEPRECATED
3099 void
3101 getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3102 size_t& boundForAllLocalRows,
3103 bool& boundSameForAllLocalRows) const
3104 {
3105 const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3106 const char suffix[] = " Please report this bug to the Tpetra developers.";
3107
3108 // The three output arguments. We assign them to the actual
3109 // output arguments at the end, in order to implement
3110 // transactional semantics.
3111 Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3112 size_t numEntriesForAll = 0;
3113 bool allRowsSame = true;
3114
3115 const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3116
3117 if (this->indicesAreAllocated ()) {
3118 if (this->isStorageOptimized ()) {
3119 // left with the case that we have optimized storage. in this
3120 // case, we have to construct a list of row sizes.
3121 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3122 (numRows != 0 && rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3123 "The graph has " << numRows << " (> 0) row"
3124 << (numRows != 1 ? "s" : "") << " on the calling process, "
3125 "but the k_rowPtrs_ array has zero entries." << suffix);
3126 Teuchos::ArrayRCP<size_t> numEnt;
3127 if (numRows != 0) {
3128 numEnt = Teuchos::arcp<size_t> (numRows);
3129 }
3130
3131 // We have to iterate through the row offsets anyway, so we
3132 // might as well check whether all rows' bounds are the same.
3133 bool allRowsReallySame = false;
3134 for (ptrdiff_t i = 0; i < numRows; ++i) {
3135 numEnt[i] = rowPtrsUnpacked_host_(i+1) - rowPtrsUnpacked_host_(i);
3136 if (i != 0 && numEnt[i] != numEnt[i-1]) {
3137 allRowsReallySame = false;
3138 }
3139 }
3140 if (allRowsReallySame) {
3141 if (numRows == 0) {
3142 numEntriesForAll = 0;
3143 } else {
3144 numEntriesForAll = numEnt[1] - numEnt[0];
3145 }
3146 allRowsSame = true;
3147 }
3148 else {
3149 numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3150 allRowsSame = false; // conservatively; we don't check the array
3151 }
3152 }
3153 else if (k_numRowEntries_.extent (0) != 0) {
3154 // This is a shallow copy; the ArrayRCP wraps the View in a
3155 // custom destructor, which ensures correct deallocation if
3156 // that is the only reference to the View. Furthermore, this
3157 // View is a host View, so this doesn't assume UVM.
3158 numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3159 allRowsSame = false; // conservatively; we don't check the array
3160 }
3161 else {
3162 numEntriesForAll = 0;
3163 allRowsSame = true;
3164 }
3165 }
3166 else { // indices not allocated
3167 if (k_numAllocPerRow_.extent (0) != 0) {
3168 // This is a shallow copy; the ArrayRCP wraps the View in a
3169 // custom destructor, which ensures correct deallocation if
3170 // that is the only reference to the View. Furthermore, this
3171 // View is a host View, so this doesn't assume UVM.
3172 numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3173 allRowsSame = false; // conservatively; we don't check the array
3174 }
3175 else {
3176 numEntriesForAll = numAllocForAllRows_;
3177 allRowsSame = true;
3178 }
3179 }
3180
3181 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3182 (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3183 "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3184 "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3185 "size " << numEntriesPerRow.size () << "." << suffix);
3186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3187 (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3188 "numEntriesForAll and allRowsSame are not consistent. The former "
3189 "is nonzero (" << numEntriesForAll << "), but the latter is false."
3190 << suffix);
3191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3192 (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3193 "numEntriesPerRow and allRowsSame are not consistent. The former has "
3194 "nonzero length " << numEntriesForAll << ", but the latter is true."
3195 << suffix);
3196
3197 boundPerLocalRow = numEntriesPerRow;
3198 boundForAllLocalRows = numEntriesForAll;
3199 boundSameForAllLocalRows = allRowsSame;
3200 }
3201
3202
3203 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3204 void
3207 {
3208 using Teuchos::Comm;
3209 using Teuchos::outArg;
3210 using Teuchos::RCP;
3211 using Teuchos::rcp;
3212 using Teuchos::REDUCE_MAX;
3213 using Teuchos::REDUCE_MIN;
3214 using Teuchos::reduceAll;
3215 using std::endl;
3216 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
3217 using LO = local_ordinal_type;
3218 using GO = global_ordinal_type;
3219 using size_type = typename Teuchos::Array<GO>::size_type;
3220 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3221
3222 std::unique_ptr<std::string> prefix;
3223 if (verbose_) {
3224 prefix = this->createPrefix("CrsGraph", "globalAssemble");
3225 std::ostringstream os;
3226 os << *prefix << "Start" << endl;
3227 std::cerr << os.str();
3228 }
3229 RCP<const Comm<int> > comm = getComm ();
3230
3231 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3232 (! isFillActive (), std::runtime_error, "Fill must be active before "
3233 "you may call this method.");
3234
3235 const size_t myNumNonlocalRows = this->nonlocals_.size ();
3236
3237 // If no processes have nonlocal rows, then we don't have to do
3238 // anything. Checking this is probably cheaper than constructing
3239 // the Map of nonlocal rows (see below) and noticing that it has
3240 // zero global entries.
3241 {
3242 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3243 int someoneHasNonlocalRows = 0;
3244 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3245 outArg (someoneHasNonlocalRows));
3246 if (someoneHasNonlocalRows == 0) {
3247 if (verbose_) {
3248 std::ostringstream os;
3249 os << *prefix << "Done: No nonlocal rows" << endl;
3250 std::cerr << os.str();
3251 }
3252 return;
3253 }
3254 else if (verbose_) {
3255 std::ostringstream os;
3256 os << *prefix << "At least 1 process has nonlocal rows"
3257 << endl;
3258 std::cerr << os.str();
3259 }
3260 }
3261
3262 // 1. Create a list of the "nonlocal" rows on each process. this
3263 // requires iterating over nonlocals_, so while we do this,
3264 // deduplicate the entries and get a count for each nonlocal
3265 // row on this process.
3266 // 2. Construct a new row Map corresponding to those rows. This
3267 // Map is likely overlapping. We know that the Map is not
3268 // empty on all processes, because the above all-reduce and
3269 // return exclude that case.
3270
3271 RCP<const map_type> nonlocalRowMap;
3272 // Keep this for CrsGraph's constructor.
3273 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3274 {
3275 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3276 size_type curPos = 0;
3277 for (auto mapIter = this->nonlocals_.begin ();
3278 mapIter != this->nonlocals_.end ();
3279 ++mapIter, ++curPos) {
3280 myNonlocalGblRows[curPos] = mapIter->first;
3281 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3282 std::sort (gblCols.begin (), gblCols.end ());
3283 auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3284 gblCols.erase (vecLast, gblCols.end ());
3285 numEntPerNonlocalRow[curPos] = gblCols.size ();
3286 }
3287
3288 // Currently, Map requires that its indexBase be the global min
3289 // of all its global indices. Map won't compute this for us, so
3290 // we must do it. If our process has no nonlocal rows, set the
3291 // "min" to the max possible GO value. This ensures that if
3292 // some process has at least one nonlocal row, then it will pick
3293 // that up as the min. We know that at least one process has a
3294 // nonlocal row, since the all-reduce and return at the top of
3295 // this method excluded that case.
3296 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3297 {
3298 auto iter = std::min_element (myNonlocalGblRows.begin (),
3299 myNonlocalGblRows.end ());
3300 if (iter != myNonlocalGblRows.end ()) {
3301 myMinNonlocalGblRow = *iter;
3302 }
3303 }
3304 GO gblMinNonlocalGblRow = 0;
3305 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3306 outArg (gblMinNonlocalGblRow));
3307 const GO indexBase = gblMinNonlocalGblRow;
3308 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3309 nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3310 }
3311
3312 if (verbose_) {
3313 std::ostringstream os;
3314 os << *prefix << "nonlocalRowMap->getIndexBase()="
3315 << nonlocalRowMap->getIndexBase() << endl;
3316 std::cerr << os.str();
3317 }
3318
3319 // 3. Use the column indices for each nonlocal row, as stored in
3320 // nonlocals_, to construct a CrsGraph corresponding to
3321 // nonlocal rows. We need, but we have, exact counts of the
3322 // number of entries in each nonlocal row.
3323
3324 RCP<crs_graph_type> nonlocalGraph =
3325 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow(),
3326 StaticProfile));
3327 {
3328 size_type curPos = 0;
3329 for (auto mapIter = this->nonlocals_.begin ();
3330 mapIter != this->nonlocals_.end ();
3331 ++mapIter, ++curPos) {
3332 const GO gblRow = mapIter->first;
3333 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3334 const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3335 nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3336 }
3337 }
3338 if (verbose_) {
3339 std::ostringstream os;
3340 os << *prefix << "Built nonlocal graph" << endl;
3341 std::cerr << os.str();
3342 }
3343 // There's no need to fill-complete the nonlocals graph.
3344 // We just use it as a temporary container for the Export.
3345
3346 // 4. If the original row Map is one to one, then we can Export
3347 // directly from nonlocalGraph into this. Otherwise, we have
3348 // to create a temporary graph with a one-to-one row Map,
3349 // Export into that, then Import from the temporary graph into
3350 // *this.
3351
3352 auto origRowMap = this->getRowMap ();
3353 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3354
3355 if (origRowMapIsOneToOne) {
3356 if (verbose_) {
3357 std::ostringstream os;
3358 os << *prefix << "Original row Map is 1-to-1" << endl;
3359 std::cerr << os.str();
3360 }
3361 export_type exportToOrig (nonlocalRowMap, origRowMap);
3362 this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3363 // We're done at this point!
3364 }
3365 else {
3366 if (verbose_) {
3367 std::ostringstream os;
3368 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3369 std::cerr << os.str();
3370 }
3371 // If you ask a Map whether it is one to one, it does some
3372 // communication and stashes intermediate results for later use
3373 // by createOneToOne. Thus, calling createOneToOne doesn't cost
3374 // much more then the original cost of calling isOneToOne.
3375 auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3376 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3377
3378 // Create a temporary graph with the one-to-one row Map.
3379 //
3380 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3381 // row, to avoid reallocation during the Export operation.
3382 crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3383
3384 // Export from graph of nonlocals into the temp one-to-one graph.
3385 if (verbose_) {
3386 std::ostringstream os;
3387 os << *prefix << "Export nonlocal graph" << endl;
3388 std::cerr << os.str();
3389 }
3390 oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3391
3392 // We don't need the graph of nonlocals anymore, so get rid of
3393 // it, to keep the memory high-water mark down.
3394 nonlocalGraph = Teuchos::null;
3395
3396 // Import from the one-to-one graph to the original graph.
3397 import_type importToOrig (oneToOneRowMap, origRowMap);
3398 if (verbose_) {
3399 std::ostringstream os;
3400 os << *prefix << "Import nonlocal graph" << endl;
3401 std::cerr << os.str();
3402 }
3403 this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3404 }
3405
3406 // It's safe now to clear out nonlocals_, since we've already
3407 // committed side effects to *this. The standard idiom for
3408 // clearing a Container like std::map, is to swap it with an empty
3409 // Container and let the swapped Container fall out of scope.
3410 decltype (this->nonlocals_) newNonlocals;
3411 std::swap (this->nonlocals_, newNonlocals);
3412
3413 checkInternalState ();
3414 if (verbose_) {
3415 std::ostringstream os;
3416 os << *prefix << "Done" << endl;
3417 std::cerr << os.str();
3418 }
3419 }
3420
3421
3422 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3423 void
3425 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3426 {
3427 clearGlobalConstants();
3428 if (params != Teuchos::null) this->setParameterList (params);
3429 // either still sorted/merged or initially sorted/merged
3430 indicesAreSorted_ = true;
3431 noRedundancies_ = true;
3432 fillComplete_ = false;
3433 }
3434
3435
3436 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3437 void
3439 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3440 {
3441 // If the graph already has domain and range Maps, don't clobber
3442 // them. If it doesn't, use the current row Map for both the
3443 // domain and range Maps.
3444 //
3445 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3446 // column Map, and column indices are inserted which are not in
3447 // the row Map on any process, this will cause troubles. However,
3448 // that is not a common case for most applications that we
3449 // encounter, and checking for it might require more
3450 // communication.
3451 Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3452 if (domMap.is_null ()) {
3453 domMap = this->getRowMap ();
3454 }
3455 Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3456 if (ranMap.is_null ()) {
3457 ranMap = this->getRowMap ();
3458 }
3459 this->fillComplete (domMap, ranMap, params);
3460 }
3461
3462
3463 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3464 void
3466 fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3467 const Teuchos::RCP<const map_type>& rangeMap,
3468 const Teuchos::RCP<Teuchos::ParameterList>& params)
3469 {
3470 using std::endl;
3471 const char tfecfFuncName[] = "fillComplete: ";
3472 const bool verbose = verbose_;
3473
3474 std::unique_ptr<std::string> prefix;
3475 if (verbose) {
3476 prefix = this->createPrefix("CrsGraph", "fillComplete");
3477 std::ostringstream os;
3478 os << *prefix << "Start" << endl;
3479 std::cerr << os.str();
3480 }
3481
3482 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3483 (! isFillActive () || isFillComplete (), std::runtime_error,
3484 "Graph fill state must be active (isFillActive() "
3485 "must be true) before calling fillComplete().");
3486
3487 const int numProcs = getComm ()->getSize ();
3488
3489 //
3490 // Read and set parameters
3491 //
3492
3493 // Does the caller want to sort remote GIDs (within those owned by
3494 // the same process) in makeColMap()?
3495 if (! params.is_null ()) {
3496 if (params->isParameter ("sort column map ghost gids")) {
3497 sortGhostsAssociatedWithEachProcessor_ =
3498 params->get<bool> ("sort column map ghost gids",
3499 sortGhostsAssociatedWithEachProcessor_);
3500 }
3501 else if (params->isParameter ("Sort column Map ghost GIDs")) {
3502 sortGhostsAssociatedWithEachProcessor_ =
3503 params->get<bool> ("Sort column Map ghost GIDs",
3504 sortGhostsAssociatedWithEachProcessor_);
3505 }
3506 }
3507
3508 // If true, the caller promises that no process did nonlocal
3509 // changes since the last call to fillComplete.
3510 bool assertNoNonlocalInserts = false;
3511 if (! params.is_null ()) {
3512 assertNoNonlocalInserts =
3513 params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3514 }
3515
3516 //
3517 // Allocate indices, if they haven't already been allocated
3518 //
3519 if (! indicesAreAllocated ()) {
3520 if (hasColMap ()) {
3521 // We have a column Map, so use local indices.
3522 allocateIndices (LocalIndices, verbose);
3523 } else {
3524 // We don't have a column Map, so use global indices.
3525 allocateIndices (GlobalIndices, verbose);
3526 }
3527 }
3528
3529 //
3530 // Do global assembly, if requested and if the communicator
3531 // contains more than one process.
3532 //
3533 const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3534 if (mayNeedGlobalAssemble) {
3535 // This first checks if we need to do global assembly.
3536 // The check costs a single all-reduce.
3537 globalAssemble ();
3538 }
3539 else {
3540 const size_t numNonlocals = nonlocals_.size();
3541 if (verbose) {
3542 std::ostringstream os;
3543 os << *prefix << "Do not need to call globalAssemble; "
3544 "assertNoNonlocalInserts="
3545 << (assertNoNonlocalInserts ? "true" : "false")
3546 << "numProcs=" << numProcs
3547 << ", nonlocals_.size()=" << numNonlocals << endl;
3548 std::cerr << os.str();
3549 }
3550 const int lclNeededGlobalAssemble =
3551 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3552 if (lclNeededGlobalAssemble != 0 && verbose) {
3553 std::ostringstream os;
3554 os << *prefix;
3555 Details::Impl::verbosePrintMap(
3556 os, nonlocals_.begin(), nonlocals_.end(),
3557 nonlocals_.size(), "nonlocals_");
3558 std::cerr << os.str() << endl;
3559 }
3560
3561 if (debug_) {
3562 auto map = this->getMap();
3563 auto comm = map.is_null() ? Teuchos::null : map->getComm();
3564 int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3565 if (! comm.is_null()) {
3566 using Teuchos::REDUCE_MAX;
3567 using Teuchos::reduceAll;
3568 reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3569 Teuchos::outArg(gblNeededGlobalAssemble));
3570 }
3571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3572 (gblNeededGlobalAssemble != 0, std::runtime_error,
3573 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3574 "least one process in the CrsGraph's communicator. This "
3575 "means either that you incorrectly set the "
3576 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3577 "or that you inserted invalid entries. "
3578 "Rerun with the environment variable TPETRA_VERBOSE="
3579 "CrsGraph set to see the entries of nonlocals_ on every "
3580 "MPI process (WARNING: lots of output).");
3581 }
3582 else {
3583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584 (lclNeededGlobalAssemble != 0, std::runtime_error,
3585 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3586 "calling process. This means either that you incorrectly "
3587 "set the \"No Nonlocal Changes\" fillComplete parameter "
3588 "to true, or that you inserted invalid entries. "
3589 "Rerun with the environment "
3590 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3591 "of nonlocals_ on every MPI process (WARNING: lots of "
3592 "output).");
3593 }
3594 }
3595
3596 // Set domain and range Map. This may clear the Import / Export
3597 // objects if the new Maps differ from any old ones.
3598 setDomainRangeMaps (domainMap, rangeMap);
3599
3600 // If the graph does not already have a column Map (either from
3601 // the user constructor calling the version of the constructor
3602 // that takes a column Map, or from a previous fillComplete call),
3603 // then create it.
3604 Teuchos::Array<int> remotePIDs (0);
3605 const bool mustBuildColMap = ! this->hasColMap ();
3606 if (mustBuildColMap) {
3607 this->makeColMap (remotePIDs); // resized on output
3608 }
3609
3610 // Make indices local, if they aren't already.
3611 // The method doesn't do any work if the indices are already local.
3612 const std::pair<size_t, std::string> makeIndicesLocalResult =
3613 this->makeIndicesLocal(verbose);
3614
3615 if (debug_) {
3617 using Teuchos::RCP;
3618 using Teuchos::REDUCE_MIN;
3619 using Teuchos::reduceAll;
3620 using Teuchos::outArg;
3621
3622 RCP<const map_type> map = this->getMap ();
3623 RCP<const Teuchos::Comm<int> > comm;
3624 if (! map.is_null ()) {
3625 comm = map->getComm ();
3626 }
3627 if (comm.is_null ()) {
3628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3629 (makeIndicesLocalResult.first != 0, std::runtime_error,
3630 makeIndicesLocalResult.second);
3631 }
3632 else {
3633 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3634 int gblSuccess = 0; // output argument
3635 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3636 if (gblSuccess != 1) {
3637 std::ostringstream os;
3638 gathervPrint (os, makeIndicesLocalResult.second, *comm);
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640 (true, std::runtime_error, os.str ());
3641 }
3642 }
3643 }
3644 else {
3645 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3646 // the error state to makeImportExport or
3647 // computeGlobalConstants, which may do all-reduces and thus may
3648 // have the opportunity to communicate that error state.
3649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3650 (makeIndicesLocalResult.first != 0, std::runtime_error,
3651 makeIndicesLocalResult.second);
3652 }
3653
3654 // If this process has no indices, then CrsGraph considers it
3655 // already trivially sorted and merged. Thus, this method need
3656 // not be called on all processes in the row Map's communicator.
3657 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3658
3659 // Make Import and Export objects, if they haven't been made
3660 // already. If we made a column Map above, reuse information from
3661 // that process to avoid communication in the Import setup.
3662 this->makeImportExport (remotePIDs, mustBuildColMap);
3663
3664 // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3665 this->fillLocalGraph (params);
3666
3667 const bool callComputeGlobalConstants = params.get () == nullptr ||
3668 params->get ("compute global constants", true);
3669 if (callComputeGlobalConstants) {
3670 this->computeGlobalConstants ();
3671 }
3672 else {
3673 this->computeLocalConstants ();
3674 }
3675 this->fillComplete_ = true;
3676 this->checkInternalState ();
3677
3678 if (verbose) {
3679 std::ostringstream os;
3680 os << *prefix << "Done" << endl;
3681 std::cerr << os.str();
3682 }
3683 }
3684
3685
/// \brief Mark the graph fill complete, using caller-provided Maps and
///   (optionally) caller-provided Import / Export objects.
///
/// As the note in the body says, this skips the allocateIndices,
/// globalAssemble, makeColMap, makeIndicesLocal, and
/// sortAndMergeAllIndices steps.  Instead it sets the corresponding
/// state flags directly (allocated, locally indexed, sorted, merged).
///
/// \param domainMap [in] Domain Map; must be nonnull.
/// \param rangeMap [in] Range Map; must be nonnull.
/// \param importer [in] Optional Import.  If nonnull, its source Map
///   must be the same as the domain Map and its target Map the same
///   as the column Map.
/// \param exporter [in] Optional Export.  If nonnull, its source Map
///   must be the same as the row Map and its target Map the same as
///   the range Map.
/// \param params [in] Optional parameter list.  Read here for
///   "compute global constants" (default true) and, only if
///   HAVE_TPETRA_MMM_TIMINGS is defined, "Timer Label".  Also passed
///   on to fillLocalGraph.
///
/// Raises std::runtime_error if a precondition on the Maps, fill
/// state, column Map, or row offsets array fails, and
/// std::invalid_argument if a nonnull importer or exporter does not
/// match the graph's Maps.
3686 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3687 void
3689 expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3690 const Teuchos::RCP<const map_type>& rangeMap,
3691 const Teuchos::RCP<const import_type>& importer,
3692 const Teuchos::RCP<const export_type>& exporter,
3693 const Teuchos::RCP<Teuchos::ParameterList>& params)
3694 {
3695 const char tfecfFuncName[] = "expertStaticFillComplete: ";
// With timings enabled, MM holds the currently running timer.  Each
// phase below resets MM to null and then starts a new named timer.
3696#ifdef HAVE_TPETRA_MMM_TIMINGS
3697 std::string label;
3698 if(!params.is_null())
3699 label = params->get("Timer Label",label);
3700 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3701 using Teuchos::TimeMonitor;
3702 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3703#endif
3704
3705
// Precondition checks.
3706 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3707 domainMap.is_null () || rangeMap.is_null (),
3708 std::runtime_error, "The input domain Map and range Map must be nonnull.");
// NOTE(review): this check also fires when the graph is already fill
// complete, although the message only mentions the column Map.
3709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3710 isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3711 "call this method unless the graph has a column Map.");
3712 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3713 getNodeNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3714 std::runtime_error, "The calling process has getNodeNumRows() = "
3715 << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3716 "been set.");
3717 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3718 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getNodeNumRows () + 1,
3719 std::runtime_error, "The row offsets array has length " <<
3720 rowPtrsUnpacked_host_.extent (0) << " != getNodeNumRows()+1 = " <<
3721 (getNodeNumRows () + 1) << ".");
3722
3723 // Note: We don't need to do the following things which are normally done in fillComplete:
3724 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3725
3726 // Constants from allocateIndices
3727 //
3728 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3729 // away once the graph is allocated. expertStaticFillComplete
3730 // either presumes that the graph is allocated, or "allocates" it.
3731 //
3732 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3733 // version of CrsGraph is to allocate in the constructor, not
3734 // lazily on first insert. That will make both
3735 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3736 numAllocForAllRows_ = 0;
3737 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3738 indicesAreAllocated_ = true;
3739
3740 // Constants from makeIndicesLocal
3741 //
3742 // The graph has a column Map, so its indices had better be local.
3743 indicesAreLocal_ = true;
3744 indicesAreGlobal_ = false;
3745
3746 // set domain/range map: may clear the import/export objects
3747#ifdef HAVE_TPETRA_MMM_TIMINGS
3748 MM = Teuchos::null;
3749 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3750#endif
3751 setDomainRangeMaps (domainMap, rangeMap);
3752
3753 // Presume the user sorted and merged the arrays first
3754 indicesAreSorted_ = true;
3755 noRedundancies_ = true;
3756
3757 // makeImportExport won't create a new importer/exporter if I set one here first.
3758#ifdef HAVE_TPETRA_MMM_TIMINGS
3759 MM = Teuchos::null;
3760 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3761#endif
3762
// Drop any Import / Export the graph currently holds.  Each is
// replaced below only if the caller supplied a nonnull one that
// passes the Map consistency check.
3763 importer_ = Teuchos::null;
3764 exporter_ = Teuchos::null;
3765 if (importer != Teuchos::null) {
3766 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3767 ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3768 ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3769 std::invalid_argument,": importer does not match matrix maps.");
3770 importer_ = importer;
3771
3772 }
3773
3774#ifdef HAVE_TPETRA_MMM_TIMINGS
3775 MM = Teuchos::null;
3776 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3777#endif
3778
3779 if (exporter != Teuchos::null) {
3780 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3781 ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3782 ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3783 std::invalid_argument,": exporter does not match matrix maps.");
3784 exporter_ = exporter;
3785 }
3786
3787#ifdef HAVE_TPETRA_MMM_TIMINGS
3788 MM = Teuchos::null;
3789 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3790#endif
// The second argument is false here; fillComplete passes its
// mustBuildColMap flag in the same position.
3791 Teuchos::Array<int> remotePIDs (0); // unused output argument
3792 this->makeImportExport (remotePIDs, false);
3793
3794#ifdef HAVE_TPETRA_MMM_TIMINGS
3795 MM = Teuchos::null;
3796 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3797#endif
3798 this->fillLocalGraph (params);
3799
// Compute global constants unless the caller passed a parameter list
// whose "compute global constants" entry is false; in that case only
// the local constants are computed.
3800 const bool callComputeGlobalConstants = params.get () == nullptr ||
3801 params->get ("compute global constants", true);
3802
3803 if (callComputeGlobalConstants) {
3804#ifdef HAVE_TPETRA_MMM_TIMINGS
3805 MM = Teuchos::null;
3806 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3807#endif // HAVE_TPETRA_MMM_TIMINGS
3808 this->computeGlobalConstants ();
3809 }
3810 else {
3811#ifdef HAVE_TPETRA_MMM_TIMINGS
3812 MM = Teuchos::null;
3813 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3814#endif // HAVE_TPETRA_MMM_TIMINGS
3815 this->computeLocalConstants ();
3816 }
3817
// The fill-complete flag is set before checkInternalState() runs.
3818 fillComplete_ = true;
3819
3820#ifdef HAVE_TPETRA_MMM_TIMINGS
3821 MM = Teuchos::null;
3822 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3823#endif
3824 checkInternalState ();
3825 }
3826
3827
/// \brief Set up the packed row offsets and packed local column
///   indices of the graph from its unpacked 1-D storage.
///
/// If getNodeNumEntries() differs from getNodeAllocationSize(), new
/// packed row offsets are computed from the per-row entry counts
/// (k_numRowEntries_) and the column indices are copied into a new
/// packed array.  Otherwise the packed arrays simply alias the
/// unpacked ones.
///
/// \param params [in] Optional parameter list.  Only
///   "Optimize Storage" (default true) is read here.  When it is
///   true, k_numRowEntries_ is released, the unpacked arrays are set
///   to the packed ones, and storageStatus_ becomes
///   Details::STORAGE_1D_PACKED.
///
/// When debug_ is true, a number of consistency checks run and raise
/// std::logic_error on failure.
3828 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3829 void
3831 fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3832 {
3834 typedef decltype (k_numRowEntries_) row_entries_type;
3835 typedef typename local_graph_device_type::row_map_type row_map_type;
3836 typedef typename row_map_type::non_const_type non_const_row_map_type;
3837 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3838 const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3839 "expertStaticFillComplete): ";
3840 const size_t lclNumRows = this->getNodeNumRows ();
3841
3842 // This method's goal is to fill in the two arrays (compressed
3843 // sparse row format) that define the sparse graph's structure.
3844
3845 bool requestOptimizedStorage = true;
3846 if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3847 requestOptimizedStorage = false;
3848 }
3849
3850 // The graph's column indices are currently stored in a 1-D
3851 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3852 // in lclIndsUnpacked_wdv.
3853
3854 if (debug_) {
3855 // The graph's array of row offsets must already be allocated.
// NOTE(review): the messages below still say "k_rowPtrs_", while the
// code checks rowPtrsUnpacked_host_.
3856 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3857 (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3858 "k_rowPtrs_ has size zero, but shouldn't");
3859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3860 (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3861 "rowPtrsUnpacked_host_.extent(0) = "
3862 << rowPtrsUnpacked_host_.extent (0) << " != (lclNumRows + 1) = "
3863 << (lclNumRows + 1) << ".");
// NOTE(review): the first check above already guarantees that
// numOffsets is nonzero, so the "numOffsets != 0" test below is
// redundant.  Also, the message prints index numOffsets although the
// value was read at index numOffsets-1.
3864 const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3865 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3866 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3867 (numOffsets != 0 &&
3868 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3869 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3870 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3871 << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3872 << ".");
3873 }
3874
// Query the allocation size.  Anything thrown by
// getNodeAllocationSize is caught and re-raised through the class
// exception macro with a message naming the original exception
// category; std::exception and unknown exceptions are re-raised as
// std::runtime_error.
3875 size_t allocSize = 0;
3876 try {
3877 allocSize = this->getNodeAllocationSize ();
3878 }
3879 catch (std::logic_error& e) {
3880 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3881 (true, std::logic_error, "getNodeAllocationSize threw "
3882 "std::logic_error: " << e.what ());
3883 }
3884 catch (std::runtime_error& e) {
3885 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3886 (true, std::runtime_error, "getNodeAllocationSize threw "
3887 "std::runtime_error: " << e.what ());
3888 }
3889 catch (std::exception& e) {
3890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3891 (true, std::runtime_error, "getNodeAllocationSize threw "
3892 "std::exception: " << e.what ());
3893 }
3894 catch (...) {
3895 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3896 (true, std::runtime_error, "getNodeAllocationSize threw "
3897 "an exception not a subclass of std::exception.");
3898 }
3899
// Packing is needed exactly when the number of entries differs from
// the allocation size.  This branch does not depend on the
// "Optimize Storage" parameter.
3900 if (this->getNodeNumEntries () != allocSize) {
3901 // Use the nonconst version of row_map_type for ptr_d, because
3902 // the latter is const and we need to modify ptr_d here.
3903 non_const_row_map_type ptr_d;
3904 row_map_type ptr_d_const;
3905
3906 // The graph's current 1-D storage is "unpacked." This means
3907 // the row offsets may differ from what the final row offsets
3908 // should be. This could happen, for example, if the user set
3909 // an upper bound on the number of entries in each row, but
3910 // didn't fill all those entries.
3911
3912 if (debug_) {
3913 if (rowPtrsUnpacked_host_.extent (0) != 0) {
3914 const size_t numOffsets =
3915 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3916 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3918 (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3919 std::logic_error, "(Unpacked branch) Before allocating "
3920 "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3921 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3922 << lclIndsUnpacked_wdv.extent (0) << ".");
3923 }
3924 }
3925
3926 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3927 // array of valid entry counts per row (k_numRowEntries_).
3928
3929 // Total number of entries in the matrix on the calling
3930 // process. We will compute this in the loop below. It's
3931 // cheap to compute and useful as a sanity check.
3932 size_t lclTotalNumEntries = 0;
3933 {
3934 // Allocate the packed row offsets array.
3935 ptr_d =
3936 non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3937 ptr_d_const = ptr_d;
3938
3939 // It's ok that k_numRowEntries_ is a host View; the
3940 // function can handle this.
3941 typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3942 if (debug_) {
3943 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3944 (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3945 std::logic_error, "(Unpacked branch) "
3946 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3947 << " != getNodeNumRows()=" << lclNumRows << "");
3948 }
3949
// The return value is used below as the length of the packed column
// index array.
3950 lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3951
3952 if (debug_) {
3953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3954 (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3955 std::logic_error, "(Unpacked branch) After allocating "
3956 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3957 << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3958 const auto valToCheck =
3959 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3961 (valToCheck != lclTotalNumEntries, std::logic_error,
3962 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3963 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3964 << ") = " << valToCheck << " != total number of entries "
3965 "on the calling process = " << lclTotalNumEntries
3966 << ".");
3967 }
3968 }
3969
3970 // Allocate the array of packed column indices.
3971 lclinds_1d_type ind_d =
3972 lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3973
3974 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3975 // them, using the packed row offsets array ptr_d that we
3976 // created above.
3977 //
3978 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3979 // CrsMatrix?), we need to keep around the unpacked row
3980 // offsets and column indices.
3981
3982 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3983 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3984 typedef pack_functor<
3985 typename local_graph_device_type::entries_type::non_const_type,
3986 typename local_inds_dualv_type::t_dev::const_type,
3987 row_map_type,
3988 typename local_graph_device_type::row_map_type> inds_packer_type;
3989 inds_packer_type f (ind_d,
3990 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3991 ptr_d, rowPtrsUnpacked_dev_);
// Run the packing functor once per local row, in ind_d's execution
// space.
3992 {
3993 typedef typename decltype (ind_d)::execution_space exec_space;
3994 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3995 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3996 }
3997
// NOTE(review): the messages in this debug block are labeled
// "Optimize Storage"=true, but this branch is selected by the entry
// count differing from the allocation size, not by that parameter.
3998 if (debug_) {
3999 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4000 (ptr_d.extent (0) == 0, std::logic_error,
4001 "(\"Optimize Storage\"=true branch) After packing, "
4002 "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
4003 "never allocated.");
4004 if (ptr_d.extent (0) != 0) {
4005 const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4006 const auto valToCheck =
4007 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4009 (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4010 std::logic_error, "(\"Optimize Storage\"=true branch) "
4011 "After packing, ptr_d(" << (numOffsets-1) << ")="
4012 << valToCheck << " != ind_d.extent(0)="
4013 << ind_d.extent(0) << ".");
4014 }
4015 }
4016 // Build the local graph.
4017 setRowPtrsPacked(ptr_d_const);
4018 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
4019 }
4020 else { // We don't have to pack, so just set the pointers.
4021 setRowPtrsPacked(rowPtrsUnpacked_dev_);
4022 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
4023
// NOTE(review): as above, the "Optimize Storage"=false label in
// these messages does not reflect the actual branch condition.
4024 if (debug_) {
4025 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4026 (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
4027 "(\"Optimize Storage\"=false branch) "
4028 "rowPtrsPacked_dev_.extent(0) = 0. "
4029 "This probably means that "
4030 "k_rowPtrs_ was never allocated.");
4031 if (rowPtrsPacked_dev_.extent (0) != 0) {
4032 const size_t numOffsets =
4033 static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4034 const size_t valToCheck =
4035 rowPtrsPacked_host_(numOffsets - 1);
4036 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4037 (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
4038 std::logic_error, "(\"Optimize Storage\"=false branch) "
4039 "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
4040 << valToCheck
4041 << " != lclIndsPacked_wdv.extent(0)="
4042 << lclIndsPacked_wdv.extent (0) << ".");
4043 }
4044 }
4045 }
4046
// Final consistency checks on the packed arrays, common to both
// branches above.
4047 if (debug_) {
4048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4049 (static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
4050 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
4051 rowPtrsPacked_dev_.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4052 << ".");
4053 if (rowPtrsPacked_dev_.extent (0) != 0) {
4054 const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4055 const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
4056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4057 (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
4058 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
4059 << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
4060 << lclIndsPacked_wdv.extent (0) << ".");
4061 }
4062 }
4063
4064 if (requestOptimizedStorage) {
4065 // With optimized storage, we don't need to store
4066 // the array of row entry counts.
4067
4068 // Free graph data structures that are only needed for
4069 // unpacked 1-D storage.
4070 k_numRowEntries_ = row_entries_type ();
4071
4072 // Keep the new 1-D packed allocations.
4073 setRowPtrsUnpacked(rowPtrsPacked_dev_);
4074 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
4075
4076 storageStatus_ = Details::STORAGE_1D_PACKED;
4077 }
4078
4079 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
4080 }
4081
/// \brief Replace the graph's column Map with \c newColMap.
///
/// Raises std::runtime_error if the graph is currently locally
/// indexed or globally indexed; otherwise just assigns colMap_.
/// No column indices are touched here.
///
/// \param newColMap [in] The new column Map.  It is stored as given;
///   no null check is done in this method.
4082 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4083 void
4085 replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4086 {
4087 // NOTE: This safety check matches the code, but not the documentation of CrsGraph
4088 //
4089 // FIXME (mfh 18 Aug 2014) This will break if the calling process
4090 // has no entries, because in that case, currently it is neither
4091 // locally nor globally indexed. This will change once we get rid
4092 // of lazy allocation (so that the constructor allocates indices
4093 // and therefore commits to local vs. global).
4094 const char tfecfFuncName[] = "replaceColMap: ";
4095 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4096 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4097 "Requires matching maps and non-static graph.");
4098 colMap_ = newColMap;
4099 }
4100
/// \brief Replace the column Map and re-express the graph's column
///   indices in terms of the new column Map, without redistributing
///   any data.
///
/// If the graph is locally indexed on the calling process, each local
/// column index is converted to a global index with the current
/// column Map and then to a local index of \c newColMap.  If the
/// graph is globally indexed, the indices are only tested for
/// membership in \c newColMap.  The per-process outcome is combined
/// with a min all-reduce over the row Map's communicator, and the
/// results are committed only if every process succeeded.
///
/// \param newColMap [in] The new column Map.
///   NOTE(review): it is dereferenced without a null check.
/// \param newImport [in] Optional Import to use with the new column
///   Map.  If null and the domain Map has been set, a new Import is
///   built from the domain Map to \c newColMap, unless the two Maps
///   are the same, in which case the Import is set to null.
/// \param sortIndicesInEachRow [in] If true and the graph is locally
///   indexed, call sortAndMergeAllIndices after committing the new
///   indices.
///
/// Raises std::runtime_error if the graph is fill complete, or if the
/// all-reduced success flags report a failure.
4101 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4102 void
4104 reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4105 const Teuchos::RCP<const import_type>& newImport,
4106 const bool sortIndicesInEachRow)
4107 {
4108 using Teuchos::REDUCE_MIN;
4109 using Teuchos::reduceAll;
4110 using Teuchos::RCP;
4111 typedef GlobalOrdinal GO;
4112 typedef LocalOrdinal LO;
4113 typedef typename local_inds_dualv_type::t_host col_inds_type;
4114 const char tfecfFuncName[] = "reindexColumns: ";
4115
4116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4117 isFillComplete (), std::runtime_error, "The graph is fill complete "
4118 "(isFillComplete() returns true). You must call resumeFill() before "
4119 "you may call this method.");
4120
4121 // mfh 19 Aug 2014: This method does NOT redistribute data; it
4122 // doesn't claim to do the work of an Import or Export. This
4123 // means that for all processes, the calling process MUST own all
4124 // column indices, in both the old column Map (if it exists) and
4125 // the new column Map. We check this via an all-reduce.
4126 //
4127 // Some processes may be globally indexed, others may be locally
4128 // indexed, and others (that have no graph entries) may be
4129 // neither. This method will NOT change the graph's current
4130 // state. If it's locally indexed, it will stay that way, and
4131 // vice versa. It would be easy to add an option to convert indices
4132 // from global to local, so as to save a global-to-local
4133 // conversion pass. However, we don't do this here. The intended
4134 // typical use case is that the graph already has a column Map and
4135 // is locally indexed, and this is the case for which we optimize.
4136
4137 const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4138
4139 // Attempt to convert indices to the new column Map's version of
4140 // local. This will fail if on the calling process, the graph has
4141 // indices that are not on that process in the new column Map.
4142 // After the local conversion attempt, we will do an all-reduce to
4143 // see if any processes failed.
4144
4145 // If this is false, then either the graph contains a column index
4146 // which is invalid in the CURRENT column Map, or the graph is
4147 // locally indexed but currently has no column Map. In either
4148 // case, there is no way to convert the current local indices into
4149 // global indices, so that we can convert them into the new column
4150 // Map's local indices. It's possible for this to be true on some
4151 // processes but not others, due to replaceColMap.
4152 bool allCurColIndsValid = true;
4153 // On the calling process, are all valid current column indices
4154 // also in the new column Map on the calling process? In other
4155 // words, does local reindexing suffice, or should the user have
4156 // done an Import or Export instead?
4157 bool localSuffices = true;
4158
4159 // Final arrays for the local indices. We will allocate exactly
4160 // one of these ONLY if the graph is locally indexed on the
4161 // calling process, and ONLY if the graph has one or more entries
4162 // (is not empty) on the calling process. In that case, we
4163 // allocate the first (1-D storage) if the graph has a static
4164 // profile, else we allocate the second (2-D storage).
// NOTE(review): only a 1-D array is declared below; the mention of
// a second (2-D storage) array above looks stale.
4165 col_inds_type newLclInds1D;
4166 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4167
4168 // If indices aren't allocated, that means the calling process
4169 // owns no entries in the graph. Thus, there is nothing to
4170 // convert, and it trivially succeeds locally.
4171 if (indicesAreAllocated ()) {
4172 if (isLocallyIndexed ()) {
4173 if (hasColMap ()) { // locally indexed, and currently has a column Map
4174 const map_type& oldColMap = * (getColMap ());
4175 // Allocate storage for the new local indices.
4176 const size_t allocSize = this->getNodeAllocationSize ();
4177 newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
4178 allocSize);
4179 // Attempt to convert the new indices locally.
// NOTE(review): each "break" below leaves only the inner loop over
// the current row's entries; the outer loop still visits the
// remaining rows.  The failure flags stay false once set, so the
// result is unaffected.
4180 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4181 const RowInfo rowInfo = this->getRowInfo (lclRow);
4182 const size_t beg = rowInfo.offset1D;
4183 const size_t end = beg + rowInfo.numEntries;
4184 for (size_t k = beg; k < end; ++k) {
4185 const LO oldLclCol = oldLclInds1D(k);
4186 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4187 allCurColIndsValid = false;
4188 break; // Stop at the first invalid index
4189 }
4190 const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4191
4192 // The above conversion MUST succeed. Otherwise, the
4193 // current local index is invalid, which means that
4194 // the graph was constructed incorrectly.
4195 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4196 allCurColIndsValid = false;
4197 break; // Stop at the first invalid index
4198 }
4199 else {
4200 const LO newLclCol = newColMap->getLocalElement (gblCol);
4201 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4202 localSuffices = false;
4203 break; // Stop at the first invalid index
4204 }
4205 newLclInds1D(k) = newLclCol;
4206 }
4207 } // for each entry in the current row
4208 } // for each locally owned row
4209 }
4210 else { // locally indexed, but no column Map
4211 // This case is only possible if replaceColMap() was called
4212 // with a null argument on the calling process. It's
4213 // possible, but it means that this method can't possibly
4214 // succeed, since we have no way of knowing how to convert
4215 // the current local indices to global indices.
4216 allCurColIndsValid = false;
4217 }
4218 }
4219 else { // globally indexed
4220 // If the graph is globally indexed, we don't need to save
4221 // local indices, but we _do_ need to know whether the current
4222 // global indices are valid in the new column Map. We may
4223 // need to do a getRemoteIndexList call to find this out.
4224 //
4225 // In this case, it doesn't matter whether the graph currently
4226 // has a column Map. We don't need the old column Map to
4227 // convert from global indices to the _new_ column Map's local
4228 // indices. Furthermore, we can use the same code, whether
4229 // the graph is static or dynamic profile.
4230
4231 // Test whether the current global indices are in the new
4232 // column Map on the calling process.
4233 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4234 const RowInfo rowInfo = this->getRowInfo (lclRow);
4235 auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
4236 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4237 const GO gblCol = oldGblRowView(k);
4238 if (! newColMap->isNodeGlobalElement (gblCol)) {
4239 localSuffices = false;
4240 break; // Stop at the first invalid index
4241 }
4242 } // for each entry in the current row
4243 } // for each locally owned row
4244 } // locally or globally indexed
4245 } // whether indices are allocated
4246
4247 // Do an all-reduce to check both possible error conditions.
// NOTE(review): if the row Map (and hence comm) is null, no
// reduction happens and gblSuccess stays {0, 0}, so the first check
// below throws regardless of the local flags.
4248 int lclSuccess[2];
4249 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4250 lclSuccess[1] = localSuffices ? 1 : 0;
4251 int gblSuccess[2];
4252 gblSuccess[0] = 0;
4253 gblSuccess[1] = 0;
4254 RCP<const Teuchos::Comm<int> > comm =
4255 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4256 if (! comm.is_null ()) {
4257 reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4258 }
4259
4260 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4261 gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4262 " The most likely reason is that the graph is locally indexed, but the "
4263 "column Map is missing (null) on some processes, due to a previous call "
4264 "to replaceColMap().");
4265
4266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4267 gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4268 "contains column indices that are in the old column Map, but not in the "
4269 "new column Map (on that process). This method does NOT redistribute "
4270 "data; it does not claim to do the work of an Import or Export operation."
4271 " This means that for all processess, the calling process MUST own all "
4272 "column indices, in both the old column Map and the new column Map. In "
4273 "this case, you will need to do an Import or Export operation to "
4274 "redistribute data.");
4275
4276 // Commit the results.
4277 if (isLocallyIndexed ()) {
4278 { // scope the device view; sortAndMergeAllIndices needs host
// Copy the reindexed host array into a new device array and make
// that the graph's unpacked local index storage.
4279 typename local_inds_dualv_type::t_dev newLclInds1D_dev(
4280 Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
4281 Kokkos::WithoutInitializing),
4282 newLclInds1D.extent(0));
4283 Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
4284 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
4285 }
4286
4287 // We've reindexed, so we don't know if the indices are sorted.
4288 //
4289 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4290 // since we're already going through all the indices above. We
4291 // could also sort each row in place; that way, we would only
4292 // have to make one pass over the rows.
4293 indicesAreSorted_ = false;
4294 if (sortIndicesInEachRow) {
4295 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4296 // order to call this method.
4297 //
4298 // FIXME (mfh 17 Sep 2014) This violates the strong exception
4299 // guarantee. It would be better to sort the new index arrays
4300 // before committing them.
4301 const bool sorted = false; // need to resort
4302 const bool merged = true; // no need to merge, since no dups
4303 this->sortAndMergeAllIndices (sorted, merged);
4304 }
4305 }
4306 colMap_ = newColMap;
4307
4308 if (newImport.is_null ()) {
4309 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4310 // check whether the input Import is null on any process.
4311 //
4312 // If the domain Map hasn't been set yet, we can't compute a new
4313 // Import object. Leave it what it is; it should be null, but
4314 // it doesn't matter. If the domain Map _has_ been set, then
4315 // compute a new Import object if necessary.
4316 if (! domainMap_.is_null ()) {
4317 if (! domainMap_->isSameAs (* newColMap)) {
4318 importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4319 } else {
4320 importer_ = Teuchos::null; // don't need an Import
4321 }
4322 }
4323 } else {
4324 // The caller gave us an Import object. Assume that it's valid.
4325 importer_ = newImport;
4326 }
4327 }
4328
4329 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4330 void
4332 replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4333 {
4334 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4335 TEUCHOS_TEST_FOR_EXCEPTION(
4336 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4337 "this method unless the graph already has a column Map.");
4338 TEUCHOS_TEST_FOR_EXCEPTION(
4339 newDomainMap.is_null (), std::invalid_argument,
4340 prefix << "The new domain Map must be nonnull.");
4341
4342 // Create a new importer, if needed
4343 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4344 if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4345 newImporter = rcp(new import_type(newDomainMap, colMap_));
4346 }
4347 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4348 }
4349
4350 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4351 void
4353 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4354 const Teuchos::RCP<const import_type>& newImporter)
4355 {
4356 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4357 TEUCHOS_TEST_FOR_EXCEPTION(
4358 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4359 "this method unless the graph already has a column Map.");
4360 TEUCHOS_TEST_FOR_EXCEPTION(
4361 newDomainMap.is_null (), std::invalid_argument,
4362 prefix << "The new domain Map must be nonnull.");
4363
4364 if (debug_) {
4365 if (newImporter.is_null ()) {
4366 // It's not a good idea to put expensive operations in a macro
4367 // clause, even if they are side effect - free, because macros
4368 // don't promise that they won't evaluate their arguments more
4369 // than once. It's polite for them to do so, but not required.
4370 const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4371 TEUCHOS_TEST_FOR_EXCEPTION
4372 (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4373 "then the new domain Map must be the same as the current column Map.");
4374 }
4375 else {
4376 const bool colSameAsTgt =
4377 colMap_->isSameAs (* (newImporter->getTargetMap ()));
4378 const bool newDomSameAsSrc =
4379 newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4380 TEUCHOS_TEST_FOR_EXCEPTION
4381 (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4382 "new Import is nonnull, then the current column Map must be the same "
4383 "as the new Import's target Map, and the new domain Map must be the "
4384 "same as the new Import's source Map.");
4385 }
4386 }
4387
4388 domainMap_ = newDomainMap;
4389 importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4390 }
4391
4392 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4393 void
4395 replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4396 {
4397 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4398 TEUCHOS_TEST_FOR_EXCEPTION(
4399 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4400 "this method unless the graph already has a row Map.");
4401 TEUCHOS_TEST_FOR_EXCEPTION(
4402 newRangeMap.is_null (), std::invalid_argument,
4403 prefix << "The new range Map must be nonnull.");
4404
4405 // Create a new exporter, if needed
4406 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4407 if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4408 newExporter = rcp(new export_type(rowMap_, newRangeMap));
4409 }
4410 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4411 }
4412
4413 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4414 void
4416 replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4417 const Teuchos::RCP<const export_type>& newExporter)
4418 {
4419 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4420 TEUCHOS_TEST_FOR_EXCEPTION(
4421 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4422 "this method unless the graph already has a column Map.");
4423 TEUCHOS_TEST_FOR_EXCEPTION(
4424 newRangeMap.is_null (), std::invalid_argument,
4425 prefix << "The new domain Map must be nonnull.");
4426
4427 if (debug_) {
4428 if (newExporter.is_null ()) {
4429 // It's not a good idea to put expensive operations in a macro
4430 // clause, even if they are side effect - free, because macros
4431 // don't promise that they won't evaluate their arguments more
4432 // than once. It's polite for them to do so, but not required.
4433 const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4434 TEUCHOS_TEST_FOR_EXCEPTION
4435 (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4436 "then the new range Map must be the same as the current row Map.");
4437 }
4438 else {
4439 const bool newRangeSameAsTgt =
4440 newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4441 const bool rowSameAsSrc =
4442 rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4443 TEUCHOS_TEST_FOR_EXCEPTION
4444 (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4445 "new Export is nonnull, then the current row Map must be the same "
4446 "as the new Export's source Map, and the new range Map must be the "
4447 "same as the new Export's target Map.");
4448 }
4449 }
4450
4451 rangeMap_ = newRangeMap;
4452 exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4453 }
4454
4455#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (only compiled when TPETRA_ENABLE_DEPRECATED_CODE
// is defined): returns whatever getLocalGraphDevice() returns.
// NOTE(review): the return-type and class-qualifier lines of this
// definition are not present in this listing -- confirm them against
// the repository copy before editing this function.
4456 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4459 getLocalGraph () const
4460 {
4461 return getLocalGraphDevice();
4462 }
4463#endif
4464
4465 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4468 getLocalGraphDevice () const
4469 {
4471 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4472 rowPtrsPacked_dev_);
4473 }
4474
4475 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4478 getLocalGraphHost () const
4479 {
4480 return local_graph_host_type(
4481 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4482 rowPtrsPacked_host_);
4483 }
4484
4485 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4486 void
4489 {
4490 using ::Tpetra::Details::ProfilingRegion;
4491 using Teuchos::ArrayView;
4492 using Teuchos::outArg;
4493 using Teuchos::reduceAll;
4494 typedef global_size_t GST;
4495
4496 ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4497
4498 this->computeLocalConstants ();
4499
4500 // Compute global constants from local constants. Processes that
4501 // already have local constants still participate in the
4502 // all-reduces, using their previously computed values.
4503 if (! this->haveGlobalConstants_) {
4504 const Teuchos::Comm<int>& comm = * (this->getComm ());
4505 // Promote all the nodeNum* and nodeMaxNum* quantities from
4506 // size_t to global_size_t, when doing the all-reduces for
4507 // globalNum* / globalMaxNum* results.
4508 //
4509 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4510 // this in two all-reduces (one for the sum and the other for
4511 // the max), or use a custom MPI_Op that combines the sum and
4512 // the max. The latter might even be slower than two
4513 // all-reduces on modern network hardware. It would also be a
4514 // good idea to use nonblocking all-reduces (MPI 3), so that we
4515 // don't have to wait around for the first one to finish before
4516 // starting the second one.
4517 GST lcl, gbl;
4518 lcl = static_cast<GST> (this->getNodeNumEntries ());
4519
4520 reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4521 this->globalNumEntries_ = gbl;
4522
4523 const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4524 reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4525 outArg (this->globalMaxNumRowEntries_));
4526 this->haveGlobalConstants_ = true;
4527 }
4528 }
4529
4530
4531 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4532 void
4535 {
4536 using ::Tpetra::Details::ProfilingRegion;
4537
4538 ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4539 if (this->haveLocalConstants_) {
4540 return;
4541 }
4542
4543 // Reset local properties
4544 this->nodeMaxNumRowEntries_ =
4545 Teuchos::OrdinalTraits<size_t>::invalid();
4546
4547 using LO = local_ordinal_type;
4548
4549 auto ptr = this->rowPtrsPacked_dev_;
4550 const LO lclNumRows = ptr.extent(0) == 0 ?
4551 static_cast<LO> (0) :
4552 (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4553
4554 const LO lclMaxNumRowEnt =
4555 ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4556 ptr, lclNumRows);
4557 this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4558 this->haveLocalConstants_ = true;
4559 }
4560
4561
4562 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4563 std::pair<size_t, std::string>
4565 makeIndicesLocal (const bool verbose)
4566 {
4568 using Teuchos::arcp;
4569 using Teuchos::Array;
4570 using std::endl;
4571 typedef LocalOrdinal LO;
4572 typedef GlobalOrdinal GO;
4573 typedef device_type DT;
4574 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4575 typedef decltype (k_numRowEntries_) row_entries_type;
4576 typedef typename row_entries_type::non_const_value_type num_ent_type;
4577 const char tfecfFuncName[] = "makeIndicesLocal: ";
4578 ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4579
4580 std::unique_ptr<std::string> prefix;
4581 if (verbose) {
4582 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4583 std::ostringstream os;
4584 os << *prefix << "lclNumRows: " << getNodeNumRows() << endl;
4585 std::cerr << os.str();
4586 }
4587
4588 // These are somewhat global properties, so it's safe to have
4589 // exception checks for them, rather than returning an error code.
4590 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4591 (! this->hasColMap (), std::logic_error, "The graph does not have a "
4592 "column Map yet. This method should never be called in that case. "
4593 "Please report this bug to the Tpetra developers.");
4594 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4595 (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4596 "that it has a column Map, because hasColMap() returns true. However, "
4597 "the result of getColMap() is null. This should never happen. Please "
4598 "report this bug to the Tpetra developers.");
4599
4600 // Return value 1: The number of column indices (counting
4601 // duplicates) that could not be converted to local indices,
4602 // because they were not in the column Map on the calling process.
4603 size_t lclNumErrs = 0;
4604 std::ostringstream errStrm; // for return value 2 (error string)
4605
4606 const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4607 const map_type& colMap = * (this->getColMap ());
4608
4609 if (this->isGloballyIndexed () && lclNumRows != 0) {
4610 // This is a host-accessible View.
4611 typename row_entries_type::const_type h_numRowEnt =
4612 this->k_numRowEntries_;
4613
4614 // Allocate space for local indices.
4615 if (rowPtrsUnpacked_host_.extent (0) == 0) {
4616 errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4617 "happen here. Please report this bug to the Tpetra developers."
4618 << endl;
4619 // Need to return early.
4620 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4621 errStrm.str ());
4622 }
4623 const auto numEnt = rowPtrsUnpacked_host_(lclNumRows);
4624
4625 // mfh 17 Dec 2016: We don't need initial zero-fill of
4626 // lclIndsUnpacked_wdv, because we will fill it below anyway.
4627 // AllowPadding would only help for aligned access (e.g.,
4628 // for vectorization) if we also were to pad each row to the
4629 // same alignment, so we'll skip AllowPadding for now.
4630
4631 // using Kokkos::AllowPadding;
4632 using Kokkos::view_alloc;
4633 using Kokkos::WithoutInitializing;
4634
4635 // When giving the label as an argument to
4636 // Kokkos::view_alloc, the label must be a string and not a
4637 // char*, else the code won't compile. This is because
4638 // view_alloc also allows a raw pointer as its first
4639 // argument. See
4640 // https://github.com/kokkos/kokkos/issues/434. This is a
4641 // large allocation typically, so the overhead of creating
4642 // an std::string is minor.
4643 const std::string label ("Tpetra::CrsGraph::lclInd");
4644 if (verbose) {
4645 std::ostringstream os;
4646 os << *prefix << "(Re)allocate lclInd_wdv: old="
4647 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4648 std::cerr << os.str();
4649 }
4650
4651 local_inds_dualv_type lclInds_dualv =
4652 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4653 numEnt);
4654 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4655
4656 auto lclColMap = colMap.getLocalMap ();
4657 // This is a "device mirror" of the host View h_numRowEnt.
4658 //
4659 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4660 // Device instance is to use its default constructor. See the
4661 // following Kokkos issue:
4662 //
4663 // https://github.com/kokkos/kokkos/issues/442
4664 if (verbose) {
4665 std::ostringstream os;
4666 os << *prefix << "Allocate device mirror k_numRowEnt: "
4667 << h_numRowEnt.extent(0) << endl;
4668 std::cerr << os.str();
4669 }
4670 auto k_numRowEnt =
4671 Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4672
4674 lclNumErrs =
4675 convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4676 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4677 gblInds_wdv.getDeviceView(Access::ReadOnly),
4678 rowPtrsUnpacked_dev_,
4679 lclColMap,
4680 k_numRowEnt);
4681 if (lclNumErrs != 0) {
4682 const int myRank = [this] () {
4683 auto map = this->getMap ();
4684 if (map.is_null ()) {
4685 return 0;
4686 }
4687 else {
4688 auto comm = map->getComm ();
4689 return comm.is_null () ? 0 : comm->getRank ();
4690 }
4691 } ();
4692 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4693 errStrm << "(Process " << myRank << ") When converting column "
4694 "indices from global to local, we encountered " << lclNumErrs
4695 << " ind" << (pluralNumErrs ? "ices" : "ex")
4696 << " that do" << (pluralNumErrs ? "es" : "")
4697 << " not live in the column Map on this process." << endl;
4698 }
4699
4700 // We've converted column indices from global to local, so we
4701 // can deallocate the global column indices (which we know are
4702 // in 1-D storage, because the graph has static profile).
4703 if (verbose) {
4704 std::ostringstream os;
4705 os << *prefix << "Free gblInds_wdv: "
4706 << gblInds_wdv.extent(0) << endl;
4707 std::cerr << os.str();
4708 }
4709 gblInds_wdv = global_inds_wdv_type ();
4710 } // globallyIndexed() && lclNumRows > 0
4711
4712 this->indicesAreLocal_ = true;
4713 this->indicesAreGlobal_ = false;
4714 this->checkInternalState ();
4715
4716 return std::make_pair (lclNumErrs, errStrm.str ());
4717 }
4718
4719 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4720 void
4722 makeColMap (Teuchos::Array<int>& remotePIDs)
4723 {
4725 using std::endl;
4726 const char tfecfFuncName[] = "makeColMap";
4727
4728 ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4729 std::unique_ptr<std::string> prefix;
4730 if (verbose_) {
4731 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4732 std::ostringstream os;
4733 os << *prefix << "Start" << endl;
4734 std::cerr << os.str();
4735 }
4736
4737 // this->colMap_ should be null at this point, but we accept the
4738 // future possibility that it might not be (esp. if we decide
4739 // later to support graph structure changes after first
4740 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4741 // 2017) support).
4742 Teuchos::RCP<const map_type> colMap = this->colMap_;
4743 const bool sortEachProcsGids =
4744 this->sortGhostsAssociatedWithEachProcessor_;
4745
4746 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4747 // per-process error code. If an error does occur on a process,
4748 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4749 // notice that error. This is the caller's responsibility. For
4750 // now, we only propagate (to all processes) and report the error
4751 // in debug mode. In the future, we need to add the local/global
4752 // error handling scheme used in BlockCrsMatrix to this class.
4753 if (debug_) {
4754 using Teuchos::outArg;
4755 using Teuchos::REDUCE_MIN;
4756 using Teuchos::reduceAll;
4757
4758 std::ostringstream errStrm;
4759 const int lclErrCode =
4760 Details::makeColMap (colMap, remotePIDs,
4761 getDomainMap (), *this, sortEachProcsGids, &errStrm);
4762 auto comm = this->getComm ();
4763 if (! comm.is_null ()) {
4764 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4765 int gblSuccess = 0; // output argument
4766 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4767 outArg (gblSuccess));
4768 if (gblSuccess != 1) {
4769 std::ostringstream os;
4770 Details::gathervPrint (os, errStrm.str (), *comm);
4771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4772 (true, std::runtime_error, ": An error happened on at "
4773 "least one process in the CrsGraph's communicator. "
4774 "Here are all processes' error messages:" << std::endl
4775 << os.str ());
4776 }
4777 }
4778 }
4779 else {
4780 (void) Details::makeColMap (colMap, remotePIDs,
4781 getDomainMap (), *this, sortEachProcsGids, nullptr);
4782 }
4783 // See above. We want to admit the possibility of makeColMap
4784 // actually revising an existing column Map, even though that
4785 // doesn't currently (as of 10 May 2017) happen.
4786 this->colMap_ = colMap;
4787
4788 checkInternalState ();
4789 if (verbose_) {
4790 std::ostringstream os;
4791 os << *prefix << "Done" << endl;
4792 std::cerr << os.str();
4793 }
4794 }
4795
4796
4797 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4798 void
4800 sortAndMergeAllIndices (const bool sorted, const bool merged)
4801 {
4802 using std::endl;
4803 using LO = LocalOrdinal;
4804 using host_execution_space =
4805 typename Kokkos::View<LO*, device_type>::HostMirror::
4806 execution_space;
4807 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4808 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4809 Details::ProfilingRegion regionSortAndMerge
4810 ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4811
4812 std::unique_ptr<std::string> prefix;
4813 if (verbose_) {
4814 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4815 std::ostringstream os;
4816 os << *prefix << "Start: "
4817 << "sorted=" << (sorted ? "true" : "false")
4818 << ", merged=" << (merged ? "true" : "false") << endl;
4819 std::cerr << os.str();
4820 }
4821 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4822 (this->isGloballyIndexed(), std::logic_error,
4823 "This method may only be called after makeIndicesLocal." );
4824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4825 (! merged && this->isStorageOptimized(), std::logic_error,
4826 "The graph is already storage optimized, so we shouldn't be "
4827 "merging any indices. "
4828 "Please report this bug to the Tpetra developers.");
4829
4830 if (! sorted || ! merged) {
4831 const LO lclNumRows(this->getNodeNumRows());
4832 auto range = range_type(0, lclNumRows);
4833
4834 if (verbose_) {
4835 size_t totalNumDups = 0;
4836 Kokkos::parallel_reduce(range,
4837 [this, sorted, merged] (const LO lclRow, size_t& numDups)
4838 {
4839 const RowInfo rowInfo = this->getRowInfo(lclRow);
4840 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4841 },
4842 totalNumDups);
4843 std::ostringstream os;
4844 os << *prefix << "totalNumDups=" << totalNumDups << endl;
4845 std::cerr << os.str();
4846 }
4847 else {
4848 Kokkos::parallel_for(range,
4849 [this, sorted, merged] (const LO lclRow)
4850 {
4851 const RowInfo rowInfo = this->getRowInfo(lclRow);
4852 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4853 });
4854 }
4855 this->indicesAreSorted_ = true; // we just sorted every row
4856 this->noRedundancies_ = true; // we just merged every row
4857 }
4858
4859 if (verbose_) {
4860 std::ostringstream os;
4861 os << *prefix << "Done" << endl;
4862 std::cerr << os.str();
4863 }
4864 }
4865
4866 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4867 void
4869 makeImportExport (Teuchos::Array<int>& remotePIDs,
4870 const bool useRemotePIDs)
4871 {
4872 using ::Tpetra::Details::ProfilingRegion;
4873 using Teuchos::ParameterList;
4874 using Teuchos::RCP;
4875 using Teuchos::rcp;
4876 const char tfecfFuncName[] = "makeImportExport: ";
4877 ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4878
4879 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4880 (! this->hasColMap (), std::logic_error,
4881 "This method may not be called unless the graph has a column Map.");
4882 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4883
4884 // Don't do any checks to see if we need to create the Import, if
4885 // it exists already.
4886 //
4887 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4888 // change CrsGraph in the future to allow changing the column
4889 // Map after fillComplete. For now, the column Map is fixed
4890 // after the first fillComplete call.
4891 if (importer_.is_null ()) {
4892 // Create the Import instance if necessary.
4893 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4894 if (params.is_null () || ! params->isSublist ("Import")) {
4895 if (useRemotePIDs) {
4896 importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4897 }
4898 else {
4899 importer_ = rcp (new import_type (domainMap_, colMap_));
4900 }
4901 }
4902 else {
4903 RCP<ParameterList> importSublist = sublist (params, "Import", true);
4904 if (useRemotePIDs) {
4905 RCP<import_type> newImp =
4906 rcp (new import_type (domainMap_, colMap_, remotePIDs,
4907 importSublist));
4908 importer_ = newImp;
4909 }
4910 else {
4911 importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4912 }
4913 }
4914 }
4915 }
4916
4917 // Don't do any checks to see if we need to create the Export, if
4918 // it exists already.
4919 if (exporter_.is_null ()) {
4920 // Create the Export instance if necessary.
4921 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4922 if (params.is_null () || ! params->isSublist ("Export")) {
4923 exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4924 }
4925 else {
4926 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4927 exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4928 }
4929 }
4930 }
4931 }
4932
4933
4934 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4935 std::string
4937 description () const
4938 {
4939 std::ostringstream oss;
4940 oss << dist_object_type::description ();
4941 if (isFillComplete ()) {
4942 oss << "{status = fill complete"
4943 << ", global rows = " << getGlobalNumRows()
4944 << ", global cols = " << getGlobalNumCols()
4945 << ", global num entries = " << getGlobalNumEntries()
4946 << "}";
4947 }
4948 else {
4949 oss << "{status = fill not complete"
4950 << ", global rows = " << getGlobalNumRows()
4951 << "}";
4952 }
4953 return oss.str();
4954 }
4955
4956
4957 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4958 void
4960 describe (Teuchos::FancyOStream &out,
4961 const Teuchos::EVerbosityLevel verbLevel) const
4962 {
4963 using Teuchos::ArrayView;
4964 using Teuchos::Comm;
4965 using Teuchos::RCP;
4966 using Teuchos::VERB_DEFAULT;
4967 using Teuchos::VERB_NONE;
4968 using Teuchos::VERB_LOW;
4969 using Teuchos::VERB_MEDIUM;
4970 using Teuchos::VERB_HIGH;
4971 using Teuchos::VERB_EXTREME;
4972 using std::endl;
4973 using std::setw;
4974
4975 Teuchos::EVerbosityLevel vl = verbLevel;
4976 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4977 RCP<const Comm<int> > comm = this->getComm();
4978 const int myImageID = comm->getRank(),
4979 numImages = comm->getSize();
4980 size_t width = 1;
4981 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4982 ++width;
4983 }
4984 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4985 Teuchos::OSTab tab (out);
4986 // none: print nothing
4987 // low: print O(1) info from node 0
4988 // medium: print O(P) info, num entries per node
4989 // high: print O(N) info, num entries per row
4990 // extreme: print O(NNZ) info: print graph indices
4991 //
4992 // for medium and higher, print constituent objects at specified verbLevel
4993 if (vl != VERB_NONE) {
4994 if (myImageID == 0) out << this->description() << std::endl;
4995 // O(1) globals, minus what was already printed by description()
4996 if (isFillComplete() && myImageID == 0) {
4997 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4998 }
4999 // constituent objects
5000 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5001 if (myImageID == 0) out << "\nRow map: " << std::endl;
5002 rowMap_->describe(out,vl);
5003 if (colMap_ != Teuchos::null) {
5004 if (myImageID == 0) out << "\nColumn map: " << std::endl;
5005 colMap_->describe(out,vl);
5006 }
5007 if (domainMap_ != Teuchos::null) {
5008 if (myImageID == 0) out << "\nDomain map: " << std::endl;
5009 domainMap_->describe(out,vl);
5010 }
5011 if (rangeMap_ != Teuchos::null) {
5012 if (myImageID == 0) out << "\nRange map: " << std::endl;
5013 rangeMap_->describe(out,vl);
5014 }
5015 }
5016 // O(P) data
5017 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5018 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5019 if (myImageID == imageCtr) {
5020 out << "Node ID = " << imageCtr << std::endl
5021 << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5022 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5023 if (! indicesAreAllocated ()) {
5024 out << "Indices are not allocated." << std::endl;
5025 }
5026 }
5027 comm->barrier();
5028 comm->barrier();
5029 comm->barrier();
5030 }
5031 }
5032 // O(N) and O(NNZ) data
5033 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5034 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5035 if (myImageID == imageCtr) {
5036 out << std::setw(width) << "Node ID"
5037 << std::setw(width) << "Global Row"
5038 << std::setw(width) << "Num Entries";
5039 if (vl == VERB_EXTREME) {
5040 out << " Entries";
5041 }
5042 out << std::endl;
5043 const LocalOrdinal lclNumRows =
5044 static_cast<LocalOrdinal> (this->getNodeNumRows ());
5045 for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5046 const RowInfo rowinfo = this->getRowInfo (r);
5047 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5048 out << std::setw(width) << myImageID
5049 << std::setw(width) << gid
5050 << std::setw(width) << rowinfo.numEntries;
5051 if (vl == VERB_EXTREME) {
5052 out << " ";
5053 if (isGloballyIndexed()) {
5054 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
5055 for (size_t j=0; j < rowinfo.numEntries; ++j){
5056 GlobalOrdinal colgid = rowview[j] + rowinfo.offset1D;
5057 out << colgid << " ";
5058 }
5059 }
5060 else if (isLocallyIndexed()) {
5061 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5062 for (size_t j=0; j < rowinfo.numEntries; ++j) {
5063 LocalOrdinal collid = rowview[j] + rowinfo.offset1D;
5064 out << colMap_->getGlobalElement(collid) << " ";
5065 }
5066 }
5067 }
5068 out << std::endl;
5069 }
5070 }
5071 comm->barrier();
5072 comm->barrier();
5073 comm->barrier();
5074 }
5075 }
5076 }
5077 }
5078
5079
5080 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5081 bool
5083 checkSizes (const SrcDistObject& /* source */)
5084 {
5085 // It's not clear what kind of compatibility checks on sizes can
5086 // be performed here. Epetra_CrsGraph doesn't check any sizes for
5087 // compatibility.
5088 return true;
5089 }
5090
5091 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5092 void
5095 (const SrcDistObject& source,
5096 const size_t numSameIDs,
5097 const Kokkos::DualView<const local_ordinal_type*,
5098 buffer_device_type>& permuteToLIDs,
5099 const Kokkos::DualView<const local_ordinal_type*,
5100 buffer_device_type>& permuteFromLIDs,
5101 const CombineMode /*CM*/)
5102 {
5103 using std::endl;
5104 using LO = local_ordinal_type;
5105 using GO = global_ordinal_type;
5106 using this_type = CrsGraph<LO, GO, node_type>;
5107 const char tfecfFuncName[] = "copyAndPermute: ";
5108 const bool verbose = verbose_;
5109
5110 std::unique_ptr<std::string> prefix;
5111 if (verbose) {
5112 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
5113 std::ostringstream os;
5114 os << *prefix << endl;
5115 std::cerr << os.str ();
5116 }
5117
5118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5119 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5120 std::runtime_error, "permuteToLIDs.extent(0) = "
5121 << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
5122 << permuteFromLIDs.extent (0) << ".");
5123
5124 // We know from checkSizes that the source object is a
5125 // row_graph_type, so we don't need to check again.
5126 const row_graph_type& srcRowGraph =
5127 dynamic_cast<const row_graph_type&> (source);
5128
5129 if (verbose) {
5130 std::ostringstream os;
5131 os << *prefix << "Compute padding" << endl;
5132 std::cerr << os.str ();
5133 }
5134 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
5135 permuteToLIDs, permuteFromLIDs, verbose);
5136 applyCrsPadding(*padding, verbose);
5137
5138 // If the source object is actually a CrsGraph, we can use view
5139 // mode instead of copy mode to access the entries in each row,
5140 // if the graph is not fill complete.
5141 const this_type* srcCrsGraph =
5142 dynamic_cast<const this_type*> (&source);
5143
5144 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
5145 const map_type& tgtRowMap = *(getRowMap());
5146 const bool src_filled = srcRowGraph.isFillComplete();
5147 nonconst_global_inds_host_view_type row_copy;
5148 LO myid = 0;
5149
5150 //
5151 // "Copy" part of "copy and permute."
5152 //
5153 if (src_filled || srcCrsGraph == nullptr) {
5154 if (verbose) {
5155 std::ostringstream os;
5156 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5157 std::cerr << os.str ();
5158 }
5159 // If the source graph is fill complete, we can't use view mode,
5160 // because the data might be stored in a different format not
5161 // compatible with the expectations of view mode. Also, if the
5162 // source graph is not a CrsGraph, we can't use view mode,
5163 // because RowGraph only provides copy mode access to the data.
5164 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5165 const GO gid = srcRowMap.getGlobalElement (myid);
5166 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5167 Kokkos::resize(row_copy,row_length);
5168 size_t check_row_length = 0;
5169 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
5170 this->insertGlobalIndices (gid, row_length, row_copy.data());
5171 }
5172 } else {
5173 if (verbose) {
5174 std::ostringstream os;
5175 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5176 std::cerr << os.str ();
5177 }
5178 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5179 const GO gid = srcRowMap.getGlobalElement (myid);
5180 global_inds_host_view_type row;
5181 srcCrsGraph->getGlobalRowView (gid, row);
5182 this->insertGlobalIndices (gid, row.extent(0), row.data());
5183 }
5184 }
5185
5186 //
5187 // "Permute" part of "copy and permute."
5188 //
5189 auto permuteToLIDs_h = permuteToLIDs.view_host ();
5190 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5191
5192 if (src_filled || srcCrsGraph == nullptr) {
5193 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5194 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5195 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5196 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5197 Kokkos::resize(row_copy,row_length);
5198 size_t check_row_length = 0;
5199 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
5200 this->insertGlobalIndices (mygid, row_length, row_copy.data());
5201 }
5202 } else {
5203 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5204 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5205 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5206 global_inds_host_view_type row;
5207 srcCrsGraph->getGlobalRowView (srcgid, row);
5208 this->insertGlobalIndices (mygid, row.extent(0), row.data());
5209 }
5210 }
5211
5212 if (verbose) {
5213 std::ostringstream os;
5214 os << *prefix << "Done" << endl;
5215 std::cerr << os.str ();
5216 }
5217 }
5218
// Apply a previously computed padding to this graph's unpacked index
// storage.
//
// Steps, as performed below:
//   1. Make sure indices are allocated (as global indices if not yet).
//   2. Deep-copy the unpacked row offsets into a writable View
//      (row_ptrs_beg) and build a matching per-row "end" View
//      (row_ptrs_end).
//   3. Call padCrsArrays on either the global or the local index storage.
//   4. Write the resulting per-row entry counts and row offsets back.
//
// \param padding [in] Padding description; printed in verbose mode and
//   forwarded unchanged to padCrsArrays.
// \param verbose [in] If true, print progress messages to std::cerr.
5219 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5220 void
5221 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5222 applyCrsPadding(const padding_type& padding,
5223 const bool verbose)
5224 {
5225 using Details::ProfilingRegion;
5227 using std::endl;
5228 using LO = local_ordinal_type;
5229 using row_ptrs_type =
5230 typename local_graph_device_type::row_map_type::non_const_type;
5231 using range_policy =
5232 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5233 const char tfecfFuncName[] = "applyCrsPadding";
5234 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
5235
// prefix stays null unless verbose; every later dereference of it is
// guarded by "if (verbose)".
5236 std::unique_ptr<std::string> prefix;
5237 if (verbose) {
5238 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5239 std::ostringstream os;
5240 os << *prefix << "padding: ";
5241 padding.print(os);
5242 os << endl;
5243 std::cerr << os.str();
5244 }
// The rank is only looked up in verbose mode; otherwise (or if the Map
// or its communicator is null) -1 is used.
5245 const int myRank = ! verbose ? -1 : [&] () {
5246 auto map = this->getMap();
5247 if (map.is_null()) {
5248 return -1;
5249 }
5250 auto comm = map->getComm();
5251 if (comm.is_null()) {
5252 return -1;
5253 }
5254 return comm->getRank();
5255 } ();
5256
5257 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
5258 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
5259 // size needs to increase. That should be the job of
5260 // padCrsArrays.
5261
5262 // Assume global indexing we don't have any indices yet
5263 if (! indicesAreAllocated()) {
5264 if (verbose) {
5265 std::ostringstream os;
5266 os << *prefix << "Call allocateIndices" << endl;
5267 std::cerr << os.str();
5268 }
5269 allocateIndices(GlobalIndices, verbose);
5270 }
5271 TEUCHOS_ASSERT( indicesAreAllocated() );
5272
5273 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5274 // would use it directly.
// NOTE(review): "k_rowPtrs_" here and in the message near the end
// presumably refers to rowPtrsUnpacked_dev_, which is what is actually
// copied below -- confirm.
5275
5276 if (verbose) {
5277 std::ostringstream os;
5278 os << *prefix << "Allocate row_ptrs_beg: "
5279 << rowPtrsUnpacked_dev_.extent(0) << endl;
5280 std::cerr << os.str();
5281 }
5282 using Kokkos::view_alloc;
5283 using Kokkos::WithoutInitializing;
// Allocated without initialization because the deep_copy right after
// overwrites every entry.
5284 row_ptrs_type row_ptrs_beg(
5285 view_alloc("row_ptrs_beg", WithoutInitializing),
5286 rowPtrsUnpacked_dev_.extent(0));
5287 Kokkos::deep_copy(row_ptrs_beg, rowPtrsUnpacked_dev_);
5288
// N is one less than the number of row offsets, or zero if there are no
// row offsets at all.
5289 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
5290 size_t(row_ptrs_beg.extent(0) - 1);
5291 if (verbose) {
5292 std::ostringstream os;
5293 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
5294 std::cerr << os.str();
5295 }
5296 row_ptrs_type row_ptrs_end(
5297 view_alloc("row_ptrs_end", WithoutInitializing), N);
5298 row_ptrs_type num_row_entries;
5299
// A nonempty k_numRowEntries_ selects the first branch: each row's end
// is its begin offset plus its stored entry count. Otherwise each row's
// end is the next row's begin offset.
5300 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5301 if (refill_num_row_entries) { // Case 1: Unpacked storage
5302 // We can't assume correct *this capture until C++17, and it's
5303 // likely more efficient just to capture what we need anyway.
5304 num_row_entries =
5305 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
5306 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
5307 Kokkos::parallel_for
5308 ("Fill end row pointers", range_policy(0, N),
5309 KOKKOS_LAMBDA (const size_t i) {
5310 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5311 });
5312 }
5313 else {
5314 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5315 // storage, we don't need row_ptr_end to be separate allocation;
5316 // could just have it alias row_ptr_beg+1.
5317 Kokkos::parallel_for
5318 ("Fill end row pointers", range_policy(0, N),
5319 KOKKOS_LAMBDA (const size_t i) {
5320 row_ptrs_end(i) = row_ptrs_beg(i+1);
5321 });
5322 }
5323
// Pad whichever index storage matches the current indexing mode.
// NOTE(review): padCrsArrays is called unqualified and no
// using-declaration for it is visible in this block -- confirm how it is
// brought into scope.
5324 if (isGloballyIndexed()) {
5325 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5326 padding, myRank, verbose);
5327 }
5328 else {
5329 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5330 padding, myRank, verbose);
5331 }
5332
// Recompute per-row entry counts from the begin/end offsets after
// padding and copy them back into k_numRowEntries_.
5333 if (refill_num_row_entries) {
5334 Kokkos::parallel_for
5335 ("Fill num entries", range_policy(0, N),
5336 KOKKOS_LAMBDA (const size_t i) {
5337 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5338 });
5339 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5340 }
// NOTE(review): the size-consistency assertion below only runs when
// verbose is true.
5341 if (verbose) {
5342 std::ostringstream os;
5343 os << *prefix << "Reassign k_rowPtrs_; old size: "
5344 << rowPtrsUnpacked_dev_.extent(0) << ", new size: "
5345 << row_ptrs_beg.extent(0) << endl;
5346 std::cerr << os.str();
5347 TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
5348 }
5349
// Install the copied row offsets as this graph's unpacked row offsets.
5350 setRowPtrsUnpacked(row_ptrs_beg);
5351
5352 set_need_sync_host_uvm_access(); // need fence before host UVM access of k_rowPtrs_
5353 }
5354
5355 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5356 std::unique_ptr<
5357 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5358 >
5359 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5360 computeCrsPadding(
5361 const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5362 const size_t numSameIDs,
5363 const Kokkos::DualView<const local_ordinal_type*,
5364 buffer_device_type>& permuteToLIDs,
5365 const Kokkos::DualView<const local_ordinal_type*,
5366 buffer_device_type>& permuteFromLIDs,
5367 const bool verbose) const
5368 {
5369 using LO = local_ordinal_type;
5370 using std::endl;
5371
5372 std::unique_ptr<std::string> prefix;
5373 if (verbose) {
5374 prefix = this->createPrefix("CrsGraph",
5375 "computeCrsPadding(same & permute)");
5376 std::ostringstream os;
5377 os << *prefix << "{numSameIDs: " << numSameIDs
5378 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5379 << endl;
5380 std::cerr << os.str();
5381 }
5382
5383 const int myRank = [&] () {
5384 auto comm = rowMap_.is_null() ? Teuchos::null :
5385 rowMap_->getComm();
5386 return comm.is_null() ? -1 : comm->getRank();
5387 } ();
5388 std::unique_ptr<padding_type> padding(
5389 new padding_type(myRank, numSameIDs,
5390 permuteFromLIDs.extent(0)));
5391
5392 computeCrsPaddingForSameIDs(*padding, source,
5393 static_cast<LO>(numSameIDs));
5394 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5395 permuteFromLIDs);
5396 return padding;
5397 }
5398
5399 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5400 void
5401 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5402 computeCrsPaddingForSameIDs(
5403 padding_type& padding,
5404 const RowGraph<local_ordinal_type, global_ordinal_type,
5405 node_type>& source,
5406 const local_ordinal_type numSameIDs) const
5407 {
5408 using LO = local_ordinal_type;
5409 using GO = global_ordinal_type;
5410 using Details::Impl::getRowGraphGlobalRow;
5411 using std::endl;
5412 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5413
5414 std::unique_ptr<std::string> prefix;
5415 const bool verbose = verbose_;
5416 if (verbose) {
5417 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5418 std::ostringstream os;
5419 os << *prefix << "numSameIDs: " << numSameIDs << endl;
5420 std::cerr << os.str();
5421 }
5422
5423 if (numSameIDs == 0) {
5424 return;
5425 }
5426
5427 const map_type& srcRowMap = *(source.getRowMap());
5428 const map_type& tgtRowMap = *rowMap_;
5429 using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5430 const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5431 const bool src_is_unique =
5432 srcCrs == nullptr ? false : srcCrs->isMerged();
5433 const bool tgt_is_unique = this->isMerged();
5434
5435 std::vector<GO> srcGblColIndsScratch;
5436 std::vector<GO> tgtGblColIndsScratch;
5437
5438 execute_sync_host_uvm_access(); // protect host UVM access
5439 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5440 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5441 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5442 auto srcGblColInds = getRowGraphGlobalRow(
5443 srcGblColIndsScratch, source, srcGblRowInd);
5444 auto tgtGblColInds = getRowGraphGlobalRow(
5445 tgtGblColIndsScratch, *this, tgtGblRowInd);
5446 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5447 tgtGblColInds.size(), tgt_is_unique,
5448 srcGblColInds.getRawPtr(),
5449 srcGblColInds.size(), src_is_unique);
5450 }
5451 if (verbose) {
5452 std::ostringstream os;
5453 os << *prefix << "Done" << endl;
5454 std::cerr << os.str();
5455 }
5456 }
5457
5458 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5459 void
5460 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5461 computeCrsPaddingForPermutedIDs(
5462 padding_type& padding,
5463 const RowGraph<local_ordinal_type, global_ordinal_type,
5464 node_type>& source,
5465 const Kokkos::DualView<const local_ordinal_type*,
5466 buffer_device_type>& permuteToLIDs,
5467 const Kokkos::DualView<const local_ordinal_type*,
5468 buffer_device_type>& permuteFromLIDs) const
5469 {
5470 using LO = local_ordinal_type;
5471 using GO = global_ordinal_type;
5472 using Details::Impl::getRowGraphGlobalRow;
5473 using std::endl;
5474 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5475
5476 std::unique_ptr<std::string> prefix;
5477 const bool verbose = verbose_;
5478 if (verbose) {
5479 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5480 std::ostringstream os;
5481 os << *prefix << "permuteToLIDs.extent(0): "
5482 << permuteToLIDs.extent(0)
5483 << ", permuteFromLIDs.extent(0): "
5484 << permuteFromLIDs.extent(0) << endl;
5485 std::cerr << os.str();
5486 }
5487
5488 if (permuteToLIDs.extent(0) == 0) {
5489 return;
5490 }
5491
5492 const map_type& srcRowMap = *(source.getRowMap());
5493 const map_type& tgtRowMap = *rowMap_;
5494 using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5495 const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5496 const bool src_is_unique =
5497 srcCrs == nullptr ? false : srcCrs->isMerged();
5498 const bool tgt_is_unique = this->isMerged();
5499
5500 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5501 auto permuteToLIDs_h = permuteToLIDs.view_host();
5502 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5503 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5504
5505 std::vector<GO> srcGblColIndsScratch;
5506 std::vector<GO> tgtGblColIndsScratch;
5507 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5508
5509 execute_sync_host_uvm_access(); // protect host UVM access
5510 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5511 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5512 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5513 auto srcGblColInds = getRowGraphGlobalRow(
5514 srcGblColIndsScratch, source, srcGblRowInd);
5515 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5516 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5517 auto tgtGblColInds = getRowGraphGlobalRow(
5518 tgtGblColIndsScratch, *this, tgtGblRowInd);
5519 padding.update_permute(whichPermute, tgtLclRowInd,
5520 tgtGblColInds.getRawPtr(),
5521 tgtGblColInds.size(), tgt_is_unique,
5522 srcGblColInds.getRawPtr(),
5523 srcGblColInds.size(), src_is_unique);
5524 }
5525
5526 if (verbose) {
5527 std::ostringstream os;
5528 os << *prefix << "Done" << endl;
5529 std::cerr << os.str();
5530 }
5531 }
5532
5533 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5534 std::unique_ptr<
5535 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5536 >
5537 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5538 computeCrsPaddingForImports(
5539 const Kokkos::DualView<const local_ordinal_type*,
5540 buffer_device_type>& importLIDs,
5541 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5542 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5543 const bool verbose) const
5544 {
5545 using Details::Impl::getRowGraphGlobalRow;
5546 using std::endl;
5547 using LO = local_ordinal_type;
5548 using GO = global_ordinal_type;
5549 const char tfecfFuncName[] = "computeCrsPaddingForImports";
5550
5551 std::unique_ptr<std::string> prefix;
5552 if (verbose) {
5553 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5554 std::ostringstream os;
5555 os << *prefix << "importLIDs.extent(0): "
5556 << importLIDs.extent(0)
5557 << ", imports.extent(0): "
5558 << imports.extent(0)
5559 << ", numPacketsPerLID.extent(0): "
5560 << numPacketsPerLID.extent(0) << endl;
5561 std::cerr << os.str();
5562 }
5563
5564 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5565 const int myRank = [&] () {
5566 auto comm = rowMap_.is_null() ? Teuchos::null :
5567 rowMap_->getComm();
5568 return comm.is_null() ? -1 : comm->getRank();
5569 } ();
5570 std::unique_ptr<padding_type> padding(
5571 new padding_type(myRank, numImports));
5572
5573 if (imports.need_sync_host()) {
5574 imports.sync_host();
5575 }
5576 auto imports_h = imports.view_host();
5577 if (numPacketsPerLID.need_sync_host ()) {
5578 numPacketsPerLID.sync_host();
5579 }
5580 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5581
5582 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5583 auto importLIDs_h = importLIDs.view_host();
5584
5585 const map_type& tgtRowMap = *rowMap_;
5586 // Always merge source column indices, since isMerged() is
5587 // per-process state, and we don't know its value on other
5588 // processes that sent us data.
5589 constexpr bool src_is_unique = false;
5590 const bool tgt_is_unique = isMerged();
5591
5592 std::vector<GO> tgtGblColIndsScratch;
5593 size_t offset = 0;
5594 execute_sync_host_uvm_access(); // protect host UVM access
5595 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5596 // CrsGraph packs just global column indices, while CrsMatrix
5597 // packs bytes (first the number of entries in the row, then the
5598 // global column indices, then other stuff like the matrix
5599 // values in that row).
5600 const LO origSrcNumEnt =
5601 static_cast<LO>(numPacketsPerLID_h[whichImport]);
5602 GO* const srcGblColInds = imports_h.data() + offset;
5603
5604 const LO tgtLclRowInd = importLIDs_h[whichImport];
5605 const GO tgtGblRowInd =
5606 tgtRowMap.getGlobalElement(tgtLclRowInd);
5607 auto tgtGblColInds = getRowGraphGlobalRow(
5608 tgtGblColIndsScratch, *this, tgtGblRowInd);
5609 const size_t origTgtNumEnt(tgtGblColInds.size());
5610
5611 padding->update_import(whichImport, tgtLclRowInd,
5612 tgtGblColInds.getRawPtr(),
5613 origTgtNumEnt, tgt_is_unique,
5614 srcGblColInds,
5615 origSrcNumEnt, src_is_unique);
5616 offset += origSrcNumEnt;
5617 }
5618
5619 if (verbose) {
5620 std::ostringstream os;
5621 os << *prefix << "Done" << endl;
5622 std::cerr << os.str();
5623 }
5624 return padding;
5625 }
5626
5627 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5628 std::unique_ptr<
5629 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5630 >
5631 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5632 computePaddingForCrsMatrixUnpack(
5633 const Kokkos::DualView<const local_ordinal_type*,
5634 buffer_device_type>& importLIDs,
5635 Kokkos::DualView<char*, buffer_device_type> imports,
5636 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5637 const bool verbose) const
5638 {
5639 using Details::Impl::getRowGraphGlobalRow;
5640 using Details::PackTraits;
5641 using std::endl;
5642 using LO = local_ordinal_type;
5643 using GO = global_ordinal_type;
5644 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5645
5646 std::unique_ptr<std::string> prefix;
5647 if (verbose) {
5648 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5649 std::ostringstream os;
5650 os << *prefix << "importLIDs.extent(0): "
5651 << importLIDs.extent(0)
5652 << ", imports.extent(0): "
5653 << imports.extent(0)
5654 << ", numPacketsPerLID.extent(0): "
5655 << numPacketsPerLID.extent(0) << endl;
5656 std::cerr << os.str();
5657 }
5658 const bool extraVerbose =
5659 verbose && Details::Behavior::verbose("CrsPadding");
5660
5661 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5662 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5663 const int myRank = [&] () {
5664 auto comm = rowMap_.is_null() ? Teuchos::null :
5665 rowMap_->getComm();
5666 return comm.is_null() ? -1 : comm->getRank();
5667 } ();
5668 std::unique_ptr<padding_type> padding(
5669 new padding_type(myRank, numImports));
5670
5671 if (imports.need_sync_host()) {
5672 imports.sync_host();
5673 }
5674 auto imports_h = imports.view_host();
5675 if (numPacketsPerLID.need_sync_host ()) {
5676 numPacketsPerLID.sync_host();
5677 }
5678 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5679
5680 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5681 auto importLIDs_h = importLIDs.view_host();
5682
5683 const map_type& tgtRowMap = *rowMap_;
5684 // Always merge source column indices, since isMerged() is
5685 // per-process state, and we don't know its value on other
5686 // processes that sent us data.
5687 constexpr bool src_is_unique = false;
5688 const bool tgt_is_unique = isMerged();
5689
5690 std::vector<GO> srcGblColIndsScratch;
5691 std::vector<GO> tgtGblColIndsScratch;
5692 size_t offset = 0;
5693 execute_sync_host_uvm_access(); // protect host UVM access
5694 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5695 // CrsGraph packs just global column indices, while CrsMatrix
5696 // packs bytes (first the number of entries in the row, then the
5697 // global column indices, then other stuff like the matrix
5698 // values in that row).
5699 const size_t numBytes = numPacketsPerLID_h[whichImport];
5700 if (extraVerbose) {
5701 std::ostringstream os;
5702 os << *prefix << "whichImport=" << whichImport
5703 << ", numImports=" << numImports
5704 << ", numBytes=" << numBytes << endl;
5705 std::cerr << os.str();
5706 }
5707 if (numBytes == 0) {
5708 continue; // special case: no entries to unpack for this row
5709 }
5710 LO origSrcNumEnt = 0;
5711 const size_t numEntBeg = offset;
5712 const size_t numEntLen =
5713 PackTraits<LO>::packValueCount(origSrcNumEnt);
5714 TEUCHOS_ASSERT( numBytes >= numEntLen );
5715 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5716 PackTraits<LO>::unpackValue(origSrcNumEnt,
5717 imports_h.data() + numEntBeg);
5718 if (extraVerbose) {
5719 std::ostringstream os;
5720 os << *prefix << "whichImport=" << whichImport
5721 << ", numImports=" << numImports
5722 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5723 std::cerr << os.str();
5724 }
5725 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5726 TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5727 const size_t gidsBeg = numEntBeg + numEntLen;
5728 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5729 srcGblColIndsScratch.resize(origSrcNumEnt);
5730 }
5731 GO* const srcGblColInds = srcGblColIndsScratch.data();
5732 PackTraits<GO>::unpackArray(srcGblColInds,
5733 imports_h.data() + gidsBeg,
5734 origSrcNumEnt);
5735 const LO tgtLclRowInd = importLIDs_h[whichImport];
5736 const GO tgtGblRowInd =
5737 tgtRowMap.getGlobalElement(tgtLclRowInd);
5738 auto tgtGblColInds = getRowGraphGlobalRow(
5739 tgtGblColIndsScratch, *this, tgtGblRowInd);
5740 const size_t origNumTgtEnt(tgtGblColInds.size());
5741
5742 if (extraVerbose) {
5743 std::ostringstream os;
5744 os << *prefix << "whichImport=" << whichImport
5745 << ", numImports=" << numImports
5746 << ": Call padding->update_import" << endl;
5747 std::cerr << os.str();
5748 }
5749 padding->update_import(whichImport, tgtLclRowInd,
5750 tgtGblColInds.getRawPtr(),
5751 origNumTgtEnt, tgt_is_unique,
5752 srcGblColInds,
5753 origSrcNumEnt, src_is_unique);
5754 offset += numBytes;
5755 }
5756
5757 if (verbose) {
5758 std::ostringstream os;
5759 os << *prefix << "Done" << endl;
5760 std::cerr << os.str();
5761 }
5762 return padding;
5763 }
5764
// Pack the rows of `source` listed in exportLIDs into `exports`, and
// record the number of packets for each row in numPacketsPerLID.
//
// One of three packing paths is taken:
//   1. source is a RowGraph but not a CrsGraph: call RowGraph::pack
//      through Teuchos array types, then deep-copy the result into the
//      host view of `exports`.
//   2. source is a CrsGraph and the condition on the column Map /
//      packed row offsets below holds: call packCrsGraphNew.
//   3. otherwise: call the source's packFillActiveNew.
//
// \param source [in] Source object; must be castable to row_graph_type.
// \param exportLIDs [in] Local indices of the rows to pack.
// \param exports [out] Packed data; reallocated in path 1 if its
//   length differs from the packed size.
// \param numPacketsPerLID [out] Per-row packet counts; must have the
//   same length as exportLIDs.
// \param constantNumPackets [out] Passed through to whichever pack
//   routine is called.
//
// The TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC checks below name
// std::runtime_error for the length mismatch and for a fill-complete
// target, and std::invalid_argument for a source that is not a
// RowGraph.
5765 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5766 void
5767 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5768 packAndPrepare
5769 (const SrcDistObject& source,
5770 const Kokkos::DualView<const local_ordinal_type*,
5771 buffer_device_type>& exportLIDs,
5772 Kokkos::DualView<packet_type*,
5773 buffer_device_type>& exports,
5774 Kokkos::DualView<size_t*,
5775 buffer_device_type> numPacketsPerLID,
5776 size_t& constantNumPackets)
5777 {
// NOTE(review): ProfilingRegion is used below with no using-declaration
// visible in this block -- confirm how it is brought into scope.
5779 using GO = global_ordinal_type;
5780 using std::endl;
5781 using crs_graph_type =
5782 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5783 const char tfecfFuncName[] = "packAndPrepare: ";
5784 ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5785
5786 const bool verbose = verbose_;
5787 std::unique_ptr<std::string> prefix;
5788 if (verbose) {
5789 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5790 std::ostringstream os;
5791 os << *prefix << "Start" << endl;
5792 std::cerr << os.str();
5793 }
5794
5795 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5796 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5797 std::runtime_error,
5798 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5799 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5800 << ".");
5801 const row_graph_type* srcRowGraphPtr =
5802 dynamic_cast<const row_graph_type*> (&source);
5803 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5804 (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5805 "or Import operation to a CrsGraph must be a RowGraph with the same "
5806 "template parameters.");
5807 // We don't check whether src_graph has had fillComplete called,
5808 // because it doesn't matter whether the *source* graph has been
5809 // fillComplete'd. The target graph can not be fillComplete'd yet.
5810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5811 (this->isFillComplete (), std::runtime_error,
5812 "The target graph of an Import or Export must not be fill complete.");
5813
// Null here means the source is a RowGraph but not this CrsGraph type.
5814 const crs_graph_type* srcCrsGraphPtr =
5815 dynamic_cast<const crs_graph_type*> (&source);
5816
5817 if (srcCrsGraphPtr == nullptr) {
5818 using Teuchos::ArrayView;
5819 using LO = local_ordinal_type;
5820
5821 if (verbose) {
5822 std::ostringstream os;
5823 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5824 << endl;
5825 std::cerr << os.str();
5826 }
5827 // RowGraph::pack serves the "old" DistObject interface. It
5828 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5829 // entails deep-copying the exports buffer on output. RowGraph
5830 // is a convenience interface when not a CrsGraph, so we accept
5831 // the performance hit.
5832 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5833 auto exportLIDs_h = exportLIDs.view_host ();
5834 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5835 exportLIDs_h.extent (0));
5836 Teuchos::Array<GO> exports_a;
5837
// The packet counts are written on host, so reset the sync state and
// mark the host view modified before wrapping it in an ArrayView.
5838 numPacketsPerLID.clear_sync_state ();
5839 numPacketsPerLID.modify_host ();
5840 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5841 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5842 numPacketsPerLID_h.extent (0));
5843 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5844 constantNumPackets);
// Reallocate `exports` only when its length differs from the packed
// size, then copy the packed data into its host view through an
// unmanaged View over exports_a.
5845 const size_t newSize = static_cast<size_t> (exports_a.size ());
5846 if (static_cast<size_t> (exports.extent (0)) != newSize) {
5847 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5848 exports = exports_dv_type ("exports", newSize);
5849 }
5850 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5851 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5852 exports.clear_sync_state ();
5853 exports.modify_host ();
5854 Kokkos::deep_copy (exports.view_host (), exports_a_h);
5855 }
5856 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
// NOTE(review): getColMap(), rowPtrsPacked_dev_ and getRowMap() below
// are evaluated on *this (the target), while packCrsGraphNew is given
// *srcCrsGraphPtr -- confirm that checking the target's state rather
// than the source's is intended.
5857 else if (! getColMap ().is_null () &&
5858 (rowPtrsPacked_dev_.extent (0) != 0 ||
5859 getRowMap ()->getNodeNumElements () == 0)) {
5860 if (verbose) {
5861 std::ostringstream os;
5862 os << *prefix << "packCrsGraphNew path" << endl;
5863 std::cerr << os.str();
5864 }
5865 using export_pids_type =
5866 Kokkos::DualView<const int*, buffer_device_type>;
5867 export_pids_type exportPIDs; // not filling it; needed for syntax
5868 using LO = local_ordinal_type;
5869 using NT = node_type;
// NOTE(review): packCrsGraphNew is called with no using-declaration
// visible in this block -- confirm how it is brought into scope.
5871 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5872 exports, numPacketsPerLID,
5873 constantNumPackets, false);
5874 }
5875 else {
// Fallback: let the source CrsGraph pack itself.
5876 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5877 constantNumPackets);
5878 }
5879
5880 if (verbose) {
5881 std::ostringstream os;
5882 os << *prefix << "Done" << endl;
5883 std::cerr << os.str();
5884 }
5885 }
5886
// Pack the rows listed in exportLIDs into `exports`, using the
// Teuchos::ArrayView / Teuchos::Array interface.
//
// Dispatches to packCrsGraph when this graph has a column Map and
// either rowPtrsPacked_dev_ is nonempty or the row Map has no local
// elements; otherwise dispatches to packFillActive.
//
// \param exportLIDs [in] Local indices of the rows to pack.
// \param exports [out] Packed output, filled by the callee.
// \param numPacketsPerLID [out] Per-row packet counts, filled by the
//   callee.
// \param constantNumPackets [out] Passed through to the callee.
5887 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5888 void
// NOTE(review): no class qualifier (CrsGraph<...>::) appears between
// the return type and the function name in this listing, although the
// body uses `this` and the function is const-qualified -- confirm
// against the original file.
5890 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5891 Teuchos::Array<GlobalOrdinal>& exports,
5892 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5893 size_t& constantNumPackets) const
5894 {
5895 auto col_map = this->getColMap();
5896 // packCrsGraph requires k_rowPtrsPacked to be set
5897 if( !col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
// NOTE(review): packCrsGraph is called with no using-declaration
// visible in this block -- confirm how it is brought into scope.
5899 packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5900 exportLIDs, constantNumPackets);
5901 }
5902 else {
5903 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5904 constantNumPackets);
5905 }
5906 }
5907
5908 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5909 void
5911 packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5912 Teuchos::Array<GlobalOrdinal>& exports,
5913 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5914 size_t& constantNumPackets) const
5915 {
5916 using std::endl;
5917 using LO = LocalOrdinal;
5918 using GO = GlobalOrdinal;
5919 using host_execution_space =
5920 typename Kokkos::View<size_t*, device_type>::
5921 HostMirror::execution_space;
5922 const char tfecfFuncName[] = "packFillActive: ";
5923 const bool verbose = verbose_;
5924
5925 const auto numExportLIDs = exportLIDs.size ();
5926 std::unique_ptr<std::string> prefix;
5927 if (verbose) {
5928 prefix = this->createPrefix("CrsGraph", "allocateIndices");
5929 std::ostringstream os;
5930 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5931 std::cerr << os.str();
5932 }
5933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5934 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5935 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5936 " = " << numPacketsPerLID.size () << ".");
5937
5938 const map_type& rowMap = * (this->getRowMap ());
5939 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5941 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5942 "This graph claims to be locally indexed, but its column Map is nullptr. "
5943 "This should never happen. Please report this bug to the Tpetra "
5944 "developers.");
5945
5946 // We may pack different amounts of data for different rows.
5947 constantNumPackets = 0;
5948
5949 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5950 // it might be now, but we might as well be safe).
5951 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5952 const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5953
5954 // Count the total number of packets (column indices, in the case
5955 // of a CrsGraph) to pack. While doing so, set
5956 // numPacketsPerLID[i] to the number of entries owned by the
5957 // calling process in (local) row exportLIDs[i] of the graph, that
5958 // the caller wants us to send out.
5959 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5960 size_t totalNumPackets = 0;
5961 size_t errCount = 0;
5962 // lambdas turn what they capture const, so we can't
5963 // atomic_add(&errCount,1). Instead, we need a View to modify.
5964 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5965 host_device_type;
5966 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5967 constexpr size_t ONE = 1;
5968
5969 execute_sync_host_uvm_access(); // protect host UVM access
5970 Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5971 inputRange,
5972 [=] (const LO& i, size_t& curTotalNumPackets) {
5973 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5974 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5975 Kokkos::atomic_add (&errCountView(), ONE);
5976 numPacketsPerLID_raw[i] = 0;
5977 }
5978 else {
5979 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5980 numPacketsPerLID_raw[i] = numEnt;
5981 curTotalNumPackets += numEnt;
5982 }
5983 },
5984 totalNumPackets);
5985
5986 if (verbose) {
5987 std::ostringstream os;
5988 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5989 std::cerr << os.str();
5990 }
5991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5992 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5993 "one or more errors! errCount = " << errCount
5994 << ", totalNumPackets = " << totalNumPackets << ".");
5995 errCount = 0;
5996
5997 // Allocate space for all the column indices to pack.
5998 exports.resize (totalNumPackets);
5999
6000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6001 (! this->supportsRowViews (), std::logic_error,
6002 "this->supportsRowViews() returns false; this should never happen. "
6003 "Please report this bug to the Tpetra developers.");
6004
6005 // Loop again over the rows to export, and pack rows of indices
6006 // into the output buffer.
6007
6008 if (verbose) {
6009 std::ostringstream os;
6010 os << *prefix << "Pack into exports" << endl;
6011 std::cerr << os.str();
6012 }
6013
6014 // Teuchos::ArrayView may not be thread safe, or may not be
6015 // efficiently thread safe. Better to use the raw pointer.
6016 GO* const exports_raw = exports.getRawPtr ();
6017 errCount = 0;
6018 Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
6019 inputRange, [=, &prefix]
6020 (const LO i, size_t& exportsOffset, const bool final) {
6021 const size_t curOffset = exportsOffset;
6022 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6023 const RowInfo rowInfo =
6024 this->getRowInfoFromGlobalRowIndex (gblRow);
6025
6026 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
6027 if (rowInfo.localRow == TDO::invalid ()) {
6028 if (verbose) {
6029 std::ostringstream os;
6030 os << *prefix << ": INVALID rowInfo: i=" << i
6031 << ", lclRow=" << exportLIDs_raw[i] << endl;
6032 std::cerr << os.str();
6033 }
6034 Kokkos::atomic_add (&errCountView(), ONE);
6035 }
6036 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6037 if (verbose) {
6038 std::ostringstream os;
6039 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
6040 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
6041 "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6042 << ") > totalNumPackets (= " << totalNumPackets << ")."
6043 << endl;
6044 std::cerr << os.str();
6045 }
6046 Kokkos::atomic_add (&errCountView(), ONE);
6047 }
6048 else {
6049 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6050 if (this->isLocallyIndexed ()) {
6051 auto lclColInds = getLocalIndsViewHost (rowInfo);
6052 if (final) {
6053 for (LO k = 0; k < numEnt; ++k) {
6054 const LO lclColInd = lclColInds(k);
6055 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6056 // Pack it, even if it's wrong. Let the receiving
6057 // process deal with it. Otherwise, we'll miss out
6058 // on any correct data.
6059 exports_raw[curOffset + k] = gblColInd;
6060 } // for each entry in the row
6061 } // final pass?
6062 exportsOffset = curOffset + numEnt;
6063 }
6064 else if (this->isGloballyIndexed ()) {
6065 auto gblColInds = getGlobalIndsViewHost (rowInfo);
6066 if (final) {
6067 for (LO k = 0; k < numEnt; ++k) {
6068 const GO gblColInd = gblColInds(k);
6069 // Pack it, even if it's wrong. Let the receiving
6070 // process deal with it. Otherwise, we'll miss out
6071 // on any correct data.
6072 exports_raw[curOffset + k] = gblColInd;
6073 } // for each entry in the row
6074 } // final pass?
6075 exportsOffset = curOffset + numEnt;
6076 }
6077 // If neither globally nor locally indexed, then the graph
6078 // has no entries in this row (or indeed, in any row on this
6079 // process) to pack.
6080 }
6081 });
6082
6083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6084 (errCount != 0, std::logic_error, "Packing encountered "
6085 "one or more errors! errCount = " << errCount
6086 << ", totalNumPackets = " << totalNumPackets << ".");
6087
6088 if (verbose) {
6089 std::ostringstream os;
6090 os << *prefix << "Done" << endl;
6091 std::cerr << os.str();
6092 }
6093 }
6094
6095 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6096 void
6097 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6098 packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
6099 buffer_device_type>& exportLIDs,
6100 Kokkos::DualView<packet_type*,
6101 buffer_device_type>& exports,
6102 Kokkos::DualView<size_t*,
6103 buffer_device_type> numPacketsPerLID,
6104 size_t& constantNumPackets) const
6105 {
6106 using std::endl;
6107 using LO = local_ordinal_type;
6108 using GO = global_ordinal_type;
6109 using host_execution_space = typename Kokkos::View<size_t*,
6110 device_type>::HostMirror::execution_space;
6111 using host_device_type =
6112 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6113 using exports_dv_type =
6114 Kokkos::DualView<packet_type*, buffer_device_type>;
6115 const char tfecfFuncName[] = "packFillActiveNew: ";
6116 const bool verbose = verbose_;
6117
6118 const auto numExportLIDs = exportLIDs.extent (0);
6119 std::unique_ptr<std::string> prefix;
6120 if (verbose) {
6121 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
6122 std::ostringstream os;
6123 os << *prefix << "numExportLIDs: " << numExportLIDs
6124 << ", numPacketsPerLID.extent(0): "
6125 << numPacketsPerLID.extent(0) << endl;
6126 std::cerr << os.str();
6127 }
6128 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6129 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6130 "exportLIDs.extent(0) = " << numExportLIDs
6131 << " != numPacketsPerLID.extent(0) = "
6132 << numPacketsPerLID.extent (0) << ".");
6133 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6134 auto exportLIDs_h = exportLIDs.view_host ();
6135
6136 const map_type& rowMap = * (this->getRowMap ());
6137 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6139 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6140 "This graph claims to be locally indexed, but its column Map is nullptr. "
6141 "This should never happen. Please report this bug to the Tpetra "
6142 "developers.");
6143
6144 // We may pack different amounts of data for different rows.
6145 constantNumPackets = 0;
6146
6147 numPacketsPerLID.clear_sync_state ();
6148 numPacketsPerLID.modify_host ();
6149 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6150
6151 // Count the total number of packets (column indices, in the case
6152 // of a CrsGraph) to pack. While doing so, set
6153 // numPacketsPerLID[i] to the number of entries owned by the
6154 // calling process in (local) row exportLIDs[i] of the graph, that
6155 // the caller wants us to send out.
6156 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6157 range_type inputRange (0, numExportLIDs);
6158 size_t totalNumPackets = 0;
6159 size_t errCount = 0;
6160 // lambdas turn what they capture const, so we can't
6161 // atomic_add(&errCount,1). Instead, we need a View to modify.
6162 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6163 constexpr size_t ONE = 1;
6164
6165 if (verbose) {
6166 std::ostringstream os;
6167 os << *prefix << "Compute totalNumPackets" << endl;
6168 std::cerr << os.str ();
6169 }
6170
6171 execute_sync_host_uvm_access(); // protect host UVM access
6172 Kokkos::parallel_reduce
6173 ("Tpetra::CrsGraph::pack: totalNumPackets",
6174 inputRange,
6175 [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
6176 const LO lclRow = exportLIDs_h[i];
6177 const GO gblRow = rowMap.getGlobalElement (lclRow);
6178 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6179 if (verbose) {
6180 std::ostringstream os;
6181 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6182 << " not in row Map on this process" << endl;
6183 std::cerr << os.str();
6184 }
6185 Kokkos::atomic_add (&errCountView(), ONE);
6186 numPacketsPerLID_h(i) = 0;
6187 }
6188 else {
6189 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6190 numPacketsPerLID_h(i) = numEnt;
6191 curTotalNumPackets += numEnt;
6192 }
6193 },
6194 totalNumPackets);
6195
6196 if (verbose) {
6197 std::ostringstream os;
6198 os << *prefix << "totalNumPackets: " << totalNumPackets
6199 << ", errCount: " << errCount << endl;
6200 std::cerr << os.str ();
6201 }
6202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6203 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6204 "one or more errors! totalNumPackets: " << totalNumPackets
6205 << ", errCount: " << errCount << ".");
6206
6207 // Allocate space for all the column indices to pack.
6208 if (size_t(exports.extent (0)) < totalNumPackets) {
6209 // FIXME (mfh 09 Apr 2019) Create without initializing.
6210 exports = exports_dv_type ("exports", totalNumPackets);
6211 }
6212
6213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6214 (! this->supportsRowViews (), std::logic_error,
6215 "this->supportsRowViews() returns false; this should never happen. "
6216 "Please report this bug to the Tpetra developers.");
6217
6218 // Loop again over the rows to export, and pack rows of indices
6219 // into the output buffer.
6220
6221 if (verbose) {
6222 std::ostringstream os;
6223 os << *prefix << "Pack into exports buffer" << endl;
6224 std::cerr << os.str();
6225 }
6226
6227 exports.clear_sync_state ();
6228 exports.modify_host ();
6229 auto exports_h = exports.view_host ();
6230
6231 errCount = 0;
6232 Kokkos::parallel_scan
6233 ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6234 inputRange, [=, &prefix]
6235 (const LO i, size_t& exportsOffset, const bool final) {
6236 const size_t curOffset = exportsOffset;
6237 const LO lclRow = exportLIDs_h(i);
6238 const GO gblRow = rowMap.getGlobalElement (lclRow);
6239 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6240 if (verbose) {
6241 std::ostringstream os;
6242 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6243 << " not in row Map on this process" << endl;
6244 std::cerr << os.str();
6245 }
6246 Kokkos::atomic_add (&errCountView(), ONE);
6247 return;
6248 }
6249
6250 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6251 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6252 if (verbose) {
6253 std::ostringstream os;
6254 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6255 << ", gblRow=" << gblRow << ": invalid rowInfo"
6256 << endl;
6257 std::cerr << os.str();
6258 }
6259 Kokkos::atomic_add (&errCountView(), ONE);
6260 return;
6261 }
6262
6263 if (curOffset + rowInfo.numEntries > totalNumPackets) {
6264 if (verbose) {
6265 std::ostringstream os;
6266 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6267 << ", gblRow=" << gblRow << ", curOffset (= "
6268 << curOffset << ") + numEnt (= " << rowInfo.numEntries
6269 << ") > totalNumPackets (= " << totalNumPackets
6270 << ")." << endl;
6271 std::cerr << os.str();
6272 }
6273 Kokkos::atomic_add (&errCountView(), ONE);
6274 return;
6275 }
6276
6277 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6278 if (this->isLocallyIndexed ()) {
6279 auto lclColInds = getLocalIndsViewHost(rowInfo);
6280 if (final) {
6281 for (LO k = 0; k < numEnt; ++k) {
6282 const LO lclColInd = lclColInds(k);
6283 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6284 // Pack it, even if it's wrong. Let the receiving
6285 // process deal with it. Otherwise, we'll miss out
6286 // on any correct data.
6287 exports_h(curOffset + k) = gblColInd;
6288 } // for each entry in the row
6289 } // final pass?
6290 exportsOffset = curOffset + numEnt;
6291 }
6292 else if (this->isGloballyIndexed ()) {
6293 auto gblColInds = getGlobalIndsViewHost(rowInfo);
6294 if (final) {
6295 for (LO k = 0; k < numEnt; ++k) {
6296 const GO gblColInd = gblColInds(k);
6297 // Pack it, even if it's wrong. Let the receiving
6298 // process deal with it. Otherwise, we'll miss out
6299 // on any correct data.
6300 exports_h(curOffset + k) = gblColInd;
6301 } // for each entry in the row
6302 } // final pass?
6303 exportsOffset = curOffset + numEnt;
6304 }
6305 // If neither globally nor locally indexed, then the graph
6306 // has no entries in this row (or indeed, in any row on this
6307 // process) to pack.
6308 });
6309
6310 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6311 // (errCount != 0, std::logic_error, "Packing encountered "
6312 // "one or more errors! errCount = " << errCount
6313 // << ", totalNumPackets = " << totalNumPackets << ".");
6314
6315 if (verbose) {
6316 std::ostringstream os;
6317 os << *prefix << "errCount=" << errCount << "; Done" << endl;
6318 std::cerr << os.str();
6319 }
6320 }
6321
6322 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6323 void
6324 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6325 unpackAndCombine
6326 (const Kokkos::DualView<const local_ordinal_type*,
6327 buffer_device_type>& importLIDs,
6328 Kokkos::DualView<packet_type*,
6329 buffer_device_type> imports,
6330 Kokkos::DualView<size_t*,
6331 buffer_device_type> numPacketsPerLID,
6332 const size_t /* constantNumPackets */,
6333 const CombineMode /* combineMode */ )
6334 {
6335 using Details::ProfilingRegion;
6336 using std::endl;
6337 using LO = local_ordinal_type;
6338 using GO = global_ordinal_type;
6339 const char tfecfFuncName[] = "unpackAndCombine";
6340
6341 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6342 const bool verbose = verbose_;
6343
6344 std::unique_ptr<std::string> prefix;
6345 if (verbose) {
6346 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6347 std::ostringstream os;
6348 os << *prefix << "Start" << endl;
6349 std::cerr << os.str ();
6350 }
6351 {
6352 auto padding = computeCrsPaddingForImports(
6353 importLIDs, imports, numPacketsPerLID, verbose);
6354 applyCrsPadding(*padding, verbose);
6355 if (verbose) {
6356 std::ostringstream os;
6357 os << *prefix << "Done computing & applying padding" << endl;
6358 std::cerr << os.str ();
6359 }
6360 }
6361
6362 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6363 // reasonable meaning, whether or not the matrix is fill complete.
6364 // It's just more work to implement.
6365
6366 // We are not checking the value of the CombineMode input
6367 // argument. For CrsGraph, we only support import/export
6368 // operations if fillComplete has not yet been called. Any
6369 // incoming column-indices are inserted into the target graph. In
6370 // this context, CombineMode values of ADD vs INSERT are
6371 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6372 // duplicate column-index is inserted, it will be compressed out
6373 // when fillComplete is called.
6374 //
6375 // Note: I think REPLACE means that an existing row is replaced by
6376 // the imported row, i.e., the existing indices are cleared. CGB,
6377 // 6/17/2010
6378
6379 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6380 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6381 std::runtime_error, ": importLIDs.extent(0) = "
6382 << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6383 << numPacketsPerLID.extent (0) << ".");
6384 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6385 (isFillComplete (), std::runtime_error,
6386 ": Import or Export operations are not allowed on a target "
6387 "CrsGraph that is fillComplete.");
6388
6389 const size_t numImportLIDs(importLIDs.extent(0));
6390 if (numPacketsPerLID.need_sync_host()) {
6391 numPacketsPerLID.sync_host();
6392 }
6393 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6394 if (imports.need_sync_host()) {
6395 imports.sync_host();
6396 }
6397 auto imports_h = imports.view_host();
6398 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6399 auto importLIDs_h = importLIDs.view_host();
6400
6401 // If we're inserting in local indices, let's pre-allocate
6402 Teuchos::Array<LO> lclColInds;
6403 if (isLocallyIndexed()) {
6404 if (verbose) {
6405 std::ostringstream os;
6406 os << *prefix << "Preallocate local indices scratch" << endl;
6407 std::cerr << os.str();
6408 }
6409 size_t maxNumInserts = 0;
6410 for (size_t i = 0; i < numImportLIDs; ++i) {
6411 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6412 }
6413 if (verbose) {
6414 std::ostringstream os;
6415 os << *prefix << "Local indices scratch size: "
6416 << maxNumInserts << endl;
6417 std::cerr << os.str();
6418 }
6419 lclColInds.resize (maxNumInserts);
6420 }
6421 else {
6422 if (verbose) {
6423 std::ostringstream os;
6424 os << *prefix;
6425 if (isGloballyIndexed()) {
6426 os << "Graph is globally indexed";
6427 }
6428 else {
6429 os << "Graph is neither locally nor globally indexed";
6430 }
6431 os << endl;
6432 std::cerr << os.str();
6433 }
6434 }
6435
6436 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6437 const map_type& rowMap = *rowMap_;
6438
6439 try {
6440 size_t importsOffset = 0;
6441 for (size_t i = 0; i < numImportLIDs; ++i) {
6442 if (verbose) {
6443 std::ostringstream os;
6444 os << *prefix << "i=" << i << ", numImportLIDs="
6445 << numImportLIDs << endl;
6446 std::cerr << os.str();
6447 }
6448 // We can only unpack into owned rows, since we only have
6449 // local row indices.
6450 const LO lclRow = importLIDs_h[i];
6451 const GO gblRow = rowMap.getGlobalElement(lclRow);
6452 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6453 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6454 std::logic_error, "importLIDs[i=" << i << "]="
6455 << lclRow << " is not in the row Map on the calling "
6456 "process.");
6457 const LO numEnt = numPacketsPerLID_h[i];
6458 const GO* const gblColInds = (numEnt == 0) ? nullptr :
6459 imports_h.data() + importsOffset;
6460 if (! isLocallyIndexed()) {
6461 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6462 }
6463 else {
6464 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6465 // column Map filtering.
6466 for (LO j = 0; j < numEnt; j++) {
6467 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6468 }
6469 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6470 }
6471 importsOffset += numEnt;
6472 }
6473 }
6474 catch (std::exception& e) {
6475 TEUCHOS_TEST_FOR_EXCEPTION
6476 (true, std::runtime_error,
6477 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6478 "exception: " << endl << e.what());
6479 }
6480
6481 if (verbose) {
6482 std::ostringstream os;
6483 os << *prefix << "Done" << endl;
6484 std::cerr << os.str();
6485 }
6486 }
6487
6488 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6489 void
6491 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6492 {
6493 using Teuchos::Comm;
6494 using Teuchos::null;
6495 using Teuchos::ParameterList;
6496 using Teuchos::RCP;
6497
6498 // We'll set all the state "transactionally," so that this method
6499 // satisfies the strong exception guarantee. This object's state
6500 // won't be modified until the end of this method.
6501 RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6502 RCP<import_type> importer;
6503 RCP<export_type> exporter;
6504
6505 rowMap = newMap;
6506 RCP<const Comm<int> > newComm =
6507 (newMap.is_null ()) ? null : newMap->getComm ();
6508
6509 if (! domainMap_.is_null ()) {
6510 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6511 // Common case: original domain and row Maps are identical.
6512 // In that case, we need only replace the original domain Map
6513 // with the new Map. This ensures that the new domain and row
6514 // Maps _stay_ identical.
6515 domainMap = newMap;
6516 } else {
6517 domainMap = domainMap_->replaceCommWithSubset (newComm);
6518 }
6519 }
6520 if (! rangeMap_.is_null ()) {
6521 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6522 // Common case: original range and row Maps are identical. In
6523 // that case, we need only replace the original range Map with
6524 // the new Map. This ensures that the new range and row Maps
6525 // _stay_ identical.
6526 rangeMap = newMap;
6527 } else {
6528 rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6529 }
6530 }
6531 if (! colMap_.is_null ()) {
6532 colMap = colMap_->replaceCommWithSubset (newComm);
6533 }
6534
6535 // (Re)create the Export and / or Import if necessary.
6536 if (! newComm.is_null ()) {
6537 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6538 //
6539 // The operations below are collective on the new communicator.
6540 //
6541 // (Re)create the Export object if necessary. If I haven't
6542 // called fillComplete yet, I don't have a rangeMap, so I must
6543 // first check if the _original_ rangeMap is not null. Ditto
6544 // for the Import object and the domain Map.
6545 if (! rangeMap_.is_null () &&
6546 rangeMap != rowMap &&
6547 ! rangeMap->isSameAs (*rowMap)) {
6548 if (params.is_null () || ! params->isSublist ("Export")) {
6549 exporter = rcp (new export_type (rowMap, rangeMap));
6550 }
6551 else {
6552 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6553 exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6554 }
6555 }
6556 // (Re)create the Import object if necessary.
6557 if (! domainMap_.is_null () &&
6558 domainMap != colMap &&
6559 ! domainMap->isSameAs (*colMap)) {
6560 if (params.is_null () || ! params->isSublist ("Import")) {
6561 importer = rcp (new import_type (domainMap, colMap));
6562 } else {
6563 RCP<ParameterList> importSublist = sublist (params, "Import", true);
6564 importer = rcp (new import_type (domainMap, colMap, importSublist));
6565 }
6566 }
6567 } // if newComm is not null
6568
6569 // Defer side effects until the end. If no destructors throw
6570 // exceptions (they shouldn't anyway), then this method satisfies
6571 // the strong exception guarantee.
6572 exporter_ = exporter;
6573 importer_ = importer;
6574 rowMap_ = rowMap;
6575 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6576 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6577 // the same object. We might want to get rid of this redundant
6578 // pointer sometime, but for now, we'll leave it alone and just
6579 // set map_ to the same object.
6580 this->map_ = rowMap;
6581 domainMap_ = domainMap;
6582 rangeMap_ = rangeMap;
6583 colMap_ = colMap;
6584 }
6585
6586 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6587 void
6589 getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6590 {
6591 using std::endl;
6592 using LO = LocalOrdinal;
6593 using GO = GlobalOrdinal;
6594 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6595 const bool verbose = verbose_;
6596
6597 std::unique_ptr<std::string> prefix;
6598 if (verbose) {
6599 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6600 std::ostringstream os;
6601 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6602 << endl;
6603 std::cerr << os.str();
6604 }
6605
6606 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6607 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6608 const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6609 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6610 (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6611 std::invalid_argument, "offsets.extent(0) = " <<
6612 offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6613
6614 const map_type& rowMap = * (this->getRowMap ());
6615 const map_type& colMap = * (this->getColMap ());
6616
6617 // We only use these in debug mode, but since debug mode is a
6618 // run-time option, they need to exist here. That's why we create
6619 // the vector with explicit size zero, to avoid overhead if debug
6620 // mode is off.
6621 bool allRowMapDiagEntriesInColMap = true;
6622 bool allDiagEntriesFound = true;
6623 bool allOffsetsCorrect = true;
6624 bool noOtherWeirdness = true;
6625 using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6626 wrong_offsets_type wrongOffsets(0);
6627
6628 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6629 // the subset of Map functionality that we need below.
6630 auto lclRowMap = rowMap.getLocalMap ();
6631 auto lclColMap = colMap.getLocalMap ();
6632
6633 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6634 // setup, at least on the host. For CUDA, we have to use LocalMap
6635 // (that comes from each of the two Maps).
6636
6637 const bool sorted = this->isSorted ();
6638 if (isFillComplete ()) {
6639 auto lclGraph = this->getLocalGraphDevice ();
6640 ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6641 lclGraph.row_map,
6642 lclGraph.entries, sorted);
6643 }
6644 else {
6645 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6646 // since the graph is not fill complete. The previous version
6647 // of this code assumed UVM; this version does not.
6648 auto offsets_h = Kokkos::create_mirror_view (offsets);
6649
6650 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6651 // Find the diagonal entry. Since the row Map and column Map
6652 // may differ, we have to compare global row and column
6653 // indices, not local.
6654 const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6655 const GO gblColInd = gblRowInd;
6656 const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6657
6658 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6659 allRowMapDiagEntriesInColMap = false;
6660 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6661 }
6662 else {
6663 const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6664 if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6665 rowInfo.numEntries > 0) {
6666
6667 auto colInds = this->getLocalIndsViewHost (rowInfo);
6668 const size_t hint = 0; // not needed for this algorithm
6669 const size_t offset =
6670 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6671 lclColInd, hint, sorted);
6672 offsets_h(lclRowInd) = offset;
6673
6674 if (debug_) {
6675 // Now that we have what we think is an offset, make sure
6676 // that it really does point to the diagonal entry. Offsets
6677 // are _relative_ to each row, not absolute (for the whole
6678 // (local) graph).
6679 typename local_inds_dualv_type::t_host::const_type lclColInds;
6680 try {
6681 lclColInds = this->getLocalIndsViewHost (rowInfo);
6682 }
6683 catch (...) {
6684 noOtherWeirdness = false;
6685 }
6686 // Don't continue with error checking if the above failed.
6687 if (noOtherWeirdness) {
6688 const size_t numEnt = lclColInds.extent (0);
6689 if (offset >= numEnt) {
6690 // Offsets are relative to each row, so this means that
6691 // the offset is out of bounds.
6692 allOffsetsCorrect = false;
6693 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6694 } else {
6695 const LO actualLclColInd = lclColInds(offset);
6696 const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6697 if (actualGblColInd != gblColInd) {
6698 allOffsetsCorrect = false;
6699 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6700 }
6701 }
6702 }
6703 } // debug_
6704 }
6705 else { // either row is empty, or something went wrong w/ getRowInfo()
6706 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6707 allDiagEntriesFound = false;
6708 }
6709 } // whether lclColInd is a valid local column index
6710 } // for each local row
6711
6712 Kokkos::deep_copy (offsets, offsets_h);
6713 } // whether the graph is fill complete
6714
6715 if (verbose && wrongOffsets.size () != 0) {
6716 std::ostringstream os;
6717 os << *prefix << "Wrong offsets: [";
6718 for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6719 os << "(" << wrongOffsets[k].first << ","
6720 << wrongOffsets[k].second << ")";
6721 if (k + 1 < wrongOffsets.size ()) {
6722 os << ", ";
6723 }
6724 }
6725 os << "]" << endl;
6726 std::cerr << os.str();
6727 }
6728
6729 if (debug_) {
6730 using Teuchos::reduceAll;
6731 using std::endl;
6732 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6733 const bool localSuccess =
6734 allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6735 const int numResults = 5;
6736 int lclResults[5];
6737 lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6738 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6739 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6740 lclResults[3] = noOtherWeirdness ? 1 : 0;
6741 // min-all-reduce will compute least rank of all the processes
6742 // that didn't succeed.
6743 lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6744
6745 int gblResults[5];
6746 gblResults[0] = 0;
6747 gblResults[1] = 0;
6748 gblResults[2] = 0;
6749 gblResults[3] = 0;
6750 gblResults[4] = 0;
6751 reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6752 numResults, lclResults, gblResults);
6753
6754 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6755 || gblResults[3] != 1) {
6756 std::ostringstream os; // build error message
6757 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6758 "possibly among others): " << endl;
6759 if (gblResults[0] == 0) {
6760 os << " - The column Map does not contain at least one diagonal entry "
6761 "of the graph." << endl;
6762 }
6763 if (gblResults[1] == 0) {
6764 os << " - On one or more processes, some row does not contain a "
6765 "diagonal entry." << endl;
6766 }
6767 if (gblResults[2] == 0) {
6768 os << " - On one or more processes, some offsets are incorrect."
6769 << endl;
6770 }
6771 if (gblResults[3] == 0) {
6772 os << " - One or more processes had some other error."
6773 << endl;
6774 }
6775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6776 }
6777 } // debug_
6778 }
6779
6780 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6781 void
6783 getLocalOffRankOffsets (offset_device_view_type& offsets) const
6784 {
6785 using std::endl;
6786 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6787 const bool verbose = verbose_;
6788
6789 std::unique_ptr<std::string> prefix;
6790 if (verbose) {
6791 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6792 std::ostringstream os;
6793 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6794 << endl;
6795 std::cerr << os.str();
6796 }
6797
6798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6799 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6800 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6801
6802 const size_t lclNumRows = this->getNodeNumRows ();
6803
6804 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6805 offsets = k_offRankOffsets_;
6806 return;
6807 }
6808 haveLocalOffRankOffsets_ = false;
6809 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6810 offsets = k_offRankOffsets_;
6811
6812 const map_type& colMap = * (this->getColMap ());
6813 const map_type& domMap = * (this->getDomainMap ());
6814
6815 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6816 // the subset of Map functionality that we need below.
6817 auto lclColMap = colMap.getLocalMap ();
6818 auto lclDomMap = domMap.getLocalMap ();
6819
6820 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6821 // setup, at least on the host. For CUDA, we have to use LocalMap
6822 // (that comes from each of the two Maps).
6823
6824 TEUCHOS_ASSERT(this->isSorted ());
6825 if (isFillComplete ()) {
6826 auto lclGraph = this->getLocalGraphDevice ();
6827 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6828 lclColMap, lclDomMap,
6829 lclGraph);
6830 haveLocalOffRankOffsets_ = true;
6831 }
6832 }
6833
6834 namespace { // (anonymous)
6835
6836 // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6837 // below). The point is to avoid the deep copy between the input
6838 // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6839 // can't use UVM to avoid the deep copy with CUDA, because the
6840 // ArrayRCP is a host pointer, while the input to the graph's
6841 // getLocalDiagOffsets method is a device pointer. Assigning a
6842 // host pointer to a device pointer is incorrect unless the host
6843 // pointer points to host pinned memory. The goal is to get rid
6844 // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6845 // copy for backwards compatibility.
6846 //
6847 // We have to use template magic because
6848 // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6849 // if device_type::memory_space is not Kokkos::HostSpace (as is
6850 // the case with CUDA).
6851
6852 template<class DeviceType,
6853 const bool memSpaceIsHostSpace =
6854 std::is_same<typename DeviceType::memory_space,
6855 Kokkos::HostSpace>::value>
6856 struct HelpGetLocalDiagOffsets {};
6857
6858 template<class DeviceType>
6859 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6860 typedef DeviceType device_type;
6861 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6862 Kokkos::MemoryUnmanaged> device_offsets_type;
6863 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6864 Kokkos::MemoryUnmanaged> host_offsets_type;
6865
6866 static device_offsets_type
6867 getDeviceOffsets (const host_offsets_type& hostOffsets)
6868 {
6869 // Host and device are the same; no need to allocate a
6870 // temporary device View.
6871 return hostOffsets;
6872 }
6873
6874 static void
6875 copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6876 const device_offsets_type& /* deviceOffsets */)
6877 { /* copy back not needed; host and device are the same */ }
6878 };
6879
6880 template<class DeviceType>
6881 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6882 typedef DeviceType device_type;
6883 // We have to do a deep copy, since host memory space != device
6884 // memory space. Thus, the device View is managed (we need to
6885 // allocate a temporary device View).
6886 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6887 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6888 Kokkos::MemoryUnmanaged> host_offsets_type;
6889
6890 static device_offsets_type
6891 getDeviceOffsets (const host_offsets_type& hostOffsets)
6892 {
6893 // Host memory space != device memory space, so we must
6894 // allocate a temporary device View for the graph.
6895 return device_offsets_type ("offsets", hostOffsets.extent (0));
6896 }
6897
6898 static void
6899 copyBackIfNeeded (const host_offsets_type& hostOffsets,
6900 const device_offsets_type& deviceOffsets)
6901 {
6902 Kokkos::deep_copy (hostOffsets, deviceOffsets);
6903 }
6904 };
6905 } // namespace (anonymous)
6906
6907
// Overload of getLocalDiagOffsets that takes a Teuchos::ArrayRCP.
//
// The test below is given std::runtime_error as its exception type
// and fires when the graph has no column Map.  'offsets' is resized
// to the local number of rows if its size differs.  The actual work
// is delegated to the overload that takes a device View; the helper
// type decides at compile time whether a temporary device View and a
// copy back to host are needed.
//
// NOTE(review): this listing skips embedded line 6910 between the
// return type and the method name; presumably the class-qualifier
// line was lost in extraction -- confirm against the original file.
6908 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6909 void
6911 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6912 {
6913 typedef LocalOrdinal LO;
6914 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6915 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6916 (! this->hasColMap (), std::runtime_error,
6917 "The graph does not yet have a column Map.");
6918 const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
6919 if (static_cast<LO> (offsets.size ()) != myNumRows) {
6920 // NOTE (mfh 21 Jan 2016) This means that the method does not
6921 // satisfy the strong exception guarantee (no side effects
6922 // unless successful).
6923 offsets.resize (myNumRows);
6924 }
6925
6926 // mfh 21 Jan 2016: This method unfortunately takes a
6927 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6928 // device pointer. We can't access host memory from the device;
6929 // that's the wrong direction for UVM. (It's the right direction
6930 // for inefficient host pinned memory, but we don't want to use
6931 // that here.) Thus, if device memory space != host memory space,
6932 // we allocate and use a temporary device View to get the offsets.
6933 // If the two spaces are equal, the template magic makes the deep
6934 // copy go away.
6935 typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6936 typedef typename helper_type::host_offsets_type host_offsets_type;
6937 // Unmanaged host View that views the output array.
6938 host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6939 // Allocate temp device View if host != device, else reuse host array.
6940 auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6941 // NOT recursion; this calls the overload that takes a device View.
6942 this->getLocalDiagOffsets (deviceOffsets);
// No-op when host and device Views alias; otherwise a deep copy
// from the temporary device View into 'offsets'.
6943 helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6944 }
6945
// Unconditionally returns true; the body contains no other logic.
//
// NOTE(review): this listing skips embedded line 6948 between the
// return type and the method name; presumably the class-qualifier
// line was lost in extraction -- confirm against the original file.
6946 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6947 bool
6949 supportsRowViews () const {
6950 return true;
6951 }
6952
// Redistribute this graph's entries according to 'rowTransfer' into
// 'destGraph' and finish the destination graph by calling
// expertStaticFillComplete() on it.  (The error-message prefix below
// names this method transferAndFillComplete.)
//
// Arguments visible in this listing:
//   rowTransfer     Must be an Import or an Export; otherwise the
//                   check below is given std::invalid_argument.
//   domainTransfer  May be null.  If nonnull, it must be an Import or
//                   an Export of the same kind as rowTransfer.
//   domainMap       If null, this graph's getDomainMap() is used.
//   rangeMap        If null, this graph's getRangeMap() is used.
//   params          May be null.  Entries read here: "Reverse Mode",
//                   "Restrict Communicator", the "CrsGraph" sublist,
//                   "compute global constants", and (only when
//                   HAVE_TPETRA_MMM_TIMINGS is defined) "Timer Label".
//
// 'destGraph' is used throughout the body.  If it is null, a new graph
// is constructed on the chosen row Map; if it is nonnull, it must be
// neither locally nor globally indexed, must have the same row Map as
// the one implied by rowTransfer, and must pass checkSizes(*this).
//
// When "Restrict Communicator" is set, the method returns early
// (before sorting and fill complete) on processes whose reduced
// communicator is null.
//
// NOTE(review): this listing skips embedded lines 6955-6956 (presumably
// the qualified method name and the 'destGraph' parameter) and
// 6963-6966 (presumably using-declarations for the unqualified helper
// functions called below) -- confirm against the original file.
6953 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6954 void
6957 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6958 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6959 const Teuchos::RCP<const map_type>& domainMap,
6960 const Teuchos::RCP<const map_type>& rangeMap,
6961 const Teuchos::RCP<Teuchos::ParameterList>& params) const
6962 {
6967 using Teuchos::ArrayRCP;
6968 using Teuchos::ArrayView;
6969 using Teuchos::Comm;
6970 using Teuchos::ParameterList;
6971 using Teuchos::rcp;
6972 using Teuchos::RCP;
6973#ifdef HAVE_TPETRA_MMM_TIMINGS
6974 using std::string;
6975 using Teuchos::TimeMonitor;
6976#endif
6977
6978 using LO = LocalOrdinal;
6979 using GO = GlobalOrdinal;
6980 using NT = node_type;
6981 using this_type = CrsGraph<LO, GO, NT>;
6982 using ivector_type = Vector<int, LO, GO, NT>;
6983
6984 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6985
6986#ifdef HAVE_TPETRA_MMM_TIMINGS
6987 string label;
6988 if(!params.is_null()) label = params->get("Timer Label", label);
6989 string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6990 RCP<TimeMonitor> MM =
6991 rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6992#endif
6993
6994 // Make sure that the input argument rowTransfer is either an
6995 // Import or an Export. Import and Export are the only two
6996 // subclasses of Transfer that we defined, but users might
6997 // (unwisely, for now at least) decide to implement their own
6998 // subclasses. Exclude this possibility.
6999 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
7000 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
7001 TEUCHOS_TEST_FOR_EXCEPTION(
7002 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
7003 prefix << "The 'rowTransfer' input argument must be either an Import or "
7004 "an Export, and its template parameters must match the corresponding "
7005 "template parameters of the CrsGraph.");
7006
7007 // Make sure that the input argument domainTransfer is either an
7008 // Import or an Export. Import and Export are the only two
7009 // subclasses of Transfer that we defined, but users might
7010 // (unwisely, for now at least) decide to implement their own
7011 // subclasses. Exclude this possibility.
7012 Teuchos::RCP<const import_type> xferDomainAsImport =
7013 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7014 Teuchos::RCP<const export_type> xferDomainAsExport =
7015 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7016
7017 if(! domainTransfer.is_null()) {
7018
7019 TEUCHOS_TEST_FOR_EXCEPTION(
7020 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7021 prefix << "The 'domainTransfer' input argument must be either an "
7022 "Import or an Export, and its template parameters must match the "
7023 "corresponding template parameters of the CrsGraph.");
7024
// The next two conditions each reduce to "exactly one of rowTransfer
// and domainTransfer is an Import" (respectively, an Export).
7025 TEUCHOS_TEST_FOR_EXCEPTION(
7026 ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
7027 (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
7028 ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7029 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7030 "must be of the same type (either Import or Export).");
7031
7032 TEUCHOS_TEST_FOR_EXCEPTION(
7033 ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
7034 (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
7035 ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7036 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7037 "must be of the same type (either Import or Export).");
7038
7039 } // domainTransfer != null
7040
7041
7042 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
7043 // if the source Map is not distributed but the target Map is?
7044 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7045
7046 //
7047 // Get the caller's parameters
7048 //
7049
7050 bool reverseMode = false; // Are we in reverse mode?
7051 bool restrictComm = false; // Do we need to restrict the communicator?
7052 RCP<ParameterList> graphparams; // parameters for the destination graph
7053 if (! params.is_null()) {
7054 reverseMode = params->get("Reverse Mode", reverseMode);
7055 restrictComm = params->get("Restrict Communicator", restrictComm);
7056 graphparams = sublist(params, "CrsGraph");
7057 }
7058
7059 // Get the new domain and range Maps. We need some of them for error
7060 // checking, now that we have the reverseMode parameter.
7061 RCP<const map_type> MyRowMap = reverseMode ?
7062 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7063 RCP<const map_type> MyColMap; // create this below
7064 RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
7065 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
// The "Base" Maps keep the unrestricted Maps; the "My" Maps may be
// replaced by restricted-communicator versions further down.
7066 RCP<const map_type> BaseRowMap = MyRowMap;
7067 RCP<const map_type> BaseDomainMap = MyDomainMap;
7068
7069 // If the user gave us a nonnull destGraph, then check whether it's
7070 // "pristine." That means that it has no entries.
7071 //
7072 // FIXME (mfh 15 May 2014) If this is not true on all processes,
7073 // then this exception test may hang. It would be better to
7074 // forward an error flag to the next communication phase.
7075 if (! destGraph.is_null()) {
7076 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
7077 // whether a graph or matrix has no entries on the calling
7078 // process, is that it is neither locally nor globally indexed.
7079 // This may change eventually with the Kokkos refactor version
7080 // of Tpetra, so it would be better just to check the quantity
7081 // of interest directly. Note that with the Kokkos refactor
7082 // version of Tpetra, asking for the total number of entries in
7083 // a graph or matrix that is not fill complete might require
7084 // computation (kernel launch), since it is not thread scalable
7085 // to update a count every time an entry is inserted.
7086 const bool NewFlag =
7087 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7088 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7089 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
7090 "if its graph is empty (neither locally nor globally indexed).");
7091
7092 // FIXME (mfh 15 May 2014) At some point, we want to change
7093 // graphs and matrices so that their DistObject Map
7094 // (this->getMap()) may differ from their row Map. This will
7095 // make redistribution for 2-D distributions more efficient. I
7096 // hesitate to change this check, because I'm not sure how much
7097 // the code here depends on getMap() and getRowMap() being the
7098 // same.
7099 TEUCHOS_TEST_FOR_EXCEPTION(
7100 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7101 prefix << "The (row) Map of the input argument 'destGraph' is not the "
7102 "same as the (row) Map specified by the input argument 'rowTransfer'.");
7103
// NOTE(review): the message below repeats the word "legal"; fixing
// that means editing the string literal itself.
7104 TEUCHOS_TEST_FOR_EXCEPTION(
7105 ! destGraph->checkSizes(*this), std::invalid_argument,
7106 prefix << "You provided a nonnull destination graph, but checkSizes() "
7107 "indicates that it is not a legal legal target for redistribution from "
7108 "the source graph (*this). This may mean that they do not have the "
7109 "same dimensions.");
7110 }
7111
7112 // If forward mode (the default), then *this's (row) Map must be
7113 // the same as the source Map of the Transfer. If reverse mode,
7114 // then *this's (row) Map must be the same as the target Map of
7115 // the Transfer.
7116 //
7117 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7118 // and matrices so that their DistObject Map (this->getMap()) may
7119 // differ from their row Map. This will make redistribution for
7120 // 2-D distributions more efficient. I hesitate to change this
7121 // check, because I'm not sure how much the code here depends on
7122 // getMap() and getRowMap() being the same.
7123 TEUCHOS_TEST_FOR_EXCEPTION(
7124 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7125 std::invalid_argument, prefix <<
7126 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7127
7128 TEUCHOS_TEST_FOR_EXCEPTION(
7129 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7130 std::invalid_argument, prefix <<
7131 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7132
7133 // checks for domainTransfer
// NOTE(review): both checks dereference 'domainMap' directly, while
// the code above treats a null 'domainMap' as legal.  A nonnull
// 'domainTransfer' combined with a null 'domainMap' looks like a
// null dereference here -- confirm whether MyDomainMap was intended.
7134 TEUCHOS_TEST_FOR_EXCEPTION(
7135 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7136 std::invalid_argument,
7137 prefix << "The target map of the 'domainTransfer' input argument must be "
7138 "the same as the rebalanced domain map 'domainMap'");
7139
7140 TEUCHOS_TEST_FOR_EXCEPTION(
7141 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7142 std::invalid_argument,
7143 prefix << "The source map of the 'domainTransfer' input argument must be "
7144 "the same as the rebalanced domain map 'domainMap'");
7145
7146 // The basic algorithm here is:
7147 //
7148 // 1. Call the moral equivalent of "Distor.do" to handle the import.
7149 // 2. Copy all the Imported and Copy/Permuted data into the raw
7150 // CrsGraph pointers, still using GIDs.
7151 // 3. Call an optimized version of MakeColMap that avoids the
7152 // Directory lookups (since the importer knows who owns all the
7153 // GIDs) AND reindexes to LIDs.
7154 // 4. Call expertStaticFillComplete()
7155
7156 // Get information from the Importer
// In reverse mode the export/remote and permute-to/permute-from
// lists of the transfer are swapped.
7157 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7158 ArrayView<const LO> ExportLIDs = reverseMode ?
7159 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7160 ArrayView<const LO> RemoteLIDs = reverseMode ?
7161 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7162 ArrayView<const LO> PermuteToLIDs = reverseMode ?
7163 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7164 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7165 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7166 Distributor& Distor = rowTransfer.getDistributor();
7167
7168 // Owning PIDs
7169 Teuchos::Array<int> SourcePids;
7170 Teuchos::Array<int> TargetPids;
7171 int MyPID = getComm()->getRank();
7172
7173 // Temp variables for sub-communicators
7174 RCP<const map_type> ReducedRowMap, ReducedColMap,
7175 ReducedDomainMap, ReducedRangeMap;
7176 RCP<const Comm<int> > ReducedComm;
7177
7178 // If the user gave us a null destGraph, then construct the new
7179 // destination graph. We will replace its column Map later.
7180 if (destGraph.is_null()) {
7181 destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7182 }
7183
7184 /***************************************************/
7185 /***** 1) First communicator restriction phase ****/
7186 /***************************************************/
7187 if (restrictComm) {
7188 ReducedRowMap = MyRowMap->removeEmptyProcesses();
7189 ReducedComm = ReducedRowMap.is_null() ?
7190 Teuchos::null :
7191 ReducedRowMap->getComm();
7192 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7193
// Reuse ReducedRowMap when the domain (range) Map is the very same
// object as the row Map (pointer comparison, not isSameAs).
7194 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7195 ReducedRowMap :
7196 MyDomainMap->replaceCommWithSubset(ReducedComm);
7197 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7198 ReducedRowMap :
7199 MyRangeMap->replaceCommWithSubset(ReducedComm);
7200
7201 // Reset the "my" maps
7202 MyRowMap = ReducedRowMap;
7203 MyDomainMap = ReducedDomainMap;
7204 MyRangeMap = ReducedRangeMap;
7205
7206 // Update my PID, if we've restricted the communicator
7207 if (! ReducedComm.is_null()) {
7208 MyPID = ReducedComm->getRank();
7209 }
7210 else {
7211 MyPID = -2; // For debugging
7212 }
7213 }
7214 else {
7215 ReducedComm = MyRowMap->getComm();
7216 }
7217
7218 /***************************************************/
7219 /***** 2) From Tpetra::DistObject::doTransfer() ****/
7220 /***************************************************/
7221#ifdef HAVE_TPETRA_MMM_TIMINGS
7222 MM = Teuchos::null;
7223 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7224#endif
7225 // Get the owning PIDs
7226 RCP<const import_type> MyImporter = getImporter();
7227
7228 // check whether domain maps of source graph and base domain map is the same
7229 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7230
7231 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7232 // Same domain map as source graph
7233 //
7234 // NOTE: This won't work for restrictComm (because the Import
7235 // doesn't know the restricted PIDs), though writing an
7236 // optimized version for that case would be easy (Import an
7237 // IntVector of the new PIDs). Might want to add this later.
7238 Import_Util::getPids(*MyImporter, SourcePids, false);
7239 }
7240 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7241 // Same domain map as source graph (restricted communicator)
7242 // We need one import from the domain to the column map
7243 ivector_type SourceDomain_pids(getDomainMap(),true);
7244 ivector_type SourceCol_pids(getColMap());
7245 // SourceDomain_pids contains the restricted pids
7246 SourceDomain_pids.putScalar(MyPID);
7247
7248 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7249 SourcePids.resize(getColMap()->getNodeNumElements());
7250 SourceCol_pids.get1dCopy(SourcePids());
7251 }
7252 else if (MyImporter.is_null() && bSameDomainMap) {
7253 // Graph has no off-process entries
7254 SourcePids.resize(getColMap()->getNodeNumElements());
7255 SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7256 }
7257 else if ( ! MyImporter.is_null() &&
7258 ! domainTransfer.is_null() ) {
7259 // general implementation for rectangular matrices with
7260 // domain map different than SourceGraph domain map.
7261 // User has to provide a DomainTransfer object. We need
7262 // two communications (import/export)
7263
7264 // TargetDomain_pids lives on the rebalanced new domain map
7265 ivector_type TargetDomain_pids(domainMap);
7266 TargetDomain_pids.putScalar(MyPID);
7267
7268 // SourceDomain_pids lives on the non-rebalanced old domain map
7269 ivector_type SourceDomain_pids(getDomainMap());
7270
7271 // SourceCol_pids lives on the non-rebalanced old column map
7272 ivector_type SourceCol_pids(getColMap());
7273
7274 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7275 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7276 }
7277 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7278 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7279 }
7280 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7281 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7282 }
7283 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7284 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7285 }
7286 else {
7287 TEUCHOS_TEST_FOR_EXCEPTION(
7288 true, std::logic_error,
7289 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7290 }
7291 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7292 SourcePids.resize(getColMap()->getNodeNumElements());
7293 SourceCol_pids.get1dCopy(SourcePids());
7294 }
7295 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7296 getDomainMap()->isSameAs(*getRowMap())) {
7297 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7298 ivector_type TargetRow_pids(domainMap);
7299 ivector_type SourceRow_pids(getRowMap());
7300 ivector_type SourceCol_pids(getColMap());
7301
7302 TargetRow_pids.putScalar(MyPID);
7303 if (! reverseMode && xferAsImport != nullptr) {
7304 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7305 }
7306 else if (reverseMode && xferAsExport != nullptr) {
7307 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7308 }
7309 else if (! reverseMode && xferAsExport != nullptr) {
7310 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7311 }
7312 else if (reverseMode && xferAsImport != nullptr) {
7313 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7314 }
7315 else {
7316 TEUCHOS_TEST_FOR_EXCEPTION(
7317 true, std::logic_error,
7318 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7319 }
// NOTE(review): this branch's condition does not test MyImporter,
// so it looks reachable with a null MyImporter, which is
// dereferenced on the next line -- confirm.
7320 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7321 SourcePids.resize(getColMap()->getNodeNumElements());
7322 SourceCol_pids.get1dCopy(SourcePids());
7323 }
7324 else {
7325 TEUCHOS_TEST_FOR_EXCEPTION(
7326 true, std::invalid_argument,
7327 prefix << "This method only allows either domainMap == getDomainMap(), "
7328 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7329 }
7330
7331 // Tpetra-specific stuff
7332 size_t constantNumPackets = destGraph->constantNumberOfPackets();
7333 if (constantNumPackets == 0) {
7334 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7335 RemoteLIDs.size());
7336 }
7337 else {
7338 // There are a constant number of packets per element. We
7339 // already know (from the number of "remote" (incoming)
7340 // elements) how many incoming elements we expect, so we can
7341 // resize the buffer accordingly.
7342 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7343 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7344 }
7345
7346 {
7347 // packAndPrepare* methods modify numExportPacketsPerLID_.
7348 destGraph->numExportPacketsPerLID_.modify_host();
7349 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7350 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7351
7352 // Pack & Prepare w/ owning PIDs
7353 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7354 numExportPacketsPerLID, ExportLIDs,
7355 SourcePids, constantNumPackets);
7356 }
7357
7358 // Do the exchange of remote data.
7359#ifdef HAVE_TPETRA_MMM_TIMINGS
7360 MM = Teuchos::null;
7361 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7362#endif
7363
// The four cases below (reverse/forward x variable/constant packet
// count) differ only in which Distributor method is called and in
// whether the per-LID packet counts are exchanged first.
7364 if (communication_needed) {
7365 if (reverseMode) {
7366 if (constantNumPackets == 0) { // variable number of packets per LID
7367 // Make sure that host has the latest version, since we're
7368 // using the version on host. If host has the latest
7369 // version, syncing to host does nothing.
7370 destGraph->numExportPacketsPerLID_.sync_host();
7371 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7372 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7373 destGraph->numImportPacketsPerLID_.sync_host();
7374 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7375 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7376 Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7377 numImportPacketsPerLID);
7378 size_t totalImportPackets = 0;
7379 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7380 totalImportPackets += numImportPacketsPerLID[i];
7381 }
7382
7383 // Reallocation MUST go before setting the modified flag,
7384 // because it may clear out the flags.
7385 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7386 destGraph->imports_.modify_host();
7387 Teuchos::ArrayView<packet_type> hostImports =
7388 getArrayViewFromDualView(destGraph->imports_);
7389 // This is a legacy host pack/unpack path, so use the host
7390 // version of exports_.
7391 destGraph->exports_.sync_host();
7392 Teuchos::ArrayView<const packet_type> hostExports =
7393 getArrayViewFromDualView(destGraph->exports_);
7394 Distor.doReversePostsAndWaits(hostExports,
7395 numExportPacketsPerLID,
7396 hostImports,
7397 numImportPacketsPerLID);
7398 }
7399 else { // constant number of packets per LID
7400 destGraph->imports_.modify_host();
7401 Teuchos::ArrayView<packet_type> hostImports =
7402 getArrayViewFromDualView(destGraph->imports_);
7403 // This is a legacy host pack/unpack path, so use the host
7404 // version of exports_.
7405 destGraph->exports_.sync_host();
7406 Teuchos::ArrayView<const packet_type> hostExports =
7407 getArrayViewFromDualView(destGraph->exports_);
7408 Distor.doReversePostsAndWaits(hostExports,
7409 constantNumPackets,
7410 hostImports);
7411 }
7412 }
7413 else { // forward mode (the default)
7414 if (constantNumPackets == 0) { // variable number of packets per LID
7415 // Make sure that host has the latest version, since we're
7416 // using the version on host. If host has the latest
7417 // version, syncing to host does nothing.
7418 destGraph->numExportPacketsPerLID_.sync_host();
7419 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7420 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7421 destGraph->numImportPacketsPerLID_.sync_host();
7422 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7423 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7424 Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7425 numImportPacketsPerLID);
7426 size_t totalImportPackets = 0;
7427 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7428 totalImportPackets += numImportPacketsPerLID[i];
7429 }
7430
7431 // Reallocation MUST go before setting the modified flag,
7432 // because it may clear out the flags.
7433 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7434 destGraph->imports_.modify_host();
7435 Teuchos::ArrayView<packet_type> hostImports =
7436 getArrayViewFromDualView(destGraph->imports_);
7437 // This is a legacy host pack/unpack path, so use the host
7438 // version of exports_.
7439 destGraph->exports_.sync_host();
7440 Teuchos::ArrayView<const packet_type> hostExports =
7441 getArrayViewFromDualView(destGraph->exports_);
7442 Distor.doPostsAndWaits(hostExports,
7443 numExportPacketsPerLID,
7444 hostImports,
7445 numImportPacketsPerLID);
7446 }
7447 else { // constant number of packets per LID
7448 destGraph->imports_.modify_host();
7449 Teuchos::ArrayView<packet_type> hostImports =
7450 getArrayViewFromDualView(destGraph->imports_);
7451 // This is a legacy host pack/unpack path, so use the host
7452 // version of exports_.
7453 destGraph->exports_.sync_host();
7454 Teuchos::ArrayView<const packet_type> hostExports =
7455 getArrayViewFromDualView(destGraph->exports_);
7456 Distor.doPostsAndWaits(hostExports,
7457 constantNumPackets,
7458 hostImports);
7459 }
7460 }
7461 }
7462
7463 /*********************************************************************/
7464 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7465 /*********************************************************************/
7466
7467#ifdef HAVE_TPETRA_MMM_TIMINGS
7468 MM = Teuchos::null;
7469 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7470#endif
7471
7472 // Backwards compatibility measure. We'll use this again below.
7473 destGraph->numImportPacketsPerLID_.sync_host();
7474 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7475 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7476 destGraph->imports_.sync_host();
7477 Teuchos::ArrayView<const packet_type> hostImports =
7478 getArrayViewFromDualView(destGraph->imports_);
7479 size_t mynnz =
7480 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7481 numImportPacketsPerLID,
7482 constantNumPackets, INSERT,
7483 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7484 size_t N = BaseRowMap->getNodeNumElements();
7485
7486 // Allocations
7487 ArrayRCP<size_t> CSR_rowptr(N+1);
7488 ArrayRCP<GO> CSR_colind_GID;
7489 ArrayRCP<LO> CSR_colind_LID;
7490 CSR_colind_GID.resize(mynnz);
7491
7492 // If LO and GO are the same, we can reuse memory when
7493 // converting the column indices from global to local indices.
7494 if (typeid(LO) == typeid(GO)) {
7495 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7496 }
7497 else {
7498 CSR_colind_LID.resize(mynnz);
7499 }
7500
7501 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7502 // unpackAndCombine method on a "CrsArrays" object? This passing
7503 // in a huge list of arrays is icky. Can't we have a bit of an
7504 // abstraction? Implementing a concrete DistObject subclass only
7505 // takes five methods.
7506 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7507 numImportPacketsPerLID, constantNumPackets,
7508 INSERT, NumSameIDs, PermuteToLIDs,
7509 PermuteFromLIDs, N, mynnz, MyPID,
7510 CSR_rowptr(), CSR_colind_GID(),
7511 SourcePids(), TargetPids);
7512
7513 /**************************************************************/
7514 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7515 /**************************************************************/
7516#ifdef HAVE_TPETRA_MMM_TIMINGS
7517 MM = Teuchos::null;
7518 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7519#endif
7520 // Call an optimized version of makeColMap that avoids the
7521 // Directory lookups (since the Import object knows who owns all
7522 // the GIDs).
7523 Teuchos::Array<int> RemotePids;
7524 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7525 CSR_colind_LID(),
7526 CSR_colind_GID(),
7527 BaseDomainMap,
7528 TargetPids, RemotePids,
7529 MyColMap);
7530
7531 /*******************************************************/
7532 /**** 4) Second communicator restriction phase ****/
7533 /*******************************************************/
7534 if (restrictComm) {
7535 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7536 ReducedRowMap :
7537 MyColMap->replaceCommWithSubset(ReducedComm);
7538 MyColMap = ReducedColMap; // Reset the "my" maps
7539 }
7540
7541 // Replace the col map
7542 destGraph->replaceColMap(MyColMap);
7543
7544 // Short circuit if the processor is no longer in the communicator
7545 //
7546 // NOTE: Epetra modifies all "removed" processes so they
7547 // have a dummy (serial) Map that doesn't touch the original
7548 // communicator. Duplicating that here might be a good idea.
7549 if (ReducedComm.is_null()) {
7550 return;
7551 }
7552
7553 /***************************************************/
7554 /**** 5) Sort ****/
7555 /***************************************************/
// Import-like direction: sort only.  Export-like direction: sort and
// merge, then shrink the index array if the entry count changed.
7556 if ((! reverseMode && xferAsImport != nullptr) ||
7557 (reverseMode && xferAsExport != nullptr)) {
7558 Import_Util::sortCrsEntries(CSR_rowptr(),
7559 CSR_colind_LID());
7560 }
7561 else if ((! reverseMode && xferAsExport != nullptr) ||
7562 (reverseMode && xferAsImport != nullptr)) {
7563 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7564 CSR_colind_LID());
7565 if (CSR_rowptr[N] != mynnz) {
7566 CSR_colind_LID.resize(CSR_rowptr[N]);
7567 }
7568 }
7569 else {
7570 TEUCHOS_TEST_FOR_EXCEPTION(
7571 true, std::logic_error,
7572 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7573 }
7574 /***************************************************/
7575 /**** 6) Reset the colmap and the arrays ****/
7576 /***************************************************/
7577
7578 // Hand the row offsets and local column indices to the
7579 // destination graph (restricted as needed).
7580 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7581
7582 /***************************************************/
7583 /**** 7) Build Importer & Call ESFC ****/
7584 /***************************************************/
7585 // Pre-build the importer using the existing PIDs
7586 Teuchos::ParameterList esfc_params;
7587#ifdef HAVE_TPETRA_MMM_TIMINGS
7588 MM = Teuchos::null;
7589 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7590#endif
7591 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7592#ifdef HAVE_TPETRA_MMM_TIMINGS
7593 MM = Teuchos::null;
7594 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7595
7596 esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7597#endif
7598 if(!params.is_null())
7599 esfc_params.set("compute global constants",params->get("compute global constants",true));
7600
// esfc_params is a local; it is passed through a non-owning RCP
// (second argument 'false').
7601 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7602 MyImport, Teuchos::null, rcp(&esfc_params,false));
7603
7604 }
7605
// Convenience overload taking a single Import: forwards to
// transferAndFillComplete with 'importer' as the row transfer and a
// null domain transfer; all other arguments are passed through.
//
// NOTE(review): this listing skips embedded lines 7608-7609
// (presumably the qualified method name -- likely
// importAndFillComplete -- and the 'destGraph' parameter); confirm
// against the original file.
7606 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7607 void
7610 const import_type& importer,
7611 const Teuchos::RCP<const map_type>& domainMap,
7612 const Teuchos::RCP<const map_type>& rangeMap,
7613 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7614 {
7615 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7616 }
7617
// Convenience overload taking separate row and domain Imports:
// forwards to transferAndFillComplete, wrapping 'domainImporter' with
// Teuchos::rcpFromRef so that it can be passed as the domain-transfer
// argument.
//
// NOTE(review): this listing skips embedded lines 7620-7621
// (presumably the qualified method name -- likely
// importAndFillComplete -- and the 'destGraph' parameter); confirm
// against the original file.
7618 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7619 void
7622 const import_type& rowImporter,
7623 const import_type& domainImporter,
7624 const Teuchos::RCP<const map_type>& domainMap,
7625 const Teuchos::RCP<const map_type>& rangeMap,
7626 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7627 {
7628 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7629 }
7630
7631 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7632 void
7635 const export_type& exporter,
7636 const Teuchos::RCP<const map_type>& domainMap,
7637 const Teuchos::RCP<const map_type>& rangeMap,
7638 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7639 {
7640 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7641 }
7642
7643 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7644 void
7647 const export_type& rowExporter,
7648 const export_type& domainExporter,
7649 const Teuchos::RCP<const map_type>& domainMap,
7650 const Teuchos::RCP<const map_type>& rangeMap,
7651 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7652 {
7653 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7654 }
7655
7656
7657 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7658 void
7661 {
7662 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7663
7664 std::swap(graph.rowMap_, this->rowMap_);
7665 std::swap(graph.colMap_, this->colMap_);
7666 std::swap(graph.rangeMap_, this->rangeMap_);
7667 std::swap(graph.domainMap_, this->domainMap_);
7668
7669 std::swap(graph.importer_, this->importer_);
7670 std::swap(graph.exporter_, this->exporter_);
7671
7672 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7673 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7674
7675 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7676
7677 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7678 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7679
7680 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7681
7682 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7683 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7684 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7685
7686 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7687 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7688 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7689
7690 std::swap(graph.storageStatus_, this->storageStatus_);
7691
7692 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7693 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7694 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7695 std::swap(graph.fillComplete_, this->fillComplete_);
7696 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7697 std::swap(graph.noRedundancies_, this->noRedundancies_);
7698 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7699 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7700 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7701
7702 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7703
7704 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7705 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7706 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7707 }
7708
7709
7710 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7711 bool
7714 {
7715 auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7716 bool output = true;
7717 output = m1.size() == m2.size() ? output : false;
7718 for(auto & it_m: m1)
7719 {
7720 size_t key = it_m.first;
7721 output = m2.find(key) != m2.end() ? output : false;
7722 if(output)
7723 {
7724 auto v1 = m1.find(key)->second;
7725 auto v2 = m2.find(key)->second;
7726 std::sort(v1.begin(), v1.end());
7727 std::sort(v2.begin(), v2.end());
7728
7729 output = v1.size() == v2.size() ? output : false;
7730 for(size_t i=0; output && i<v1.size(); i++)
7731 {
7732 output = v1[i]==v2[i] ? output : false;
7733 }
7734 }
7735 }
7736 return output;
7737 };
7738
7739 bool output = true;
7740
7741 output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7742 output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7743 output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7744 output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7745
7746 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7747
7748 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7749 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7750
7751 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7752
7753 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7754
7755 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7756 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7757 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7758 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7759 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7760 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7761 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7762 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7763 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7764 output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7765
7766 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7767 // nonlocals_ isa std::map<GO, std::vector<GO> >
7768 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7769
7770 // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7771 // - since this is a HostMirror type, it should be in host memory already
7772 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7773 if(output && this->k_numAllocPerRow_.extent(0) > 0)
7774 {
7775 for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7776 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7777 }
7778
7779 // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7780 // - since this is a HostMirror type, it should be in host memory already
7781 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7782 if(output && this->k_numRowEntries_.extent(0) > 0)
7783 {
7784 for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7785 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7786 }
7787
7788 // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7789 output = this->rowPtrsUnpacked_host_.extent(0) == graph.rowPtrsUnpacked_host_.extent(0) ? output : false;
7790 if(output && this->rowPtrsUnpacked_host_.extent(0) > 0)
7791 {
7792 auto rowPtrsThis = this->rowPtrsUnpacked_host_;
7793 auto rowPtrsGraph = graph.rowPtrsUnpacked_host_;
7794 for(size_t i=0; output && i<rowPtrsThis.extent(0); i++)
7795 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7796 }
7797
7798 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7799 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7800 if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7801 {
7802 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7803 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7804 for(size_t i=0; output && i < indThis.extent(0); i++)
7805 output = indThis(i) == indGraph(i) ? output : false;
7806 }
7807
7808 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7809 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7810 if(output && this->gblInds_wdv.extent(0) > 0)
7811 {
7812 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7813 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7814 for(size_t i=0; output && i<indtThis.extent(0); i++)
7815 output = indtThis(i) == indtGraph(i) ? output : false;
7816 }
7817
7818 // Check lclGraph_ isa
7819 // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7820 // Kokkos::StaticCrsGraph has 3 data members in it:
7821 // Kokkos::View<size_type*, ...> row_map
7822 // (local_graph_device_type::row_map_type)
7823 // Kokkos::View<data_type*, ...> entries
7824 // (local_graph_device_type::entries_type)
7825 // Kokkos::View<size_type*, ...> row_block_offsets
7826 // (local_graph_device_type::row_block_type)
7827 // There is currently no Kokkos::StaticCrsGraph comparison function
7828 // that's built-in, so we will just compare
7829 // the three data items here. This can be replaced if Kokkos ever
7830 // puts in its own comparison routine.
7831 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7832 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7833
7834 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7835 ? output : false;
7836 if(output && thisLclGraph.row_map.extent(0) > 0)
7837 {
7838 auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7839 auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7840 for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7841 output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7842 ? output : false;
7843 }
7844
7845 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7846 ? output : false;
7847 if(output && thisLclGraph.entries.extent(0) > 0)
7848 {
7849 auto lclGraph_entries_host_this = thisLclGraph.entries;
7850 auto lclGraph_entries_host_graph = graphLclGraph.entries;
7851 for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7852 output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7853 ? output : false;
7854 }
7855
7856 output =
7857 thisLclGraph.row_block_offsets.extent(0) ==
7858 graphLclGraph.row_block_offsets.extent(0) ? output : false;
7859 if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7860 {
7861 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7862 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7863 for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7864 output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7865 ? output : false;
7866 }
7867
7868 // For Importer and Exporter, we don't need to explicitly check them since
7869 // they will be consistent with the maps.
7870 // Note: importer_ isa Teuchos::RCP<const import_type>
7871 // exporter_ isa Teuchos::RCP<const export_type>
7872
7873 return output;
7874 }
7875
7876
7877
7878} // namespace Tpetra
7879
7880//
7881// Explicit instantiation macros
7882//
7883// Must be expanded from within the Tpetra namespace!
7884//
7885
// Declares the explicit specialization (template<>) of the nonmember
// function importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes a single Import.
// Must be expanded from within the Tpetra namespace.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
                                const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& importer, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7900
// Declares the explicit specialization (template<>) of the nonmember
// function importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes separate row and domain Imports.
// Must be expanded from within the Tpetra namespace.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
                                const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
                                const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7918
7919
// Declares the explicit specialization (template<>) of the nonmember
// function exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes a single Export.
// Must be expanded from within the Tpetra namespace.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
                                const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& exporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7934
// Declares the explicit specialization (template<>) of the nonmember
// function exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes separate row and domain Exports.
// Must be expanded from within the Tpetra namespace.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
                                const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
                                const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                             CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::global_ordinal_type, \
                                                             CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7952
7953
// Explicitly instantiates the class CrsGraph<LO,GO,NODE> and then expands
// the four import/export-and-fill-complete macros for the same
// (LO,GO,NODE) combination.
// Must be expanded from within the Tpetra namespace.
#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
  template class CrsGraph<LO, GO, NODE>; \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7960
7961
7962#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
GO global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph: same Maps, cached sizes, state flags, nonlocal data, and index arrays.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows KDDKDD UVM Removal: Device view takes place of k_lclInds...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows KDDKDD UVM Removal: Device view takes place of k_gblIn...
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
Teuchos::ArrayRCP< const local_ordinal_type > getNodePackedIndices() const
Get a Teuchos::ArrayRCP of the packed column-indices.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
global_size_t globalNumEntries_
Global number of entries in the graph.
size_t getNodeAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows KDDKDD UVM Removal: Device view takes place of lclGraph_...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
Pack this object's data for Import or Export.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
Kokkos::View< const size_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
ProfileType getProfileType() const
Returns the profile type (ProfileType) with which the graph was allocated.
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to its structure.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
bool haveLocalConstants_
Whether this process has computed local constants.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Find offsets into the current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.