Tpetra parallel linear algebra Version of the Day
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_CRSMATRIX_DEF_HPP
41#define TPETRA_CRSMATRIX_DEF_HPP
42
50
53#include "Tpetra_RowMatrix.hpp"
54#include "Tpetra_LocalCrsMatrixOperator.hpp"
55
62#include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
67#include "KokkosSparse_getDiagCopy.hpp"
71#include "Tpetra_Details_packCrsMatrix.hpp"
72#include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
74#include "Teuchos_FancyOStream.hpp"
75#include "Teuchos_RCP.hpp"
76#include "Teuchos_DataAccess.hpp"
77#include "Teuchos_SerialDenseMatrix.hpp" // unused here, could delete
78#include "KokkosBlas.hpp"
79
80#include <memory>
81#include <sstream>
82#include <typeinfo>
83#include <utility>
84#include <vector>
85
86using Teuchos::rcpFromRef;
87
88namespace Tpetra {
89
90namespace { // (anonymous)
91
92 template<class T, class BinaryFunction>
93 T atomic_binary_function_update (volatile T* const dest,
94 const T& inputVal,
95 BinaryFunction f)
96 {
97 T oldVal = *dest;
98 T assume;
99
100 // NOTE (mfh 30 Nov 2015) I do NOT need a fence here for IBM
101 // POWER architectures, because 'newval' depends on 'assume',
102 // which depends on 'oldVal', which depends on '*dest'. This
103 // sets up a chain of read dependencies that should ensure
104 // correct behavior given a sane memory model.
105 do {
106 assume = oldVal;
107 T newVal = f (assume, inputVal);
108 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
109 } while (assume != oldVal);
110
111 return oldVal;
112 }
113} // namespace (anonymous)
114
115//
116// Users must never rely on anything in the Details namespace.
117//
118namespace Details {
119
129template<class Scalar>
130struct AbsMax {
132 Scalar operator() (const Scalar& x, const Scalar& y) {
133 typedef Teuchos::ScalarTraits<Scalar> STS;
134 return std::max (STS::magnitude (x), STS::magnitude (y));
135 }
136};
137
138} // namespace Details
139} // namespace Tpetra
140
141namespace Tpetra {
142
143 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
145 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
146 size_t maxNumEntriesPerRow,
147 const ProfileType pftype,
148 const Teuchos::RCP<Teuchos::ParameterList>& params) :
149 dist_object_type (rowMap)
150 {
151 const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
152 "ProfileType[, RCP<ParameterList>]): ";
153 Teuchos::RCP<crs_graph_type> graph;
154 try {
155 graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
156 pftype, params));
157 }
158 catch (std::exception& e) {
159 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
160 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
161 "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
162 << e.what ());
163 }
164 // myGraph_ not null means that the matrix owns the graph. That's
165 // different than the const CrsGraph constructor, where the matrix
166 // does _not_ own the graph.
167 myGraph_ = graph;
168 staticGraph_ = myGraph_;
169 resumeFill (params);
171 }
172
173 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
175 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
176 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
177 const ProfileType pftype,
178 const Teuchos::RCP<Teuchos::ParameterList>& params) :
179 dist_object_type (rowMap)
180 {
181 const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
182 "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
183 Teuchos::RCP<crs_graph_type> graph;
184 try {
185 using Teuchos::rcp;
186 graph = rcp(new crs_graph_type(rowMap, numEntPerRowToAlloc,
187 pftype, params));
188 }
189 catch (std::exception& e) {
190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
191 (true, std::runtime_error, "CrsGraph constructor "
192 "(RCP<const Map>, ArrayView<const size_t>, "
193 "ProfileType[, RCP<ParameterList>]) threw an exception: "
194 << e.what ());
195 }
196 // myGraph_ not null means that the matrix owns the graph. That's
197 // different than the const CrsGraph constructor, where the matrix
198 // does _not_ own the graph.
199 myGraph_ = graph;
200 staticGraph_ = graph;
201 resumeFill (params);
203 }
204
205
206 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
208 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
209 const Teuchos::RCP<const map_type>& colMap,
210 const size_t maxNumEntPerRow,
211 const ProfileType pftype,
212 const Teuchos::RCP<Teuchos::ParameterList>& params) :
213 dist_object_type (rowMap)
214 {
215 const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
216 "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]): ";
217 const char suffix[] =
218 " Please report this bug to the Tpetra developers.";
219
220 // An artifact of debugging something a while back.
221 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
222 (! staticGraph_.is_null (), std::logic_error,
223 "staticGraph_ is not null at the beginning of the constructor."
224 << suffix);
225 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
226 (! myGraph_.is_null (), std::logic_error,
227 "myGraph_ is not null at the beginning of the constructor."
228 << suffix);
229 Teuchos::RCP<crs_graph_type> graph;
230 try {
231 graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
232 maxNumEntPerRow,
233 pftype, params));
234 }
235 catch (std::exception& e) {
236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
237 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
238 "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
239 "exception: " << e.what ());
240 }
241 // myGraph_ not null means that the matrix owns the graph. That's
242 // different than the const CrsGraph constructor, where the matrix
243 // does _not_ own the graph.
244 myGraph_ = graph;
245 staticGraph_ = myGraph_;
246 resumeFill (params);
248 }
249
250 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
252 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
253 const Teuchos::RCP<const map_type>& colMap,
254 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
255 const ProfileType pftype,
256 const Teuchos::RCP<Teuchos::ParameterList>& params) :
257 dist_object_type (rowMap)
258 {
259 const char tfecfFuncName[] =
260 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
261 "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
262 Teuchos::RCP<crs_graph_type> graph;
263 try {
264 graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
265 numEntPerRowToAlloc,
266 pftype, params));
267 }
268 catch (std::exception& e) {
269 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
270 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
271 "RCP<const Map>, ArrayView<const size_t>, ProfileType[, "
272 "RCP<ParameterList>]) threw an exception: " << e.what ());
273 }
274 // myGraph_ not null means that the matrix owns the graph. That's
275 // different than the const CrsGraph constructor, where the matrix
276 // does _not_ own the graph.
277 myGraph_ = graph;
278 staticGraph_ = graph;
279 resumeFill (params);
281 }
282
283
284 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
286 CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
287 const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
288 dist_object_type (graph->getRowMap ()),
289 staticGraph_ (graph),
290 storageStatus_ (Details::STORAGE_1D_PACKED)
291 {
292 using std::endl;
293 typedef typename local_matrix_device_type::values_type values_type;
294 const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
295 "RCP<ParameterList>]): ";
296 const bool verbose = Details::Behavior::verbose("CrsMatrix");
297
298 std::unique_ptr<std::string> prefix;
299 if (verbose) {
300 prefix = this->createPrefix("CrsMatrix", "CrsMatrix(graph,params)");
301 std::ostringstream os;
302 os << *prefix << "Start" << endl;
303 std::cerr << os.str ();
304 }
305
306 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
307 (graph.is_null (), std::runtime_error, "Input graph is null.");
308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
309 (! graph->isFillComplete (), std::runtime_error, "Input graph "
310 "is not fill complete. You must call fillComplete on the "
311 "graph before using it to construct a CrsMatrix. Note that "
312 "calling resumeFill on the graph makes it not fill complete, "
313 "even if you had previously called fillComplete. In that "
314 "case, you must call fillComplete on the graph again.");
315
316 // The graph is fill complete, so it is locally indexed and has a
317 // fixed structure. This means we can allocate the (1-D) array of
318 // values and build the local matrix right now. Note that the
319 // local matrix's number of columns comes from the column Map, not
320 // the domain Map.
321
322 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
323 if (verbose) {
324 std::ostringstream os;
325 os << *prefix << "Allocate values: " << numEnt << endl;
326 std::cerr << os.str ();
327 }
328
329 values_type val ("Tpetra::CrsMatrix::values", numEnt);
330 valuesPacked_wdv = values_wdv_type(val);
331 valuesUnpacked_wdv = valuesPacked_wdv;
332
333 // FIXME (22 Jun 2016) I would very much like to get rid of
334 // k_values1D_ at some point. I find it confusing to have all
335 // these extra references lying around.
336// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
337
339
340 if (verbose) {
341 std::ostringstream os;
342 os << *prefix << "Done" << endl;
343 std::cerr << os.str ();
344 }
345 }
346
// Constructor that builds a matrix from an existing matrix ('matrix')
// and a fill-complete graph ('graph').  The new matrix uses 'graph' as
// its static graph, takes its storage status from 'matrix', and builds
// its packed / unpacked values from the first
// graph->lclIndsPacked_wdv.extent(0) / graph->lclIndsUnpacked_wdv.extent(0)
// entries of the corresponding values of 'matrix'.
//
// NOTE(review): the line(s) declaring the first parameter ('matrix')
// are not visible in this listing; presumably it is a reference to
// another CrsMatrix, since its private members are read below --
// confirm against the header.
// NOTE(review): 'params' is not referenced in the visible body.
347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
350 const Teuchos::RCP<const crs_graph_type>& graph,
351 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// NOTE(review): 'graph' is dereferenced here, before the is_null()
// check in the body below can run; a null graph never reaches that
// check.
352 dist_object_type (graph->getRowMap ()),
353 staticGraph_ (graph),
354 storageStatus_ (matrix.storageStatus_)
355 {
// NOTE(review): this function name looks copied from the
// (graph, values[, params]) constructor; it does not mention 'matrix'.
356 const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>, "
357 "local_matrix_device_type::values_type, "
358 "[,RCP<ParameterList>]): ";
359 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
360 (graph.is_null (), std::runtime_error, "Input graph is null.");
361 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
362 (! graph->isFillComplete (), std::runtime_error, "Input graph "
363 "is not fill complete. You must call fillComplete on the "
364 "graph before using it to construct a CrsMatrix. Note that "
365 "calling resumeFill on the graph makes it not fill complete, "
366 "even if you had previously called fillComplete. In that "
367 "case, you must call fillComplete on the graph again.");
368
// Packed values: entries [0, number of packed local indices) of the
// source matrix's packed values.
369 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
370 valuesPacked_wdv = values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
371
// Unpacked values: same construction, sized by the graph's unpacked
// local indices.
372 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
373 valuesUnpacked_wdv = values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
374
376 }
377
378
379 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
381 CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
382 const typename local_matrix_device_type::values_type& values,
383 const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
384 dist_object_type (graph->getRowMap ()),
385 staticGraph_ (graph),
386 storageStatus_ (Details::STORAGE_1D_PACKED)
387 {
388 const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>, "
389 "local_matrix_device_type::values_type, "
390 "[,RCP<ParameterList>]): ";
391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
392 (graph.is_null (), std::runtime_error, "Input graph is null.");
393 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
394 (! graph->isFillComplete (), std::runtime_error, "Input graph "
395 "is not fill complete. You must call fillComplete on the "
396 "graph before using it to construct a CrsMatrix. Note that "
397 "calling resumeFill on the graph makes it not fill complete, "
398 "even if you had previously called fillComplete. In that "
399 "case, you must call fillComplete on the graph again.");
400
401 // The graph is fill complete, so it is locally indexed and has a
402 // fixed structure. This means we can allocate the (1-D) array of
403 // values and build the local matrix right now. Note that the
404 // local matrix's number of columns comes from the column Map, not
405 // the domain Map.
406
407 valuesPacked_wdv = values_wdv_type(values);
408 valuesUnpacked_wdv = valuesPacked_wdv;
409
410 // FIXME (22 Jun 2016) I would very much like to get rid of
411 // k_values1D_ at some point. I find it confusing to have all
412 // these extra references lying around.
413 // KDDKDD ALMOST THERE, MARK!
414// k_values1D_ = valuesUnpacked_wdv.getDeviceView(Access::ReadWrite);
415
417 }
418
419 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
421 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
422 const Teuchos::RCP<const map_type>& colMap,
423 const typename local_graph_device_type::row_map_type& rowPointers,
424 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
425 const typename local_matrix_device_type::values_type& values,
426 const Teuchos::RCP<Teuchos::ParameterList>& params) :
427 dist_object_type (rowMap),
428 storageStatus_ (Details::STORAGE_1D_PACKED)
429 {
430 using Details::getEntryOnHost;
431 using Teuchos::RCP;
432 using std::endl;
433 const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
434 "RCP<const Map>, ptr, ind, val[, params]): ";
435 const char suffix[] =
436 ". Please report this bug to the Tpetra developers.";
437 const bool debug = Details::Behavior::debug("CrsMatrix");
438 const bool verbose = Details::Behavior::verbose("CrsMatrix");
439
440 std::unique_ptr<std::string> prefix;
441 if (verbose) {
442 prefix = this->createPrefix(
443 "CrsMatrix", "CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
444 std::ostringstream os;
445 os << *prefix << "Start" << endl;
446 std::cerr << os.str ();
447 }
448
449 // Check the user's input. Note that this might throw only on
450 // some processes but not others, causing deadlock. We prefer
451 // deadlock due to exceptions to segfaults, because users can
452 // catch exceptions.
453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
454 (values.extent(0) != columnIndices.extent(0),
455 std::invalid_argument, "values.extent(0)=" << values.extent(0)
456 << " != columnIndices.extent(0) = " << columnIndices.extent(0)
457 << ".");
458 if (debug && rowPointers.extent(0) != 0) {
459 const size_t numEnt =
460 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
461 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
462 (numEnt != size_t(columnIndices.extent(0)) ||
463 numEnt != size_t(values.extent(0)),
464 std::invalid_argument, "Last entry of rowPointers says that "
465 "the matrix has " << numEnt << " entr"
466 << (numEnt != 1 ? "ies" : "y") << ", but the dimensions of "
467 "columnIndices and values don't match this. "
468 "columnIndices.extent(0)=" << columnIndices.extent (0)
469 << " and values.extent(0)=" << values.extent (0) << ".");
470 }
471
472 RCP<crs_graph_type> graph;
473 try {
474 graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
475 columnIndices, params));
476 }
477 catch (std::exception& e) {
478 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
479 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
480 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
481 << e.what ());
482 }
483 // The newly created CrsGraph _must_ have a local graph at this
484 // point. We don't really care whether CrsGraph's constructor
485 // deep-copies or shallow-copies the input, but the dimensions
486 // have to be right. That's how we tell whether the CrsGraph has
487 // a local graph.
488 auto lclGraph = graph->getLocalGraphDevice ();
489 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
490 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
491 lclGraph.entries.extent (0) != columnIndices.extent (0),
492 std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
493 "ind[, params]) did not set the local graph correctly." << suffix);
494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
495 (lclGraph.entries.extent (0) != values.extent (0),
496 std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
497 "params]) did not set the local graph correctly. "
498 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
499 << " != values.extent(0) = " << values.extent (0) << suffix);
500
501 // myGraph_ not null means that the matrix owns the graph. This
502 // is true because the column indices come in as nonconst,
503 // implying shared ownership.
504 myGraph_ = graph;
505 staticGraph_ = graph;
506
507 // The graph may not be fill complete yet. However, it is locally
508 // indexed (since we have a column Map) and has a fixed structure
509 // (due to the input arrays). This means we can allocate the
510 // (1-D) array of values and build the local matrix right now.
511 // Note that the local matrix's number of columns comes from the
512 // column Map, not the domain Map.
513
514 valuesPacked_wdv = values_wdv_type(values);
515 valuesUnpacked_wdv = valuesPacked_wdv;
516
517 // FIXME (22 Jun 2016) I would very much like to get rid of
518 // k_values1D_ at some point. I find it confusing to have all
519 // these extra references lying around.
520// this->k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
521
523 if (verbose) {
524 std::ostringstream os;
525 os << *prefix << "Done" << endl;
526 std::cerr << os.str();
527 }
528 }
529
530 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
532 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
533 const Teuchos::RCP<const map_type>& colMap,
534 const Teuchos::ArrayRCP<size_t>& ptr,
535 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
536 const Teuchos::ArrayRCP<Scalar>& val,
537 const Teuchos::RCP<Teuchos::ParameterList>& params) :
538 dist_object_type (rowMap),
539 storageStatus_ (Details::STORAGE_1D_PACKED)
540 {
541 using Kokkos::Compat::getKokkosViewDeepCopy;
542 using Teuchos::av_reinterpret_cast;
543 using Teuchos::RCP;
544 using values_type = typename local_matrix_device_type::values_type;
545 using IST = impl_scalar_type;
546 const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
547 "RCP<const Map>, ptr, ind, val[, params]): ";
548
549 RCP<crs_graph_type> graph;
550 try {
551 graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
552 ind, params));
553 }
554 catch (std::exception& e) {
555 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
556 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
557 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
558 "RCP<ParameterList>]) threw an exception: " << e.what ());
559 }
560 // myGraph_ not null means that the matrix owns the graph. This
561 // is true because the column indices come in as nonconst,
562 // implying shared ownership.
563 myGraph_ = graph;
564 staticGraph_ = graph;
565
566 // The graph may not be fill complete yet. However, it is locally
567 // indexed (since we have a column Map) and has a fixed structure
568 // (due to the input arrays). This means we can allocate the
569 // (1-D) array of values and build the local matrix right now.
570 // Note that the local matrix's number of columns comes from the
571 // column Map, not the domain Map.
572
573 // The graph _must_ have a local graph at this point. We don't
574 // really care whether CrsGraph's constructor deep-copies or
575 // shallow-copies the input, but the dimensions have to be right.
576 // That's how we tell whether the CrsGraph has a local graph.
577 auto lclGraph = staticGraph_->getLocalGraphDevice ();
578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
579 (size_t (lclGraph.row_map.extent (0)) != size_t (ptr.size ()) ||
580 size_t (lclGraph.entries.extent (0)) != size_t (ind.size ()),
581 std::logic_error, "CrsGraph's constructor (rowMap, colMap, "
582 "ptr, ind[, params]) did not set the local graph correctly. "
583 "Please report this bug to the Tpetra developers.");
584
585 values_type valIn =
586 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
587 valuesPacked_wdv = values_wdv_type(valIn);
588 valuesUnpacked_wdv = valuesPacked_wdv;
589
590 // FIXME (22 Jun 2016) I would very much like to get rid of
591 // k_values1D_ at some point. I find it confusing to have all
592 // these extra references lying around.
593// this->k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
594
596 }
597
598 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
600 CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
601 const Teuchos::RCP<const map_type>& colMap,
602 const local_matrix_device_type& lclMatrix,
603 const Teuchos::RCP<Teuchos::ParameterList>& params) :
604 dist_object_type (rowMap),
605 storageStatus_ (Details::STORAGE_1D_PACKED),
606 fillComplete_ (true)
607 {
608 const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
609 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
610 const char suffix[] =
611 " Please report this bug to the Tpetra developers.";
612
613 Teuchos::RCP<crs_graph_type> graph;
614 try {
615 graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
616 lclMatrix.graph, params));
617 }
618 catch (std::exception& e) {
619 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
620 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
621 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
622 "exception: " << e.what ());
623 }
624 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
625 (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
626 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
627 "did not produce a fill-complete graph. Please report this bug to the "
628 "Tpetra developers.");
629 // myGraph_ not null means that the matrix owns the graph. This
630 // is true because the column indices come in as nonconst through
631 // the matrix, implying shared ownership.
632 myGraph_ = graph;
633 staticGraph_ = graph;
634
635 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
636 valuesUnpacked_wdv = valuesPacked_wdv;
637
638// k_values1D_ = valuesUnpacked_wdv.getDeviceView(Access::ReadWrite);
639
640 const bool callComputeGlobalConstants = params.get () == nullptr ||
641 params->get ("compute global constants", true);
642 if (callComputeGlobalConstants) {
643 this->computeGlobalConstants ();
644 }
645
646 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
647 (isFillActive (), std::logic_error,
648 "At the end of a CrsMatrix constructor that should produce "
649 "a fillComplete matrix, isFillActive() is true." << suffix);
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! isFillComplete (), std::logic_error, "At the end of a "
652 "CrsMatrix constructor that should produce a fillComplete "
653 "matrix, isFillComplete() is false." << suffix);
655 }
656
657 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
659 CrsMatrix (const local_matrix_device_type& lclMatrix,
660 const Teuchos::RCP<const map_type>& rowMap,
661 const Teuchos::RCP<const map_type>& colMap,
662 const Teuchos::RCP<const map_type>& domainMap,
663 const Teuchos::RCP<const map_type>& rangeMap,
664 const Teuchos::RCP<Teuchos::ParameterList>& params) :
665 dist_object_type (rowMap),
666 storageStatus_ (Details::STORAGE_1D_PACKED),
667 fillComplete_ (true)
668 {
669 const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
670 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
671 "local_matrix_device_type[, RCP<ParameterList>]): ";
672 const char suffix[] =
673 " Please report this bug to the Tpetra developers.";
674
675 Teuchos::RCP<crs_graph_type> graph;
676 try {
677 graph = Teuchos::rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
678 domainMap, rangeMap, params));
679 }
680 catch (std::exception& e) {
681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
682 (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
683 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
684 "RCP<ParameterList>]) threw an exception: " << e.what ());
685 }
686 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
687 (! graph->isFillComplete (), std::logic_error, "CrsGraph "
688 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
689 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
690 "not produce a fillComplete graph." << suffix);
691 // myGraph_ not null means that the matrix owns the graph. This
692 // is true because the column indices come in as nonconst through
693 // the matrix, implying shared ownership.
694 myGraph_ = graph;
695 staticGraph_ = graph;
696
697 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
698 valuesUnpacked_wdv = valuesPacked_wdv;
699// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
700
701 const bool callComputeGlobalConstants = params.get () == nullptr ||
702 params->get ("compute global constants", true);
703 if (callComputeGlobalConstants) {
704 this->computeGlobalConstants ();
705 }
706
707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
708 (isFillActive (), std::logic_error,
709 "At the end of a CrsMatrix constructor that should produce "
710 "a fillComplete matrix, isFillActive() is true." << suffix);
711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
712 (! isFillComplete (), std::logic_error, "At the end of a "
713 "CrsMatrix constructor that should produce a fillComplete "
714 "matrix, isFillComplete() is false." << suffix);
716 }
717
718 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
720 CrsMatrix (const local_matrix_device_type& lclMatrix,
721 const Teuchos::RCP<const map_type>& rowMap,
722 const Teuchos::RCP<const map_type>& colMap,
723 const Teuchos::RCP<const map_type>& domainMap,
724 const Teuchos::RCP<const map_type>& rangeMap,
725 const Teuchos::RCP<const import_type>& importer,
726 const Teuchos::RCP<const export_type>& exporter,
727 const Teuchos::RCP<Teuchos::ParameterList>& params) :
728 dist_object_type (rowMap),
729 storageStatus_ (Details::STORAGE_1D_PACKED),
730 fillComplete_ (true)
731 {
732 using Teuchos::rcp;
733 const char tfecfFuncName[] = "Tpetra::CrsMatrix"
734 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
735 const char suffix[] =
736 " Please report this bug to the Tpetra developers.";
737
738 Teuchos::RCP<crs_graph_type> graph;
739 try {
740 graph = rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
741 domainMap, rangeMap, importer,
742 exporter, params));
743 }
744 catch (std::exception& e) {
745 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
746 (true, std::runtime_error, "CrsGraph constructor "
747 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
748 "params) threw: " << e.what ());
749 }
750 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
751 (!graph->isFillComplete (), std::logic_error, "CrsGraph "
752 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
753 "Export, params) did not produce a fill-complete graph. "
754 "Please report this bug to the Tpetra developers.");
755 // myGraph_ not null means that the matrix owns the graph. This
756 // is true because the column indices come in as nonconst through
757 // the matrix, implying shared ownership.
758 myGraph_ = graph;
759 staticGraph_ = graph;
760
761 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
762 valuesUnpacked_wdv = valuesPacked_wdv;
763// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
764
765 const bool callComputeGlobalConstants = params.get () == nullptr ||
766 params->get ("compute global constants", true);
767 if (callComputeGlobalConstants) {
768 this->computeGlobalConstants ();
769 }
770
771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
772 (isFillActive (), std::logic_error,
773 "At the end of a CrsMatrix constructor that should produce "
774 "a fillComplete matrix, isFillActive() is true." << suffix);
775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
776 (! isFillComplete (), std::logic_error, "At the end of a "
777 "CrsMatrix constructor that should produce a fillComplete "
778 "matrix, isFillComplete() is false." << suffix);
780 }
781
// Constructor that builds a matrix from another matrix ('source'),
// either sharing its values (Teuchos::View) or deep-copying them
// (Teuchos::Copy).  It first delegates to the (graph, values)
// constructor using source's graph and local values view.
//
// Throws std::invalid_argument if source is not fill complete, or if
// copyOrView is neither Teuchos::Copy nor Teuchos::View.
//
// NOTE(review): the line(s) declaring the first parameter ('source')
// are not visible in this listing -- confirm against the header.
782 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
785 const Teuchos::DataAccess copyOrView)
786 : CrsMatrix (source.getCrsGraph (), source.getLocalValuesView ())
787 {
788 const char tfecfFuncName[] = "Tpetra::CrsMatrix("
789 "const CrsMatrix&, const Teuchos::DataAccess): ";
// NOTE(review): the condition tests the source *matrix*, while the
// message says "Source graph".
790 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
791 (! source.isFillComplete (), std::invalid_argument,
792 "Source graph must be fillComplete().");
793
794 if (copyOrView == Teuchos::Copy) {
// Deep copy: allocate an uninitialized view of the same length, copy
// the source's values into it, and replace the values that the
// delegated constructor set up.
795 using values_type = typename local_matrix_device_type::values_type;
796 values_type vals = source.getLocalValuesView ();
797 using Kokkos::view_alloc;
798 using Kokkos::WithoutInitializing;
799 values_type newvals (view_alloc ("val", WithoutInitializing),
800 vals.extent (0));
801 Kokkos::deep_copy (newvals, vals);
802 valuesPacked_wdv = values_wdv_type(newvals);
803 valuesUnpacked_wdv = valuesPacked_wdv;
804// k_values1D_ = newvals;
// NOTE(review): this test looks redundant; the check at the top of
// the body already throws when source is not fill complete.
805 if (source.isFillComplete ()) {
806 fillComplete (source.getDomainMap (), source.getRangeMap ());
807 }
808 }
809 else if (copyOrView == Teuchos::View) {
// View: keep the values set up by the delegated constructor.
810 return;
811 }
812 else {
813 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
814 (true, std::invalid_argument, "Second argument 'copyOrView' "
815 "has an invalid value " << copyOrView << ". Valid values "
816 "include Teuchos::Copy = " << Teuchos::Copy << " and "
817 "Teuchos::View = " << Teuchos::View << ".");
818 }
819 }
820
// Exchange the listed data members between *this and 'crs_matrix',
// one std::swap per member: the import/export multivectors, both
// graph pointers, both values containers, the storage status, the
// fill-complete flag, the nonlocal entries, and the cached Frobenius
// norm (frobNorm_).
//
// NOTE(review): the line(s) giving the function name and the
// declaration of 'crs_matrix' are not visible in this listing.
821 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
822 void
825 {
826 std::swap(crs_matrix.importMV_, this->importMV_);
827 std::swap(crs_matrix.exportMV_, this->exportMV_);
828 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
829 std::swap(crs_matrix.myGraph_, this->myGraph_);
830 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
831 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
832 std::swap(crs_matrix.storageStatus_, this->storageStatus_);
833 std::swap(crs_matrix.fillComplete_, this->fillComplete_);
834 std::swap(crs_matrix.nonlocals_, this->nonlocals_);
835 std::swap(crs_matrix.frobNorm_, this->frobNorm_);
836 }
837
838 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
839 Teuchos::RCP<const Teuchos::Comm<int> >
841 getComm () const {
842 return getCrsGraphRef ().getComm ();
843 }
844
// Returns the profile type reported by this matrix's graph.
// NOTE(review): listing lines 846-847 (return type and class qualifier)
// are absent from this extract.
845 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
848 getProfileType () const {
849 return this->getCrsGraphRef ().getProfileType ();
850 }
851
852 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
853 bool
855 isFillComplete () const {
856 return fillComplete_;
857 }
858
// Returns the negation of the fillComplete_ flag.
// NOTE(review): listing lines 861-862 (class qualifier and the function
// name with its opening brace) are absent from this extract; the name is
// presumably isFillActive -- confirm against the class declaration.
859 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
860 bool
863 return ! fillComplete_;
864 }
865
866 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
867 bool
869 isStorageOptimized () const {
870 return this->getCrsGraphRef ().isStorageOptimized ();
871 }
872
873 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
874 bool
876 isLocallyIndexed () const {
877 return getCrsGraphRef ().isLocallyIndexed ();
878 }
879
880 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
881 bool
883 isGloballyIndexed () const {
884 return getCrsGraphRef ().isGloballyIndexed ();
885 }
886
887 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
888 bool
890 hasColMap () const {
891 return getCrsGraphRef ().hasColMap ();
892 }
893
// Returns the global number of entries reported by this matrix's graph.
// NOTE(review): listing lines 895-896 (return type and class qualifier)
// are absent from this extract.
894 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
897 getGlobalNumEntries () const {
898 return getCrsGraphRef ().getGlobalNumEntries ();
899 }
900
901 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
902 size_t
904 getNodeNumEntries () const {
905 return getCrsGraphRef ().getNodeNumEntries ();
906 }
907
// Returns the global number of rows reported by this matrix's graph.
// NOTE(review): listing lines 909-910 (return type and class qualifier)
// are absent from this extract.
908 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
911 getGlobalNumRows () const {
912 return getCrsGraphRef ().getGlobalNumRows ();
913 }
914
// Returns the global number of columns reported by this matrix's graph.
// NOTE(review): listing lines 916-917 (return type and class qualifier)
// are absent from this extract.
915 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
918 getGlobalNumCols () const {
919 return getCrsGraphRef ().getGlobalNumCols ();
920 }
921
922 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
923 size_t
925 getNodeNumRows () const {
926 return getCrsGraphRef ().getNodeNumRows ();
927 }
928
929 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
930 size_t
932 getNodeNumCols () const {
933 return getCrsGraphRef ().getNodeNumCols ();
934 }
935
936
937 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
938 size_t
940 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
941 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
942 }
943
944 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
945 size_t
947 getNumEntriesInLocalRow (LocalOrdinal localRow) const {
948 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
949 }
950
// Forwards to the graph's getGlobalMaxNumRowEntries().
// NOTE(review): listing lines 953-954 (class qualifier and the function
// name with its opening brace) are absent from this extract; the name is
// presumably getGlobalMaxNumRowEntries -- confirm.
951 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
952 size_t
955 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
956 }
957
// Forwards to the graph's getNodeMaxNumRowEntries().
// NOTE(review): listing lines 960-961 (class qualifier and the function
// name with its opening brace) are absent from this extract; the name is
// presumably getNodeMaxNumRowEntries -- confirm.
958 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
959 size_t
962 return getCrsGraphRef ().getNodeMaxNumRowEntries ();
963 }
964
965 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
966 GlobalOrdinal
968 getIndexBase () const {
969 return getRowMap ()->getIndexBase ();
970 }
972 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
973 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
975 getRowMap () const {
976 return getCrsGraphRef ().getRowMap ();
977 }
978
979 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
980 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
982 getColMap () const {
983 return getCrsGraphRef ().getColMap ();
984 }
985
986 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
987 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
989 getDomainMap () const {
990 return getCrsGraphRef ().getDomainMap ();
991 }
992
993 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
994 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
996 getRangeMap () const {
997 return getCrsGraphRef ().getRangeMap ();
998 }
999
1000 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1001 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
1003 getGraph () const {
1004 if (staticGraph_ != Teuchos::null) {
1005 return staticGraph_;
1006 }
1007 return myGraph_;
1008 }
1009
1010 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1011 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
1013 getCrsGraph () const {
1014 if (staticGraph_ != Teuchos::null) {
1015 return staticGraph_;
1016 }
1017 return myGraph_;
1018 }
1019
// Returns the matrix's graph by dereferencing staticGraph_ when it is
// non-null, and myGraph_ otherwise.  When HAVE_TPETRA_DEBUG is defined,
// throws std::logic_error if both pointers are null; without it, no
// null check is made before dereferencing myGraph_.
// NOTE(review): listing lines 1021-1022 (return type and class
// qualifier) are absent from this extract.
1020 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1023 getCrsGraphRef () const
1024 {
1025#ifdef HAVE_TPETRA_DEBUG
1026 constexpr bool debug = true;
1027#else
1028 constexpr bool debug = false;
1029#endif // HAVE_TPETRA_DEBUG
1030
1031 if (! this->staticGraph_.is_null ()) {
1032 return * (this->staticGraph_);
1033 }
1034 else {
1035 if (debug) {
1036 const char tfecfFuncName[] = "getCrsGraphRef: ";
1037 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1038 (this->myGraph_.is_null (), std::logic_error,
1039 "Both staticGraph_ and myGraph_ are null. "
1040 "Please report this bug to the Tpetra developers.");
1041 }
1042 return * (this->myGraph_);
1043 }
1044 }
1045
1046#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (compiled only under TPETRA_ENABLE_DEPRECATED_CODE)
// that simply returns getLocalMatrixDevice().
// NOTE(review): listing line 1048 (the return type) is absent from this
// extract.
1047 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1049 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1050 getLocalMatrix () const
1051 {
1052 return getLocalMatrixDevice();
1053 }
1054#endif // TPETRA_ENABLE_DEPRECATED_CODE
// Builds and returns a local_matrix_device_type from the static graph's
// column-Map element count, the packed values' device view (requested
// with Access::ReadWrite), and the static graph's local device graph.
// staticGraph_ and its column Map are dereferenced without a null check.
// NOTE(review): listing lines 1057-1058 (return type and class
// qualifier) are absent from this extract.
1056 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1059 getLocalMatrixDevice () const
1060 {
1061 auto numCols = staticGraph_->getColMap()->getNodeNumElements();
1062 return local_matrix_device_type("Tpetra::CrsMatrix::lclMatrixDevice",
1063 numCols,
1064 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
1065 staticGraph_->getLocalGraphDevice());
1066 }
1067
1068 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1069 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
1071 getLocalMatrixHost () const
1073 auto numCols = staticGraph_->getColMap()->getNodeNumElements();
1074 return local_matrix_host_type("Tpetra::CrsMatrix::lclMatrixHost", numCols,
1075 valuesPacked_wdv.getHostView(Access::ReadWrite),
1076 staticGraph_->getLocalGraphHost());
1077 }
1078
1079// KDDKDD NOT SURE WHY THIS MUST RETURN A SHARED_PTR
1080 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1081 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1084 {
1085 auto localMatrix = getLocalMatrixDevice();
1086#ifdef HAVE_TPETRACORE_CUDA
1087#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
1088 if(this->getNodeNumEntries() <= size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()) &&
1089 std::is_same<Node, Kokkos::Compat::KokkosCudaWrapperNode>::value)
1090 {
1091 if(this->ordinalRowptrs.data() == nullptr)
1092 {
1093 auto originalRowptrs = localMatrix.graph.row_map;
1094 //create LocalOrdinal-typed copy of the local graph's rowptrs.
1095 //This enables the LocalCrsMatrixOperator to use cuSPARSE SpMV.
1096 this->ordinalRowptrs = ordinal_rowptrs_type(
1097 Kokkos::ViewAllocateWithoutInitializing("CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
1098 auto ordinalRowptrs_ = this->ordinalRowptrs; //don't want to capture 'this'
1099 Kokkos::parallel_for("CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1100 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1101 KOKKOS_LAMBDA(LocalOrdinal i)
1102 {
1103 ordinalRowptrs_(i) = originalRowptrs(i);
1104 });
1105 }
1106 //return local operator using ordinalRowptrs
1107 return std::make_shared<local_multiply_op_type>(
1108 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
1109 }
1110#endif
1111#endif
1112// KDDKDD NOT SURE WHY THIS MUST RETURN A SHARED_PTR
1113 return std::make_shared<local_multiply_op_type>(
1114 std::make_shared<local_matrix_device_type>(
1115 getLocalMatrixDevice()));
1116 }
1118 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1119 bool
1121 isStaticGraph () const {
1122 return myGraph_.is_null ();
1123 }
1124
1125 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1126 bool
1128 hasTransposeApply () const {
1129 return true;
1130 }
1131
1132 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1133 bool
1135 supportsRowViews () const {
1136 return true;
1137 }
1138
// Allocates the unpacked values array (valuesUnpacked_wdv), sized to the
// last entry of the static graph's unpacked host row offsets.  If
// gas == GraphNotYetAllocated, first asks myGraph_ to allocate its
// indices.
//
// lg      [in] Forwarded to myGraph_->allocateIndices.
// gas     [in] Caller's statement of whether the graph's indices are
//              already allocated; cross-checked against the graph when
//              Behavior::debug("CrsMatrix") is true.
// verbose [in] If true, print progress messages to std::cerr.
//
// Throws std::logic_error when a consistency check fails, and
// std::runtime_error when allocateIndices throws.
//
// NOTE(review): listing lines 1141, 1144, 1146 and 1236 are absent from
// this extract -- presumably the class qualifier, the opening brace, a
// using-declaration for ProfilingRegion, and the closing brace of the
// last `if (debug)`.  Confirm against the original file.
1139 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1140 void
1142 allocateValues (ELocalGlobal lg, GraphAllocationStatus gas,
1143 const bool verbose)
1145 using Details::Behavior;
1147 using std::endl;
1148 const char tfecfFuncName[] = "allocateValues: ";
1149 const char suffix[] =
1150 " Please report this bug to the Tpetra developers.";
1151 ProfilingRegion region("Tpetra::CrsMatrix::allocateValues");
1152
// prefix is only created (and only dereferenced) when verbose is true.
1153 std::unique_ptr<std::string> prefix;
1154 if (verbose) {
1155 prefix = this->createPrefix("CrsMatrix", "allocateValues");
1156 std::ostringstream os;
1157 os << *prefix << "lg: "
1158 << (lg == LocalIndices ? "Local" : "Global") << "Indices"
1159 << ", gas: Graph"
1160 << (gas == GraphAlreadyAllocated ? "Already" : "NotYet")
1161 << "Allocated" << endl;
1162 std::cerr << os.str();
1163 }
1164
// Debug-only consistency checks between gas, staticGraph_ and myGraph_.
1165 const bool debug = Behavior::debug("CrsMatrix");
1166 if (debug) {
1167 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1168 (this->staticGraph_.is_null (), std::logic_error,
1169 "staticGraph_ is null." << suffix);
1170
1171 // If the graph indices are already allocated, then gas should be
1172 // GraphAlreadyAllocated. Otherwise, gas should be
1173 // GraphNotYetAllocated.
1174 if ((gas == GraphAlreadyAllocated) !=
1175 staticGraph_->indicesAreAllocated ()) {
// Exactly one of the two tests below fires; they differ only in where
// "not " is inserted into the message.
1176 const char err1[] = "The caller has asserted that the graph "
1177 "is ";
1178 const char err2[] = "already allocated, but the static graph "
1179 "says that its indices are ";
1180 const char err3[] = "already allocated. ";
1181 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1182 (gas == GraphAlreadyAllocated &&
1183 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1184 err1 << err2 << "not " << err3 << suffix);
1185 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1186 (gas != GraphAlreadyAllocated &&
1187 staticGraph_->indicesAreAllocated (), std::logic_error,
1188 err1 << "not " << err2 << err3 << suffix);
1189 }
1190
1191 // If the graph is unallocated, then it had better be a
1192 // matrix-owned graph. ("Matrix-owned graph" means that the
1193 // matrix gets to define the graph structure. If the CrsMatrix
1194 // constructor that takes an RCP<const CrsGraph> was used, then
1195 // the matrix does _not_ own the graph.)
1196 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1197 (! this->staticGraph_->indicesAreAllocated () &&
1198 this->myGraph_.is_null (), std::logic_error,
1199 "The static graph says that its indices are not allocated, "
1200 "but the graph is not owned by the matrix." << suffix);
1201 }
1202
// Allocate the graph's indices first if the caller says they are not
// yet allocated.  Any exception from the graph is rethrown as
// std::runtime_error.
1203 if (gas == GraphNotYetAllocated) {
1204 if (debug) {
1205 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1206 (this->myGraph_.is_null (), std::logic_error,
1207 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1208 }
1209 try {
1210 this->myGraph_->allocateIndices (lg, verbose);
1211 }
1212 catch (std::exception& e) {
1213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1214 (true, std::runtime_error, "CrsGraph::allocateIndices "
1215 "threw an exception: " << e.what ());
1216 }
1217 catch (...) {
1218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1219 (true, std::runtime_error, "CrsGraph::allocateIndices "
1220 "threw an exception not a subclass of std::exception.");
1221 }
1222 }
1223
1224 // Allocate matrix values.
1225 // "Static profile" means that the number of matrix entries in
1226 // each row was fixed at the time the CrsMatrix constructor was
1227 // called. This lets us use 1-D storage for the matrix's
1228 // values. ("1-D storage" means the same as that used by the
1229 // three arrays in the compressed sparse row storage format.)
1230
1231 if (debug) {
1232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1233 (this->staticGraph_.is_null (), std::logic_error,
1234 "this->getProfileType() == StaticProfile, but staticGraph_ "
1235 "is null." << suffix);
1237
// The row offsets must have one more entry than there are local rows.
1238 const size_t lclNumRows = this->staticGraph_->getNodeNumRows ();
1239 typename Graph::local_graph_device_type::row_map_type k_ptrs =
1240 this->staticGraph_->rowPtrsUnpacked_dev_;
1241 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1242 (k_ptrs.extent (0) != lclNumRows+1, std::logic_error,
1243 "With StaticProfile, row offsets array has length "
1244 << k_ptrs.extent (0) << " != (lclNumRows+1) = "
1245 << (lclNumRows+1) << ".");
1246
// The last unpacked row offset gives the number of value slots needed.
1247 const size_t lclTotalNumEntries =
1248 this->staticGraph_->rowPtrsUnpacked_host_(lclNumRows);
1249
1250 // Allocate array of (packed???) matrix values.
1251 using values_type = typename local_matrix_device_type::values_type;
1252 if (verbose) {
1253 std::ostringstream os;
1254 os << *prefix << "Allocate values_wdv: Pre "
1255 << valuesUnpacked_wdv.extent(0) << ", post "
1256 << lclTotalNumEntries << endl;
1257 std::cerr << os.str();
1258 }
1259// this->k_values1D_ =
1260 valuesUnpacked_wdv = values_wdv_type(
1261 values_type("Tpetra::CrsMatrix::values",
1262 lclTotalNumEntries));
1263 }
1264
1265 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1266 void
1267 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1268 getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
1269 Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
1270 Teuchos::ArrayRCP<const Scalar>& values) const
1271 {
1272 using Teuchos::RCP;
1273 const char tfecfFuncName[] = "getAllValues: ";
1274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1275 columnIndices.size () != values.size (), std::runtime_error,
1276 "Requires that columnIndices and values are the same size.");
1277
1278 RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
1279 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1280 relevantGraph.is_null (), std::runtime_error,
1281 "Requires that getCrsGraph() is not null.");
1282 try {
1283 rowPointers = relevantGraph->getNodeRowPtrs ();
1284 }
1285 catch (std::exception &e) {
1286 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1287 true, std::runtime_error,
1288 "Caught exception while calling graph->getNodeRowPtrs(): "
1289 << e.what ());
1290 }
1291 try {
1292 columnIndices = relevantGraph->getNodePackedIndices ();
1293 }
1294 catch (std::exception &e) {
1295 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1296 true, std::runtime_error,
1297 "Caught exception while calling graph->getNodePackedIndices(): "
1298 << e.what ());
1299 }
1300 Teuchos::ArrayRCP<const impl_scalar_type> vals =
1301// Kokkos::Compat::persistingView (k_values1D_);
1302 Kokkos::Compat::persistingView (valuesUnpacked_wdv.getHostView(Access::ReadOnly));
1303 values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
1304 }
1305
1306 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1307 void
1309 getAllValues(Teuchos::ArrayRCP<Scalar>& values) {
1310 using Teuchos::RCP;
1311 const char tfecfFuncName[] = "getAllValues: ";
1312 RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
1313 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1314 relevantGraph.is_null (), std::runtime_error,
1315 "Requires that getCrsGraph() is not null.");
1316 Teuchos::ArrayRCP<impl_scalar_type> vals =
1317 Kokkos::Compat::persistingView (k_values1D_);
1318 values = Teuchos::arcp_reinterpret_cast<Scalar> (vals);
1319 }
1320
1321
// Produces the packed compressed-sparse-row arrays for the matrix-owned
// graph (myGraph_) and for the matrix values.
//
// If the graph's entry count differs from its allocation size, new
// packed row offsets, column indices and values are allocated and
// filled from the unpacked arrays.  Otherwise the packed members are
// simply set to the unpacked ones.  Then, if optimized storage is
// requested, k_numRowEntries_ is released, the unpacked members are
// replaced by the packed ones, and both the graph and the matrix are
// marked STORAGE_1D_PACKED.
//
// params [in] May be null.  If non-null, the "Optimize Storage" entry is
//             read, using the computed default when absent.
//
// Throws std::logic_error from the consistency checks that run when
// Details::Behavior::debug("CrsMatrix") is true.
//
// NOTE(review): listing lines 1324, 1327 and 1418 are absent from this
// extract -- presumably the class qualifier, a using-declaration for
// computeOffsetsFromCounts, and a blank line.  Confirm.
1322 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1323 void
1325 fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1326 {
1328 using ::Tpetra::Details::getEntryOnHost;
1329 using Teuchos::arcp_const_cast;
1330 using Teuchos::Array;
1331 using Teuchos::ArrayRCP;
1332 using Teuchos::null;
1333 using Teuchos::RCP;
1334 using Teuchos::rcp;
1335 using std::endl;
1336 using row_map_type = typename local_graph_device_type::row_map_type;
1337 using lclinds_1d_type = typename Graph::local_graph_device_type::entries_type::non_const_type;
1338 using values_type = typename local_matrix_device_type::values_type;
// NOTE(review): the region label says "CrsGraph" although this is a
// CrsMatrix method -- confirm whether that is intended.
1339 Details::ProfilingRegion regionFLGAM
1340 ("Tpetra::CrsGraph::fillLocalGraphAndMatrix");
1341
1342 const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
1343 "fillComplete or expertStaticFillComplete): ";
1344 const char suffix[] =
1345 " Please report this bug to the Tpetra developers.";
1346 const bool debug = Details::Behavior::debug("CrsMatrix");
1347 const bool verbose = Details::Behavior::verbose("CrsMatrix");
1348
// prefix is only created (and only dereferenced) when verbose is true.
1349 std::unique_ptr<std::string> prefix;
1350 if (verbose) {
1351 prefix = this->createPrefix("CrsMatrix", "fillLocalGraphAndMatrix");
1352 std::ostringstream os;
1353 os << *prefix << endl;
1354 std::cerr << os.str ();
1355 }
1356
1357 if (debug) {
1358 // fillComplete() only calls fillLocalGraphAndMatrix() if the
1359 // matrix owns the graph, which means myGraph_ is not null.
1360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1361 (myGraph_.is_null (), std::logic_error, "The nonconst graph "
1362 "(myGraph_) is null. This means that the matrix has a "
1363 "const (a.k.a. \"static\") graph. fillComplete or "
1364 "expertStaticFillComplete should never call "
1365 "fillLocalGraphAndMatrix in that case." << suffix);
1366 }
1367
1368 const size_t lclNumRows = this->getNodeNumRows ();
1369
1370 // This method's goal is to fill in the three arrays (compressed
1371 // sparse row format) that define the sparse graph's and matrix's
1372 // structure, and the sparse matrix's values.
1373 //
1374 // Get references to the data in myGraph_, so we can modify them
1375 // as well. Note that we only call fillLocalGraphAndMatrix() if
1376 // the matrix owns the graph, which means myGraph_ is not null.
1377
1378 typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
1379
1380 // StaticProfile means that the matrix's column indices and
1381 // values are currently stored in a 1-D format, with row offsets
1382 // in rowPtrsUnpacked_ and local column indices in lclIndsUnpacked_wdv.
1383
1384 // StaticProfile also means that the graph's array of row
1385 // offsets must already be allocated.
1386 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1387 myGraph_->rowPtrsUnpacked_dev_;
1388
// Debug check: the unpacked row offsets exist, have lclNumRows+1
// entries, and their last entry matches the unpacked index array size.
1389 if (debug) {
1390 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1391 (curRowOffsets.extent (0) == 0, std::logic_error,
1392 "(StaticProfile branch) curRowOffsets.extent(0) == 0.");
1393 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1394 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1395 "(StaticProfile branch) curRowOffsets.extent(0) = "
1396 << curRowOffsets.extent (0) << " != lclNumRows + 1 = "
1397 << (lclNumRows + 1) << ".");
1398 const size_t numOffsets = curRowOffsets.extent (0);
1399 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
// NOTE(review): the message below prints numOffsets as the index, but
// the value read above is at numOffsets - 1.
1400 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1401 (numOffsets != 0 &&
1402 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1403 std::logic_error, "(StaticProfile branch) numOffsets = " <<
1404 numOffsets << " != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1405 << myGraph_->lclIndsUnpacked_wdv.extent (0) << " != curRowOffsets("
1406 << numOffsets << ") = " << valToCheck << ".");
1407 }
1408
// Packing is needed only when fewer entries are in use than allocated.
1409 if (myGraph_->getNodeNumEntries() !=
1410 myGraph_->getNodeAllocationSize()) {
1411
1412 // Use the nonconst version of row_map_type for k_ptrs,
1413 // because row_map_type is const and we need to modify k_ptrs here.
1414 typename row_map_type::non_const_type k_ptrs;
1415 row_map_type k_ptrs_const;
1416 lclinds_1d_type k_inds;
1417 values_type k_vals;
1419 if (verbose) {
1420 std::ostringstream os;
1421 const auto numEnt = myGraph_->getNodeNumEntries();
1422 const auto allocSize = myGraph_->getNodeAllocationSize();
1423 os << *prefix << "Unpacked 1-D storage: numEnt=" << numEnt
1424 << ", allocSize=" << allocSize << endl;
1425 std::cerr << os.str ();
1426 }
1427 // The matrix's current 1-D storage is "unpacked." This means
1428 // the row offsets may differ from what the final row offsets
1429 // should be. This could happen, for example, if the user
1430 // specified StaticProfile in the constructor and set an upper
1431 // bound on the number of entries per row, but didn't fill all
1432 // those entries.
1433 if (debug && curRowOffsets.extent (0) != 0) {
1434 const size_t numOffsets =
1435 static_cast<size_t> (curRowOffsets.extent (0));
1436 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
1437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1438 (static_cast<size_t> (valToCheck) !=
1439 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1440 std::logic_error, "(StaticProfile unpacked branch) Before "
1441 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1442 << ") = " << valToCheck << " != valuesUnpacked_wdv.extent(0)"
1443 " = " << valuesUnpacked_wdv.extent (0) << ".");
1444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1445 (static_cast<size_t> (valToCheck) !=
1446 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1447 std::logic_error, "(StaticProfile unpacked branch) Before "
1448 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1449 << ") = " << valToCheck
1450 << " != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1451 << myGraph_->lclIndsUnpacked_wdv.extent (0) << ".");
1452 }
1453 // Pack the row offsets into k_ptrs, by doing a sum-scan of
1454 // the array of valid entry counts per row.
1455
1456 // Total number of entries in the matrix on the calling
1457 // process. We will compute this in the loop below. It's
1458 // cheap to compute and useful as a sanity check.
1459 size_t lclTotalNumEntries = 0;
1460 {
1461 // Allocate the packed row offsets array. We use a nonconst
1462 // temporary (packedRowOffsets) here, because k_ptrs is
1463 // const. We will assign packedRowOffsets to k_ptrs below.
1464 if (verbose) {
1465 std::ostringstream os;
1466 os << *prefix << "Allocate packed row offsets: "
1467 << (lclNumRows+1) << endl;
1468 std::cerr << os.str ();
1469 }
1470 typename row_map_type::non_const_type
1471 packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1472 typename row_entries_type::const_type numRowEnt_h =
1473 myGraph_->k_numRowEntries_;
1474 // We're computing offsets on device. This function can
1475 // handle numRowEnt_h being a host View.
1476 lclTotalNumEntries =
1477 computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1478 // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1479 // to use packedRowOffsets in the loop above and assign here.
1480 k_ptrs = packedRowOffsets;
1481 k_ptrs_const = k_ptrs;
1482 }
1483
// Debug check: the packed offsets have the right length and their last
// entry equals the total returned by computeOffsetsFromCounts.
1484 if (debug) {
1485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1486 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1487 std::logic_error,
1488 "(StaticProfile unpacked branch) After packing k_ptrs, "
1489 "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1490 "lclNumRows+1 = " << (lclNumRows+1) << ".");
1491 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1492 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1493 (valToCheck != lclTotalNumEntries, std::logic_error,
1494 "(StaticProfile unpacked branch) After filling k_ptrs, "
1495 "k_ptrs(lclNumRows=" << lclNumRows << ") = " << valToCheck
1496 << " != total number of entries on the calling process = "
1497 << lclTotalNumEntries << ".");
1498 }
1499
1500 // Allocate the arrays of packed column indices and values.
1501 if (verbose) {
1502 std::ostringstream os;
1503 os << *prefix << "Allocate packed local column indices: "
1504 << lclTotalNumEntries << endl;
1505 std::cerr << os.str ();
1506 }
1507 k_inds = lclinds_1d_type ("Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1508 if (verbose) {
1509 std::ostringstream os;
1510 os << *prefix << "Allocate packed values: "
1511 << lclTotalNumEntries << endl;
1512 std::cerr << os.str ();
1513 }
1514 k_vals = values_type ("Tpetra::CrsMatrix::values", lclTotalNumEntries);
1515
1516 // curRowOffsets (myGraph_->rowPtrsUnpacked_) (???), lclIndsUnpacked_wdv,
1517 // and valuesUnpacked_wdv are currently unpacked. Pack them, using
1518 // the packed row offsets array k_ptrs that we created above.
1519 //
1520 // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1521 // need to keep around the unpacked row offsets, column
1522 // indices, and values arrays.
1523
1524 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
1525 // packed k_inds. We will replace lclIndsUnpacked_wdv below.
1526 using inds_packer_type = pack_functor<
1527 typename Graph::local_graph_device_type::entries_type::non_const_type,
1528 typename Graph::local_inds_dualv_type::t_dev::const_type,
1529 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1530 typename Graph::local_graph_device_type::row_map_type>;
1531 inds_packer_type indsPacker (
1532 k_inds,
1533 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1534 k_ptrs, curRowOffsets);
1535 using exec_space = typename decltype (k_inds)::execution_space;
1536 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1537 Kokkos::parallel_for
1538 ("Tpetra::CrsMatrix pack column indices",
1539 range_type (0, lclNumRows), indsPacker);
1540
1541 // Pack the values from unpacked valuesUnpacked_wdv into packed
1542 // k_vals. We will replace valuesPacked_wdv below.
1543 using vals_packer_type = pack_functor<
1544 typename values_type::non_const_type,
1545 typename values_type::const_type,
1546 typename row_map_type::non_const_type,
1547 typename row_map_type::const_type>;
1548 vals_packer_type valsPacker (
1549 k_vals,
1550 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1551 k_ptrs, curRowOffsets);
1552 Kokkos::parallel_for ("Tpetra::CrsMatrix pack values",
1553 range_type (0, lclNumRows), valsPacker);
1554
// Debug check: the last packed offset matches the sizes of the packed
// value and index arrays.
1555 if (debug) {
1556 const char myPrefix[] = "(StaticProfile \"Optimize Storage\""
1557 "=true branch) After packing, ";
1558 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1559 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1560 << "k_ptrs.extent(0) = 0. This probably means that "
1561 "rowPtrsUnpacked_ was never allocated.");
1562 if (k_ptrs.extent (0) != 0) {
1563 const size_t numOffsets (k_ptrs.extent (0));
1564 const auto valToCheck =
1565 getEntryOnHost (k_ptrs, numOffsets - 1);
1566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1567 (size_t (valToCheck) != k_vals.extent (0),
1568 std::logic_error, myPrefix <<
1569 "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1570 " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1572 (size_t (valToCheck) != k_inds.extent (0),
1573 std::logic_error, myPrefix <<
1574 "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1575 " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1576 }
1577 }
1578 // Build the local graph.
1579 myGraph_->setRowPtrsPacked(k_ptrs_const);
1580 myGraph_->lclIndsPacked_wdv =
1581 typename crs_graph_type::local_inds_wdv_type(k_inds);
1582 valuesPacked_wdv = values_wdv_type(k_vals);
1583 }
1584 else { // We don't have to pack, so just set the pointers.
1585 myGraph_->setRowPtrsPacked(myGraph_->rowPtrsUnpacked_dev_);
1586 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1587 valuesPacked_wdv = valuesUnpacked_wdv;
1588
1589 if (verbose) {
1590 std::ostringstream os;
1591 os << *prefix << "Storage already packed: rowPtrsUnpacked_: "
1592 << myGraph_->rowPtrsUnpacked_host_.extent(0) << ", lclIndsUnpacked_wdv: "
1593 << myGraph_->lclIndsUnpacked_wdv.extent(0) << ", valuesUnpacked_wdv: "
1594 << valuesUnpacked_wdv.extent(0) << endl;
1595 std::cerr << os.str();
1596 }
1597
1598 if (debug) {
1599 const char myPrefix[] =
1600 "(StaticProfile \"Optimize Storage\"=false branch) ";
1601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1602 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1603 << "myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1604 "that rowPtrsUnpacked_ was never allocated.");
1605 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1606 const size_t numOffsets (myGraph_->rowPtrsUnpacked_host_.extent (0));
1607 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
1608 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1609 (size_t (valToCheck) != valuesPacked_wdv.extent (0),
1610 std::logic_error, myPrefix <<
1611 "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1612 << " != valuesPacked_wdv.extent(0) = "
1613 << valuesPacked_wdv.extent (0) << ".");
1614 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1615 (size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1616 std::logic_error, myPrefix <<
1617 "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1618 << " != myGraph_->lclIndsPacked.extent(0) = "
1619 << myGraph_->lclIndsPacked_wdv.extent (0) << ".");
1620 }
1621 }
1622 }
1623
// NOTE(review): this check reads rowPtrsUnpacked_host_ but compares it
// with the *packed* value and index arrays, and its messages call the
// value "k_ptrs_const".  If the packing branch above ran, the unpacked
// offsets presumably still describe the larger allocation -- confirm
// whether the packed row offsets were meant here.
1624 if (debug) {
1625 const char myPrefix[] = "After packing, ";
1626 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1627 (size_t (myGraph_->rowPtrsUnpacked_host_.extent (0)) != size_t (lclNumRows + 1),
1628 std::logic_error, myPrefix << "myGraph_->rowPtrsUnpacked_host_.extent(0) = "
1629 << myGraph_->rowPtrsUnpacked_host_.extent (0) << " != lclNumRows+1 = " <<
1630 (lclNumRows+1) << ".");
1631 if (myGraph_->rowPtrsUnpacked_host_.extent (0) != 0) {
1632 const size_t numOffsets (myGraph_->rowPtrsUnpacked_host_.extent (0));
1633 const size_t valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets-1);
1634 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1635 (valToCheck != size_t (valuesPacked_wdv.extent (0)),
1636 std::logic_error, myPrefix << "k_ptrs_const(" <<
1637 (numOffsets-1) << ") = " << valToCheck
1638 << " != valuesPacked_wdv.extent(0) = "
1639 << valuesPacked_wdv.extent (0) << ".");
1640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1641 (valToCheck != size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1642 std::logic_error, myPrefix << "k_ptrs_const(" <<
1643 (numOffsets-1) << ") = " << valToCheck
1644 << " != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1645 << myGraph_->lclIndsPacked_wdv.extent (0) << ".");
1646 }
1647 }
1648
1649 // May we ditch the old allocations for the packed (and otherwise
1650 // "optimized") allocations, later in this routine? Optimize
1651 // storage if the graph is not static, or if the graph already has
1652 // optimized storage.
1653 const bool defaultOptStorage =
1654 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
// With null params the default applies; otherwise the "Optimize
// Storage" entry is read with that default.
1655 const bool requestOptimizedStorage =
1656 (! params.is_null () &&
1657 params->get ("Optimize Storage", defaultOptStorage)) ||
1658 (params.is_null () && defaultOptStorage);
1659
1660 // The graph has optimized storage when indices are allocated,
1661 // myGraph_->k_numRowEntries_ is empty, and there are more than
1662 // zero rows on this process. It's impossible for the graph to
1663 // have dynamic profile (getProfileType() == DynamicProfile) and
1664 // be optimized (isStorageOptimized()).
1665 if (requestOptimizedStorage) {
1666 // Free the old, unpacked, unoptimized allocations.
1667 // Change the graph from dynamic to static allocation profile
1668
1669 // Free graph data structures that are only needed for
1670 // unpacked 1-D storage.
1671 if (verbose) {
1672 std::ostringstream os;
1673 os << *prefix << "Optimizing storage: free k_numRowEntries_: "
1674 << myGraph_->k_numRowEntries_.extent(0) << endl;
1675 std::cerr << os.str();
1676 }
1677
1678 myGraph_->k_numRowEntries_ = row_entries_type ();
1679
1680 // Keep the new 1-D packed allocations.
1681 myGraph_->setRowPtrsUnpacked(myGraph_->rowPtrsPacked_dev_);
1682 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1683 valuesUnpacked_wdv = valuesPacked_wdv;
1684// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
1685
1686 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1687 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1688 }
1689 else {
1690 if (verbose) {
1691 std::ostringstream os;
1692 os << *prefix << "User requested NOT to optimize storage"
1693 << endl;
1694 std::cerr << os.str();
1695 }
1696 }
1697 }
1698
1699 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1700 void
1702 fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1703 {
1704 using ::Tpetra::Details::ProfilingRegion;
1705 using Teuchos::ArrayRCP;
1706 using Teuchos::Array;
1707 using Teuchos::null;
1708 using Teuchos::RCP;
1709 using Teuchos::rcp;
1710 using std::endl;
1711 using row_map_type = typename Graph::local_graph_device_type::row_map_type;
1712 using non_const_row_map_type = typename row_map_type::non_const_type;
1713 using values_type = typename local_matrix_device_type::values_type;
1714 ProfilingRegion regionFLM("Tpetra::CrsMatrix::fillLocalMatrix");
1715 const size_t lclNumRows = getNodeNumRows();
1716
1717 const bool verbose = Details::Behavior::verbose("CrsMatrix");
1718 std::unique_ptr<std::string> prefix;
1719 if (verbose) {
1720 prefix = this->createPrefix("CrsMatrix", "fillLocalMatrix");
1721 std::ostringstream os;
1722 os << *prefix << "lclNumRows: " << lclNumRows << endl;
1723 std::cerr << os.str ();
1724 }
1725
1726 // The goals of this routine are first, to allocate and fill
1727 // packed 1-D storage (see below for an explanation) in the vals
1728 // array, and second, to give vals to the local matrix and
1729 // finalize the local matrix. We only need k_ptrs, the packed 1-D
1730 // row offsets, within the scope of this routine, since we're only
1731 // filling the local matrix here (use fillLocalGraphAndMatrix() to
1732 // fill both the graph and the matrix at the same time).
1733
1734 // get data from staticGraph_
1735 size_t nodeNumEntries = staticGraph_->getNodeNumEntries ();
1736 size_t nodeNumAllocated = staticGraph_->getNodeAllocationSize ();
1737 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1738
1739 row_map_type k_ptrs; // "packed" row offsets array
1740 values_type k_vals; // "packed" values array
1741
1742 // May we ditch the old allocations for the packed (and otherwise
1743 // "optimized") allocations, later in this routine? Request
1744 // optimized storage by default.
1745 bool requestOptimizedStorage = true;
1746 const bool default_OptimizeStorage =
1747 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1748 if (! params.is_null() &&
1749 ! params->get("Optimize Storage", default_OptimizeStorage)) {
1750 requestOptimizedStorage = false;
1751 }
1752 // If we're not allowed to change a static graph, then we can't
1753 // change the storage of the matrix, either. This means that if
1754 // the graph's storage isn't already optimized, we can't optimize
1755 // the matrix's storage either. Check and give warning, as
1756 // appropriate.
1757 if (! staticGraph_->isStorageOptimized () &&
1758 requestOptimizedStorage) {
1760 (true, std::runtime_error, "You requested optimized storage "
1761 "by setting the \"Optimize Storage\" flag to \"true\" in "
1762 "the ParameterList, or by virtue of default behavior. "
1763 "However, the associated CrsGraph was filled separately and "
1764 "requested not to optimize storage. Therefore, the "
1765 "CrsMatrix cannot optimize storage.");
1766 requestOptimizedStorage = false;
1767 }
1768
1769 using row_entries_type = decltype (staticGraph_->k_numRowEntries_);
1770
1771 // StaticProfile means that the matrix's values are currently
1772 // stored in a 1-D format. However, this format is "unpacked";
1773 // it doesn't necessarily have the same row offsets as indicated
1774 // by the ptrs array returned by allocRowPtrs. This could
1775 // happen, for example, if the user specified StaticProfile in
1776 // the constructor and fixed the number of matrix entries in
1777 // each row, but didn't fill all those entries.
1778 //
1779 // As above, we don't need to keep the "packed" row offsets
1780 // array ptrs here, but we do need it here temporarily, so we
1781 // have to allocate it. We'll free ptrs later in this method.
1782 //
1783 // Note that this routine checks whether storage has already
1784 // been packed. This is a common case for solution of nonlinear
1785 // PDEs using the finite element method, as long as the
1786 // structure of the sparse matrix does not change between linear
1787 // solves.
1788 if (nodeNumEntries != nodeNumAllocated) {
1789 if (verbose) {
1790 std::ostringstream os;
1791 os << *prefix << "Unpacked 1-D storage: numEnt="
1792 << nodeNumEntries << ", allocSize=" << nodeNumAllocated
1793 << endl;
1794 std::cerr << os.str();
1795 }
1796 // We have to pack the 1-D storage, since the user didn't fill
1797 // up all requested storage.
1798 if (verbose) {
1799 std::ostringstream os;
1800 os << *prefix << "Allocate packed row offsets: "
1801 << (lclNumRows+1) << endl;
1802 std::cerr << os.str();
1803 }
1804 non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1805 lclNumRows+1);
1806 // Total number of entries in the matrix on the calling
1807 // process. We will compute this in the loop below. It's
1808 // cheap to compute and useful as a sanity check.
1809 size_t lclTotalNumEntries = 0;
1810 k_ptrs = tmpk_ptrs;
1811 {
1812 typename row_entries_type::const_type numRowEnt_h =
1813 staticGraph_->k_numRowEntries_;
1814 // This function can handle the counts being a host View.
1815 lclTotalNumEntries =
1816 Details::computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_h);
1817 }
1818
1819 // Allocate the "packed" values array.
1820 // It has exactly the right number of entries.
1821 if (verbose) {
1822 std::ostringstream os;
1823 os << *prefix << "Allocate packed values: "
1824 << lclTotalNumEntries << endl;
1825 std::cerr << os.str ();
1826 }
1827 k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1829 // Pack values_wdv into k_vals. We will replace values_wdv below.
1830 pack_functor<
1831 typename values_type::non_const_type,
1832 typename values_type::const_type,
1833 typename row_map_type::non_const_type,
1834 typename row_map_type::const_type> valsPacker
1835 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1836 tmpk_ptrs, k_rowPtrs);
1837
1838 using exec_space = typename decltype (k_vals)::execution_space;
1839 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1840 Kokkos::parallel_for ("Tpetra::CrsMatrix pack values",
1841 range_type (0, lclNumRows), valsPacker);
1842 valuesPacked_wdv = values_wdv_type(k_vals);
1843 }
1844 else { // We don't have to pack, so just set the pointer.
1845 valuesPacked_wdv = valuesUnpacked_wdv;
1846 if (verbose) {
1847 std::ostringstream os;
1848 os << *prefix << "Storage already packed: "
1849 << "valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1850 std::cerr << os.str();
1851 }
1852 }
1853
1854 // May we ditch the old allocations for the packed one?
1855 if (requestOptimizedStorage) {
1856 // The user requested optimized storage, so we can dump the
1857 // unpacked 1-D storage, and keep the packed storage.
1858 valuesUnpacked_wdv = valuesPacked_wdv;
1859// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
1860 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1861 }
1862 }
1863
1864 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1865 void
1866 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1867 insertIndicesAndValues (crs_graph_type& graph,
1868 RowInfo& rowInfo,
1869 const typename crs_graph_type::SLocalGlobalViews& newInds,
1870 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1871 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1872 const ELocalGlobal lg,
1873 const ELocalGlobal I)
1874 {
1875 const size_t oldNumEnt = rowInfo.numEntries;
1876 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1877
1878 // Use of memcpy here works around an issue with GCC >= 4.9.0,
1879 // that probably relates to scalar_type vs. impl_scalar_type
1880 // aliasing. See history of Tpetra_CrsGraph_def.hpp for
1881 // details; look for GCC_WORKAROUND macro definition.
1882 if (numInserted > 0) {
1883 const size_t startOffset = oldNumEnt;
1884 memcpy (&oldRowVals[startOffset], &newRowVals[0],
1885 numInserted * sizeof (impl_scalar_type));
1886 }
1887 }
1888
1889 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1890 void
1892 insertLocalValues (const LocalOrdinal lclRow,
1893 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1894 const Teuchos::ArrayView<const Scalar>& values)
1895 {
1896 using std::endl;
1897 const char tfecfFuncName[] = "insertLocalValues: ";
1898
1899 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1900 (! this->isFillActive (), std::runtime_error,
1901 "Fill is not active. After calling fillComplete, you must call "
1902 "resumeFill before you may insert entries into the matrix again.");
1903 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1904 (this->isStaticGraph (), std::runtime_error,
1905 "Cannot insert indices with static graph; use replaceLocalValues() "
1906 "instead.");
1907 // At this point, we know that myGraph_ is nonnull.
1908 crs_graph_type& graph = * (this->myGraph_);
1909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1910 (graph.colMap_.is_null (), std::runtime_error,
1911 "Cannot insert local indices without a column map.");
1912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1913 (graph.isGloballyIndexed (),
1914 std::runtime_error, "Graph indices are global; use "
1915 "insertGlobalValues().");
1916 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1917 (values.size () != indices.size (), std::runtime_error,
1918 "values.size() = " << values.size ()
1919 << " != indices.size() = " << indices.size () << ".");
1920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1921 ! graph.rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1922 "Local row index " << lclRow << " does not belong to this process.");
1923
1924 if (! graph.indicesAreAllocated ()) {
1925 // We only allocate values at most once per process, so it's OK
1926 // to check TPETRA_VERBOSE here.
1927 const bool verbose = Details::Behavior::verbose("CrsMatrix");
1928 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
1929 }
1930
1931#ifdef HAVE_TPETRA_DEBUG
1932 const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1933 // In a debug build, test whether any of the given column indices
1934 // are not in the column Map. Keep track of the invalid column
1935 // indices so we can tell the user about them.
1936 {
1937 using Teuchos::toString;
1938
1939 const map_type& colMap = * (graph.colMap_);
1940 Teuchos::Array<LocalOrdinal> badColInds;
1941 bool allInColMap = true;
1942 for (size_t k = 0; k < numEntriesToAdd; ++k) {
1943 if (! colMap.isNodeLocalElement (indices[k])) {
1944 allInColMap = false;
1945 badColInds.push_back (indices[k]);
1946 }
1947 }
1948 if (! allInColMap) {
1949 std::ostringstream os;
1950 os << "You attempted to insert entries in owned row " << lclRow
1951 << ", at the following column indices: " << toString (indices)
1952 << "." << endl;
1953 os << "Of those, the following indices are not in the column Map on "
1954 "this process: " << toString (badColInds) << "." << endl << "Since "
1955 "the matrix has a column Map already, it is invalid to insert "
1956 "entries at those locations.";
1957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958 (true, std::invalid_argument, os.str ());
1959 }
1960 }
1961#endif // HAVE_TPETRA_DEBUG
1962
1963 RowInfo rowInfo = graph.getRowInfo (lclRow);
1964
1965 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1966 auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset) {
1967 valsView[offset] += values[k]; };
1968 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1969 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1970 }
1971
1972 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1973 void
1975 insertLocalValues (const LocalOrdinal localRow,
1976 const LocalOrdinal numEnt,
1977 const Scalar vals[],
1978 const LocalOrdinal cols[])
1979 {
1980 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1981 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1982 this->insertLocalValues (localRow, colsT, valsT);
1983 }
1984
1985 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1986 void
1989 RowInfo& rowInfo,
1990 const GlobalOrdinal gblColInds[],
1991 const impl_scalar_type vals[],
1992 const size_t numInputEnt)
1993 {
1994#ifdef HAVE_TPETRA_DEBUG
1995 const char tfecfFuncName[] = "insertGlobalValuesImpl: ";
1996 const size_t origNumEnt = graph.getNumEntriesInLocalRow (rowInfo.localRow);
1997 const size_t curNumEnt = rowInfo.numEntries;
1998#endif // HAVE_TPETRA_DEBUG
1999
2000 if (! graph.indicesAreAllocated ()) {
2001 // We only allocate values at most once per process, so it's OK
2002 // to check TPETRA_VERBOSE here.
2003 using ::Tpetra::Details::Behavior;
2004 const bool verbose = Behavior::verbose("CrsMatrix");
2005 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
2006 // mfh 23 Jul 2017: allocateValues invalidates existing
2007 // getRowInfo results. Once we get rid of lazy graph
2008 // allocation, we'll be able to move the getRowInfo call outside
2009 // of this method.
2010 rowInfo = graph.getRowInfo (rowInfo.localRow);
2011 }
2013 auto valsView = this->getValuesViewHostNonConst(rowInfo);
2014 auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset){
2015 valsView[offset] += vals[k];
2016 };
2017 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2018#ifdef HAVE_TPETRA_DEBUG
2019 //numInserted is only used inside the debug code below.
2020 auto numInserted =
2021#endif
2022 graph.insertGlobalIndicesImpl(rowInfo, gblColInds, numInputEnt, cb);
2023
2024#ifdef HAVE_TPETRA_DEBUG
2025 size_t newNumEnt = curNumEnt + numInserted;
2026 const size_t chkNewNumEnt =
2027 graph.getNumEntriesInLocalRow (rowInfo.localRow);
2028 if (chkNewNumEnt != newNumEnt) {
2029 std::ostringstream os;
2030 os << std::endl << "newNumEnt = " << newNumEnt
2031 << " != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
2032 << ") = " << chkNewNumEnt << "." << std::endl
2033 << "\torigNumEnt: " << origNumEnt << std::endl
2034 << "\tnumInputEnt: " << numInputEnt << std::endl
2035 << "\tgblColInds: [";
2036 for (size_t k = 0; k < numInputEnt; ++k) {
2037 os << gblColInds[k];
2038 if (k + size_t (1) < numInputEnt) {
2039 os << ",";
2040 }
2042 os << "]" << std::endl
2043 << "\tvals: [";
2044 for (size_t k = 0; k < numInputEnt; ++k) {
2045 os << vals[k];
2046 if (k + size_t (1) < numInputEnt) {
2047 os << ",";
2048 }
2049 }
2050 os << "]" << std::endl;
2051
2052 if (this->supportsRowViews ()) {
2053 values_host_view_type vals2;
2054 if (this->isGloballyIndexed ()) {
2055 global_inds_host_view_type gblColInds2;
2056 const GlobalOrdinal gblRow =
2057 graph.rowMap_->getGlobalElement (rowInfo.localRow);
2058 if (gblRow ==
2059 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
2060 os << "Local row index " << rowInfo.localRow << " is invalid!"
2061 << std::endl;
2062 }
2063 else {
2064 bool getViewThrew = false;
2065 try {
2066 this->getGlobalRowView (gblRow, gblColInds2, vals2);
2067 }
2068 catch (std::exception& e) {
2069 getViewThrew = true;
2070 os << "getGlobalRowView threw exception:" << std::endl
2071 << e.what () << std::endl;
2072 }
2073 if (! getViewThrew) {
2074 os << "\tNew global column indices: ";
2075 for (size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
2076 os << gblColInds2[jjj] << " ";
2077 os << std::endl;
2078 os << "\tNew values: ";
2079 for (size_t jjj = 0; jjj < vals2.extent(0); jjj++)
2080 os << vals2[jjj] << " ";
2081 os << std::endl;
2082 }
2083 }
2084 }
2085 else if (this->isLocallyIndexed ()) {
2086 local_inds_host_view_type lclColInds2;
2087 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
2088 os << "\tNew local column indices: ";
2089 for (size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
2090 os << lclColInds2[jjj] << " ";
2091 os << std::endl;
2092 os << "\tNew values: ";
2093 for (size_t jjj = 0; jjj < vals2.extent(0); jjj++)
2094 os << vals2[jjj] << " ";
2095 os << std::endl;
2096 }
2097 }
2098
2099 os << "Please report this bug to the Tpetra developers.";
2100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2101 (true, std::logic_error, os.str ());
2102 }
2103#endif // HAVE_TPETRA_DEBUG
2104 }
2105
2106 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2107 void
2109 insertGlobalValues (const GlobalOrdinal gblRow,
2110 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2111 const Teuchos::ArrayView<const Scalar>& values)
2112 {
2113 using Teuchos::toString;
2114 using std::endl;
2115 typedef impl_scalar_type IST;
2116 typedef LocalOrdinal LO;
2117 typedef GlobalOrdinal GO;
2118 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2119 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2120 const char tfecfFuncName[] = "insertGlobalValues: ";
2121
2122#ifdef HAVE_TPETRA_DEBUG
2123 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2124 (values.size () != indices.size (), std::runtime_error,
2125 "values.size() = " << values.size () << " != indices.size() = "
2126 << indices.size () << ".");
2127#endif // HAVE_TPETRA_DEBUG
2128
2129 // getRowMap() is not thread safe, because it increments RCP's
2130 // reference count. getCrsGraphRef() is thread safe.
2131 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2132 const LO lclRow = rowMap.getLocalElement (gblRow);
2133
2134 if (lclRow == OTLO::invalid ()) {
2135 // Input row is _not_ owned by the calling process.
2136 //
2137 // See a note (now deleted) from mfh 14 Dec 2012: If input row
2138 // is not in the row Map, it doesn't matter whether or not the
2139 // graph is static; the data just get stashed for later use by
2140 // globalAssemble().
2141 this->insertNonownedGlobalValues (gblRow, indices, values);
2142 }
2143 else { // Input row _is_ owned by the calling process
2144 if (this->isStaticGraph ()) {
2145 // Uh oh! Not allowed to insert into owned rows in that case.
2146 const int myRank = rowMap.getComm ()->getRank ();
2147 const int numProcs = rowMap.getComm ()->getSize ();
2148 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2149 (true, std::runtime_error,
2150 "The matrix was constructed with a constant (\"static\") graph, "
2151 "yet the given global row index " << gblRow << " is in the row "
2152 "Map on the calling process (with rank " << myRank << ", of " <<
2153 numProcs << " process(es)). In this case, you may not insert "
2154 "new entries into rows owned by the calling process.");
2155 }
2156
2157 crs_graph_type& graph = * (this->myGraph_);
2158 const IST* const inputVals =
2159 reinterpret_cast<const IST*> (values.getRawPtr ());
2160 const GO* const inputGblColInds = indices.getRawPtr ();
2161 const size_t numInputEnt = indices.size ();
2162 RowInfo rowInfo = graph.getRowInfo (lclRow);
2163
2164 // If the matrix has a column Map, check at this point whether
2165 // the column indices belong to the column Map.
2166 //
2167 // FIXME (mfh 16 May 2013) We may want to consider deferring the
2168 // test to the CrsGraph method, since it may have to do this
2169 // anyway.
2170 if (! graph.colMap_.is_null ()) {
2171 const map_type& colMap = * (graph.colMap_);
2172 // In a debug build, keep track of the nonowned ("bad") column
2173 // indices, so that we can display them in the exception
2174 // message. In a release build, just ditch the loop early if
2175 // we encounter a nonowned column index.
2176#ifdef HAVE_TPETRA_DEBUG
2177 Teuchos::Array<GO> badColInds;
2178#endif // HAVE_TPETRA_DEBUG
2179 const size_type numEntriesToInsert = indices.size ();
2180 bool allInColMap = true;
2181 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2182 if (! colMap.isNodeGlobalElement (indices[k])) {
2183 allInColMap = false;
2184#ifdef HAVE_TPETRA_DEBUG
2185 badColInds.push_back (indices[k]);
2186#else
2187 break;
2188#endif // HAVE_TPETRA_DEBUG
2189 }
2190 }
2191 if (! allInColMap) {
2192 std::ostringstream os;
2193 os << "You attempted to insert entries in owned row " << gblRow
2194 << ", at the following column indices: " << toString (indices)
2195 << "." << endl;
2196#ifdef HAVE_TPETRA_DEBUG
2197 os << "Of those, the following indices are not in the column Map "
2198 "on this process: " << toString (badColInds) << "." << endl
2199 << "Since the matrix has a column Map already, it is invalid "
2200 "to insert entries at those locations.";
2201#else
2202 os << "At least one of those indices is not in the column Map "
2203 "on this process." << endl << "It is invalid to insert into "
2204 "columns not in the column Map on the process that owns the "
2205 "row.";
2206#endif // HAVE_TPETRA_DEBUG
2207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2208 (true, std::invalid_argument, os.str ());
2210 }
2211
2212 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2213 inputVals, numInputEnt);
2214 }
2215 }
2216
2217
2218 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2219 void
2221 insertGlobalValues (const GlobalOrdinal globalRow,
2222 const LocalOrdinal numEnt,
2223 const Scalar vals[],
2224 const GlobalOrdinal inds[])
2225 {
2226 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2227 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2228 this->insertGlobalValues (globalRow, indsT, valsT);
2229 }
2230
2231
2232 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2233 void
2236 const GlobalOrdinal gblRow,
2237 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2238 const Teuchos::ArrayView<const Scalar>& values,
2239 const bool debug)
2241 typedef impl_scalar_type IST;
2242 typedef LocalOrdinal LO;
2243 typedef GlobalOrdinal GO;
2244 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2245 const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2246
2247 if (debug) {
2248 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2249 (values.size () != indices.size (), std::runtime_error,
2250 "values.size() = " << values.size () << " != indices.size() = "
2251 << indices.size () << ".");
2252 }
2253
2254 // getRowMap() is not thread safe, because it increments RCP's
2255 // reference count. getCrsGraphRef() is thread safe.
2256 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2257 const LO lclRow = rowMap.getLocalElement (gblRow);
2258 if (lclRow == OTLO::invalid ()) {
2259 // Input row is _not_ owned by the calling process.
2260 //
2261 // See a note (now deleted) from mfh 14 Dec 2012: If input row
2262 // is not in the row Map, it doesn't matter whether or not the
2263 // graph is static; the data just get stashed for later use by
2264 // globalAssemble().
2265 this->insertNonownedGlobalValues (gblRow, indices, values);
2266 }
2267 else { // Input row _is_ owned by the calling process
2268 if (this->isStaticGraph ()) {
2269 // Uh oh! Not allowed to insert into owned rows in that case.
2270 const int myRank = rowMap.getComm ()->getRank ();
2271 const int numProcs = rowMap.getComm ()->getSize ();
2272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2273 (true, std::runtime_error,
2274 "The matrix was constructed with a constant (\"static\") graph, "
2275 "yet the given global row index " << gblRow << " is in the row "
2276 "Map on the calling process (with rank " << myRank << ", of " <<
2277 numProcs << " process(es)). In this case, you may not insert "
2278 "new entries into rows owned by the calling process.");
2279 }
2280
2281 crs_graph_type& graph = * (this->myGraph_);
2282 const IST* const inputVals =
2283 reinterpret_cast<const IST*> (values.getRawPtr ());
2284 const GO* const inputGblColInds = indices.getRawPtr ();
2285 const size_t numInputEnt = indices.size ();
2286 RowInfo rowInfo = graph.getRowInfo (lclRow);
2287
2288 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2289 // This branch is similar in function to the following branch, but for
2290 // the special case that the target graph is locally indexed (and the
2291 // profile type is StaticProfile). In this case, we cannot simply filter
2292 // out global indices that don't exist on the receiving process and
2293 // insert the remaining (global) indices, but we must convert them (the
2294 // remaining global indices) to local and call `insertLocalValues`.
2295 const map_type& colMap = * (graph.colMap_);
2296 size_t curOffset = 0;
2297 while (curOffset < numInputEnt) {
2298 // Find a sequence of input indices that are in the column Map on the
2299 // calling process. Doing a sequence at a time, instead of one at a
2300 // time, amortizes some overhead.
2301 Teuchos::Array<LO> lclIndices;
2302 size_t endOffset = curOffset;
2303 for ( ; endOffset < numInputEnt; ++endOffset) {
2304 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2305 if (lclIndex != OTLO::invalid())
2306 lclIndices.push_back(lclIndex);
2307 else
2308 break;
2309 }
2310 // curOffset, endOffset: half-exclusive range of indices in the column
2311 // Map on the calling process. If endOffset == curOffset, the range is
2312 // empty.
2313 const LO numIndInSeq = (endOffset - curOffset);
2314 if (numIndInSeq != 0) {
2315 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
2316 }
2317 // Invariant before the increment line: Either endOffset ==
2318 // numInputEnt, or inputGblColInds[endOffset] is not in the column Map
2319 // on the calling process.
2320 if (debug) {
2321 const bool invariant = endOffset == numInputEnt ||
2322 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2324 (! invariant, std::logic_error, std::endl << "Invariant failed!");
2325 }
2326 curOffset = endOffset + 1;
2328 }
2329 else if (! graph.colMap_.is_null ()) { // We have a column Map.
2330 const map_type& colMap = * (graph.colMap_);
2331 size_t curOffset = 0;
2332 while (curOffset < numInputEnt) {
2333 // Find a sequence of input indices that are in the column
2334 // Map on the calling process. Doing a sequence at a time,
2335 // instead of one at a time, amortizes some overhead.
2336 size_t endOffset = curOffset;
2337 for ( ; endOffset < numInputEnt &&
2338 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2339 ++endOffset)
2340 {}
2341 // curOffset, endOffset: half-exclusive range of indices in
2342 // the column Map on the calling process. If endOffset ==
2343 // curOffset, the range is empty.
2344 const LO numIndInSeq = (endOffset - curOffset);
2345 if (numIndInSeq != 0) {
2346 rowInfo = graph.getRowInfo(lclRow); // KDD 5/19 Need fresh RowInfo in each loop iteration
2347 this->insertGlobalValuesImpl (graph, rowInfo,
2348 inputGblColInds + curOffset,
2349 inputVals + curOffset,
2350 numIndInSeq);
2351 }
2352 // Invariant before the increment line: Either endOffset ==
2353 // numInputEnt, or inputGblColInds[endOffset] is not in the
2354 // column Map on the calling process.
2355 if (debug) {
2356 const bool invariant = endOffset == numInputEnt ||
2357 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2358 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2359 (! invariant, std::logic_error, std::endl << "Invariant failed!");
2360 }
2361 curOffset = endOffset + 1;
2362 }
2363 }
2364 else { // we don't have a column Map.
2365 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2366 inputVals, numInputEnt);
2368 }
2369 }
2371 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2372 void
2375 const GlobalOrdinal gblRow,
2376 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2377 const Teuchos::ArrayView<const Scalar>& values,
2378 const char* const prefix,
2379 const bool debug,
2380 const bool verbose)
2381 {
2383 using std::endl;
2384
2385 try {
2386 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2387 }
2388 catch(std::exception& e) {
2389 std::ostringstream os;
2390 if (verbose) {
2391 const size_t maxNumToPrint =
2393 os << *prefix << ": insertGlobalValuesFiltered threw an "
2394 "exception: " << e.what() << endl
2395 << "Global row index: " << gblRow << endl;
2396 verbosePrintArray(os, indices, "Global column indices",
2397 maxNumToPrint);
2398 os << endl;
2399 verbosePrintArray(os, values, "Values", maxNumToPrint);
2400 os << endl;
2401 }
2402 else {
2403 os << ": insertGlobalValuesFiltered threw an exception: "
2404 << e.what();
2405 }
2406 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
2407 }
2408 }
2409
2410 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2411 LocalOrdinal
2414 const crs_graph_type& graph,
2415 const RowInfo& rowInfo,
2416 const LocalOrdinal inds[],
2417 const impl_scalar_type newVals[],
2418 const LocalOrdinal numElts)
2419 {
2420 typedef LocalOrdinal LO;
2421 typedef GlobalOrdinal GO;
2422 const bool sorted = graph.isSorted ();
2423
2424 size_t hint = 0; // Guess for the current index k into rowVals
2425 LO numValid = 0; // number of valid local column indices
2426
2427 if (graph.isLocallyIndexed ()) {
2428 // Get a view of the column indices in the row. This amortizes
2429 // the cost of getting the view over all the entries of inds.
2430 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2431
2432 for (LO j = 0; j < numElts; ++j) {
2433 const LO lclColInd = inds[j];
2434 const size_t offset =
2435 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2436 lclColInd, hint, sorted);
2437 if (offset != rowInfo.numEntries) {
2438 rowVals[offset] = newVals[j];
2439 hint = offset + 1;
2440 ++numValid;
2441 }
2442 }
2443 }
2444 else if (graph.isGloballyIndexed ()) {
2445 if (graph.colMap_.is_null ()) {
2446 return Teuchos::OrdinalTraits<LO>::invalid ();
2447 }
2448 const map_type colMap = * (graph.colMap_);
2449
2450 // Get a view of the column indices in the row. This amortizes
2451 // the cost of getting the view over all the entries of inds.
2452 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2453
2454 for (LO j = 0; j < numElts; ++j) {
2455 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2456 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2457 const size_t offset =
2458 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2459 gblColInd, hint, sorted);
2460 if (offset != rowInfo.numEntries) {
2461 rowVals[offset] = newVals[j];
2462 hint = offset + 1;
2463 ++numValid;
2464 }
2465 }
2466 }
2467 }
2468 // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
2469 // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
2470 // to be neither locally nor globally indexed on a process.
2471 // This means that the graph or matrix has no entries on that
2472 // process. Epetra also works like this. It's related to lazy
2473 // allocation (on first insertion, not at graph / matrix
2474 // construction). Lazy allocation will go away because it is
2475 // not thread scalable.
2476
2477 return numValid;
2479
// replaceLocalValues, Teuchos::ArrayView overload.
//
// Checks that lclCols and vals hold the same number of entries, then
// forwards their raw pointers to the (row, count, values, columns)
// overload of replaceLocalValues.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in size; otherwise returns whatever the forwarded call returns.
2480 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2481 LocalOrdinal
2483 replaceLocalValues (const LocalOrdinal localRow,
2484 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2485 const Teuchos::ArrayView<const Scalar>& vals)
2486 {
2487 typedef LocalOrdinal LO;
2488
2489 const LO numInputEnt = static_cast<LO> (lclCols.size ());
2490 if (static_cast<LO> (vals.size ()) != numInputEnt) {
     // Mismatched input lengths: report failure before touching the matrix.
2491 return Teuchos::OrdinalTraits<LO>::invalid ();
2492 }
2493 const LO* const inputInds = lclCols.getRawPtr ();
2494 const Scalar* const inputVals = vals.getRawPtr ();
     // The raw-pointer overload takes the values before the column indices.
2495 return this->replaceLocalValues (localRow, numInputEnt,
2496 inputVals, inputInds);
2497 }
2498
// Kokkos::View overload that forwards to
// replaceLocalValues(row, count, values, columns).
//
// Checks that inputInds and inputVals have the same extent, reinterprets
// the impl_scalar_type data as Scalar, and forwards the raw pointers.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the extents differ;
// otherwise returns whatever the forwarded call returns.
2499 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2501 local_ordinal_type
2504 const local_ordinal_type localRow,
2505 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2506 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2507 {
2508 using LO = local_ordinal_type;
     // Cast explicitly, as the sibling View overloads do, instead of relying
     // on an implicit narrowing conversion from extent(0)'s type to LO.
2509 const LO numInputEnt = static_cast<LO>(inputInds.extent(0));
2510 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2511 return Teuchos::OrdinalTraits<LO>::invalid();
2512 }
     // The raw-pointer overload is declared in terms of Scalar, so view the
     // impl_scalar_type storage through a Scalar pointer.
2513 const Scalar* const inVals =
2514 reinterpret_cast<const Scalar*>(inputVals.data());
2515 return this->replaceLocalValues(localRow, numInputEnt,
2516 inVals, inputInds.data());
2517 }
2518
// replaceLocalValues, raw-pointer overload.
//
// Looks up the row via graph.getRowInfo(localRow), obtains a writable host
// view of that row's values, and delegates to replaceLocalValuesImpl with
// the numEnt (inputCols[k], inputVals[k]) pairs.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo reports an invalid local row;
//  - otherwise the result of replaceLocalValuesImpl.
2519 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2520 LocalOrdinal
2522 replaceLocalValues (const LocalOrdinal localRow,
2523 const LocalOrdinal numEnt,
2524 const Scalar inputVals[],
2525 const LocalOrdinal inputCols[])
2526 {
2527 typedef impl_scalar_type IST;
2528 typedef LocalOrdinal LO;
2529
2530 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2531 // Fill must be active and the graph (staticGraph_) must exist.
2532 return Teuchos::OrdinalTraits<LO>::invalid ();
2533 }
2534 const crs_graph_type& graph = * (this->staticGraph_);
2535 const RowInfo rowInfo = graph.getRowInfo (localRow);
2536
2537 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2538 // The calling process does not own this row, so it is not
2539 // allowed to modify its values.
2540 return static_cast<LO> (0);
2541 }
2542 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The Impl routine works on impl_scalar_type, so view the caller's
     // Scalar array through an IST pointer.
2543 const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2544 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2545 inputCols, inVals, numEnt);
2546 }
2547
// Implementation helper for replacing values addressed by global column
// index (invoked from the raw-pointer replaceGlobalValues overload).
//
// Wraps inds in an ArrayView of length numElts and hands it, together with
// a callback, to graph.findGlobalIndices. The callback overwrites
// rowVals[offset] with newVals[k].
//
// Returns the value returned by graph.findGlobalIndices.
// NOTE(review): presumably that is the number of input indices located in
// the row, and the callback runs once per located index -- confirm against
// CrsGraph::findGlobalIndices.
2548 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2549 LocalOrdinal
2552 const crs_graph_type& graph,
2553 const RowInfo& rowInfo,
2554 const GlobalOrdinal inds[],
2555 const impl_scalar_type newVals[],
2556 const LocalOrdinal numElts)
2557 {
2558 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
     // k indexes the input arrays; offset indexes the row's value storage.
     // The middle argument is unused here.
2559 auto fun =
2560 [&](size_t const k, size_t const /*start*/, size_t const offset) {
2561 rowVals[offset] = newVals[k];
2562 };
     // std::ref lets the std::function refer to the lambda instead of
     // storing a copy of it.
2563 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2564 return graph.findGlobalIndices(rowInfo, indsT, cb);
2565 }
2566
// replaceGlobalValues, Teuchos::ArrayView overload.
//
// Checks that inputGblColInds and inputVals hold the same number of
// entries, then forwards their raw pointers to the
// (row, count, values, columns) overload of replaceGlobalValues.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in size; otherwise returns whatever the forwarded call returns.
2567 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2568 LocalOrdinal
2570 replaceGlobalValues (const GlobalOrdinal globalRow,
2571 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2572 const Teuchos::ArrayView<const Scalar>& inputVals)
2573 {
2574 typedef LocalOrdinal LO;
2575
2576 const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2577 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
     // Mismatched input lengths: report failure before touching the matrix.
2578 return Teuchos::OrdinalTraits<LO>::invalid ();
2579 }
     // The raw-pointer overload takes the values before the column indices.
2580 return this->replaceGlobalValues (globalRow, numInputEnt,
2581 inputVals.getRawPtr (),
2582 inputGblColInds.getRawPtr ());
2583 }
2584
// replaceGlobalValues, raw-pointer overload.
//
// Looks up the row via graph.getRowInfoFromGlobalRowIndex(globalRow),
// obtains a writable host view of that row's values, and delegates to
// replaceGlobalValuesImpl with the numEnt
// (inputGblColInds[k], inputVals[k]) pairs.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if the row lookup reports an invalid local row;
//  - otherwise the result of replaceGlobalValuesImpl.
2585 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2586 LocalOrdinal
2588 replaceGlobalValues (const GlobalOrdinal globalRow,
2589 const LocalOrdinal numEnt,
2590 const Scalar inputVals[],
2591 const GlobalOrdinal inputGblColInds[])
2592 {
2593 typedef impl_scalar_type IST;
2594 typedef LocalOrdinal LO;
2595
2596 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2597 // Fill must be active and the graph (staticGraph_) must exist.
2598 return Teuchos::OrdinalTraits<LO>::invalid ();
2599 }
2600 const crs_graph_type& graph = * (this->staticGraph_);
2601
2602 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (globalRow);
2603 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2604 // The input global row has no valid local row on the calling
2605 // process, which means that the calling process replaced 0 entries.
2606 return static_cast<LO> (0);
2607 }
2608
2609 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The Impl routine works on impl_scalar_type, so view the caller's
     // Scalar array through an IST pointer.
2610 const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2611 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2612 inputGblColInds, inVals, numEnt);
2613 }
// Kokkos::View overload that forwards to
// replaceGlobalValues(row, count, values, columns).
//
// Checks that inputInds and inputVals have the same extent, reinterprets
// the impl_scalar_type data as Scalar, and forwards the raw pointers.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the extents differ;
// otherwise returns whatever the forwarded call returns.
2615 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2617 local_ordinal_type
2620 const global_ordinal_type globalRow,
2621 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2622 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2623 {
2624 // NOTE(review): the original comment here explained a preference
2625 // for static_assert over std::enable_if when checking template
2626 // parameters (clear compile errors instead of obscure link errors,
2627 // and a more readable return type). No static_assert is present
2628 // in this function as shown, so that remark appears stale --
2629 // confirm before relying on it.
2630 using LO = local_ordinal_type;
2631 const LO numInputEnt = static_cast<LO>(inputInds.extent(0));
2632 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2633 return Teuchos::OrdinalTraits<LO>::invalid();
2634 }
     // The raw-pointer overload is declared in terms of Scalar, so view the
     // impl_scalar_type storage through a Scalar pointer.
2635 const Scalar* const inVals =
2636 reinterpret_cast<const Scalar*>(inputVals.data());
2637 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
2638 inputInds.data());
2639 }
2640
// Implementation helper for summing values addressed by global column
// index into one row (invoked from the raw-pointer sumIntoGlobalValues
// overload).
//
// For each j in [0, numElts) the routine locates inds[j] in the row and,
// if it is present, adds newVals[j] to the matching entry of rowVals. The
// addition uses Kokkos::atomic_add when `atomic` is true and a plain +=
// otherwise.
//
//  - Locally indexed graph: each global index is first converted with
//    colMap.getLocalElement; indices that map to invalid are skipped. A
//    null column Map yields a return value of 0.
//  - Globally indexed graph: the global index is searched for directly.
//  - Neither: nothing is modified and 0 is returned.
//
// Returns the number of input entries that were found and summed in.
//
// NOTE(review): listing line 2695 (the brace closing the locally indexed
// branch, just before the `else if`) is absent from this text.
2641 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2642 LocalOrdinal
2645 const crs_graph_type& graph,
2646 const RowInfo& rowInfo,
2647 const GlobalOrdinal inds[],
2648 const impl_scalar_type newVals[],
2649 const LocalOrdinal numElts,
2650 const bool atomic)
2651 {
2652 typedef LocalOrdinal LO;
2653 typedef GlobalOrdinal GO;
2654
2655 const bool sorted = graph.isSorted ();
2656
2657 size_t hint = 0; // guess at the index's relative offset in the row
2658 LO numValid = 0; // number of valid input column indices
2659
2660 if (graph.isLocallyIndexed ()) {
2661 // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2662 // pointer does NOT change its reference count. Thus, this
2663 // code is still thread safe.
2664 if (graph.colMap_.is_null ()) {
2665 // NO input column indices are valid in this case, since if
2666 // the column Map is null on the calling process, then the
2667 // calling process owns no graph entries.
2668 return numValid;
2669 }
2670 const map_type& colMap = * (graph.colMap_);
2672 // Get a view of the column indices in the row. This amortizes
2673 // the cost of getting the view over all the entries of inds.
2674 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2675 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2676
2677 for (LO j = 0; j < numElts; ++j) {
2678 const LO lclColInd = colMap.getLocalElement (inds[j]);
2679 if (lclColInd != LINV) {
2680 const size_t offset =
2681 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2682 lclColInd, hint, sorted);
     // An offset equal to rowInfo.numEntries is treated as "not found".
2683 if (offset != rowInfo.numEntries) {
2684 if (atomic) {
2685 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2686 }
2687 else {
2688 rowVals[offset] += newVals[j];
2689 }
     // Start the next search just past the entry that was found.
2690 hint = offset + 1;
2691 numValid++;
2692 }
2693 }
2694 }
2696 else if (graph.isGloballyIndexed ()) {
2697 // Get a view of the column indices in the row. This amortizes
2698 // the cost of getting the view over all the entries of inds.
2699 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2700
2701 for (LO j = 0; j < numElts; ++j) {
2702 const GO gblColInd = inds[j];
2703 const size_t offset =
2704 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2705 gblColInd, hint, sorted);
2706 if (offset != rowInfo.numEntries) {
2707 if (atomic) {
2708 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2709 }
2710 else {
2711 rowVals[offset] += newVals[j];
2712 }
2713 hint = offset + 1;
2714 numValid++;
2715 }
2716 }
2717 }
2718 // If the graph is neither locally nor globally indexed on the
2719 // calling process, that means the calling process has no graph
2720 // entries. Thus, none of the input column indices are valid.
2721
2722 return numValid;
2723 }
2724
// sumIntoGlobalValues, Teuchos::ArrayView overload.
//
// Checks that inputGblColInds and inputVals hold the same number of
// entries, then forwards their raw pointers and the `atomic` flag to the
// (row, count, values, columns, atomic) overload of sumIntoGlobalValues.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in size; otherwise returns whatever the forwarded call returns.
2725 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2726 LocalOrdinal
2728 sumIntoGlobalValues (const GlobalOrdinal gblRow,
2729 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2730 const Teuchos::ArrayView<const Scalar>& inputVals,
2731 const bool atomic)
2732 {
2733 typedef LocalOrdinal LO;
2734
2735 const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2736 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
     // Mismatched input lengths: report failure before touching the matrix.
2737 return Teuchos::OrdinalTraits<LO>::invalid ();
2738 }
     // The raw-pointer overload takes the values before the column indices.
2739 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2740 inputVals.getRawPtr (),
2741 inputGblColInds.getRawPtr (),
2742 atomic);
2743 }
2744
// sumIntoGlobalValues, raw-pointer overload.
//
// Looks up the row via graph.getRowInfoFromGlobalRowIndex(gblRow).
//  - If the lookup yields a valid local row, the entries are summed into
//    that row's host values through sumIntoGlobalValuesImpl, honoring
//    `atomic`.
//  - Otherwise the entries are stashed with insertNonownedGlobalValues for
//    later processing, and numInputEnt is returned (see the FIXME below).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
// staticGraph_ is null.
2745 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2746 LocalOrdinal
2748 sumIntoGlobalValues (const GlobalOrdinal gblRow,
2749 const LocalOrdinal numInputEnt,
2750 const Scalar inputVals[],
2751 const GlobalOrdinal inputGblColInds[],
2752 const bool atomic)
2753 {
2754 typedef impl_scalar_type IST;
2755 typedef LocalOrdinal LO;
2756 typedef GlobalOrdinal GO;
2757
2758 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2759 // Fill must be active and the graph (staticGraph_) must exist.
2760 return Teuchos::OrdinalTraits<LO>::invalid ();
2761 }
2762 const crs_graph_type& graph = * (this->staticGraph_);
2763
2764 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2765 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2766 // mfh 23 Mar 2017, 26 Jul 2017: This branch may not be
2767 // thread safe in a debug build, in part because it uses
2768 // Teuchos::ArrayView, and in part because of the data structure
2769 // used to stash outgoing entries.
2770 using Teuchos::ArrayView;
     // Pass nullptr for empty input so the ArrayView is never built from a
     // (pointer, 0) pair with a non-null pointer.
2771 ArrayView<const GO> inputGblColInds_av(
2772 numInputEnt == 0 ? nullptr : inputGblColInds,
2773 numInputEnt);
2774 ArrayView<const Scalar> inputVals_av(
2775 numInputEnt == 0 ? nullptr :
2776 inputVals, numInputEnt);
2777 // gblRow is not in the row Map on the calling process, so stash
2778 // the given entries away in a separate data structure.
2779 // globalAssemble() (called during fillComplete()) will exchange
2780 // that data and sum it in using sumIntoGlobalValues().
2781 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2782 inputVals_av);
2783 // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2784 // since we won't know whether the given indices were valid
2785 // until globalAssemble (called in fillComplete) is called.
2786 // That's why insertNonownedGlobalValues doesn't return
2787 // anything. Just for consistency, I'll return the number of
2788 // entries that the user gave us.
2789 return numInputEnt;
2790 }
2791 else { // input row is in the row Map on the calling process
2792 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The Impl routine works on impl_scalar_type, so view the caller's
     // Scalar array through an IST pointer.
2793 const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2794 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2795 inputGblColInds, inVals,
2796 numInputEnt, atomic);
2797 }
2798 }
2799
// transformLocalValues, row-index entry point.
//
// Looks up the row via graph.getRowInfo(lclRow), obtains a writable host
// view of that row's values, and delegates to the
// (rowVals, graph, rowInfo, ...) overload of transformLocalValues, passing
// the binary function f and the `atomic` flag through.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo reports an invalid local row;
//  - otherwise the result of the delegated call.
2800 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2801 LocalOrdinal
2802 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2803 transformLocalValues (const LocalOrdinal lclRow,
2804 const LocalOrdinal numInputEnt,
2805 const impl_scalar_type inputVals[],
2806 const LocalOrdinal inputCols[],
2807 std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2808 const bool atomic)
2809 {
2810 using Tpetra::Details::OrdinalTraits;
2811 typedef LocalOrdinal LO;
2812
2813 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2814 // Fill must be active and the graph (staticGraph_) must exist.
2815 return Teuchos::OrdinalTraits<LO>::invalid ();
2816 }
2817 const crs_graph_type& graph = * (this->staticGraph_);
2818 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2819
2820 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2821 // The calling process does not own this row, so it is not
2822 // allowed to modify its values.
2823 return static_cast<LO> (0);
2824 }
2825 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The delegated overload takes the column indices before the values.
2826 return this->transformLocalValues (curRowVals.data (), graph,
2827 rowInfo, inputCols, inputVals,
2828 numInputEnt, f, atomic);
2829 }
2830
// transformGlobalValues, row-index entry point.
//
// Looks up the row via graph.getRowInfoFromGlobalRowIndex(gblRow), obtains
// a writable host view of that row's values, and delegates to the
// (rowVals, graph, rowInfo, ...) overload of transformGlobalValues,
// passing the binary function f and the `atomic` flag through.
//
// Returns:
//  - OrdinalTraits<LO>::invalid() if fill is not active or staticGraph_ is
//    null;
//  - 0 if the row lookup reports an invalid local row;
//  - otherwise the result of the delegated call.
//
// NOTE(review): listing line 2840 (the opening brace of the function body)
// is absent from this text.
2831 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2832 LocalOrdinal
2833 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2834 transformGlobalValues (const GlobalOrdinal gblRow,
2835 const LocalOrdinal numInputEnt,
2836 const impl_scalar_type inputVals[],
2837 const GlobalOrdinal inputCols[],
2838 std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2839 const bool atomic)
2841 using Tpetra::Details::OrdinalTraits;
2842 typedef LocalOrdinal LO;
2843
2844 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2845 // Fill must be active and the graph (staticGraph_) must exist.
2846 return OrdinalTraits<LO>::invalid ();
2847 }
2848 const crs_graph_type& graph = * (this->staticGraph_);
2849 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2850
2851 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2852 // The calling process does not own this row, so it is not
2853 // allowed to modify its values.
2854 return static_cast<LO> (0);
2855 }
2856 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The delegated overload takes the column indices before the values.
2857 return this->transformGlobalValues (curRowVals.data (), graph,
2858 rowInfo, inputCols, inputVals,
2859 numInputEnt, f, atomic);
2860 }
2861
// Row-storage overload that applies a binary function to entries addressed
// by local column index (invoked from the row-index transformLocalValues
// entry point).
//
// For each j in [0, numElts) the routine locates inds[j] in the row and,
// if it is present, replaces the stored value with
// f(storedValue, newVals[j]). When `atomic` is true the update goes
// through atomic_binary_function_update; otherwise it is a plain
// read-modify-write.
//
//  - Locally indexed graph: the local index is searched for directly.
//  - Globally indexed graph: each local index is first converted with
//    colMap.getGlobalElement; indices that map to invalid are skipped. A
//    null column Map yields a return value of 0.
//  - Neither: nothing is modified and 0 is returned.
//
// Returns the number of input entries that were found and transformed.
2862 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2863 LocalOrdinal
2866 const crs_graph_type& graph,
2867 const RowInfo& rowInfo,
2868 const LocalOrdinal inds[],
2869 const impl_scalar_type newVals[],
2870 const LocalOrdinal numElts,
2871 std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2872 const bool atomic)
2873 {
2874 typedef impl_scalar_type ST;
2875 typedef LocalOrdinal LO;
2876 typedef GlobalOrdinal GO;
2877
2878 //if (newVals.extent (0) != inds.extent (0)) {
2879 // The sizes of the input arrays must match.
2880 //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2881 //}
2882 //const LO numElts = static_cast<LO> (inds.extent (0));
2883 const bool sorted = graph.isSorted ();
2884
2885 LO numValid = 0; // number of valid input column indices
2886 size_t hint = 0; // Guess for the current index k into rowVals
2887
2888 if (graph.isLocallyIndexed ()) {
2889 // Get a view of the column indices in the row. This amortizes
2890 // the cost of getting the view over all the entries of inds.
2891 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2892
2893 for (LO j = 0; j < numElts; ++j) {
2894 const LO lclColInd = inds[j];
2895 const size_t offset =
2896 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2897 lclColInd, hint, sorted);
     // An offset equal to rowInfo.numEntries is treated as "not found".
2898 if (offset != rowInfo.numEntries) {
2899 if (atomic) {
2900 // NOTE (mfh 30 Nov 2015) The commented-out code is
2901 // wrong because another thread may have changed
2902 // rowVals[offset] between those two lines of code.
2903 //
2904 //const ST newVal = f (rowVals[offset], newVals[j]);
2905 //Kokkos::atomic_assign (&rowVals[offset], newVal);
2907 volatile ST* const dest = &rowVals[offset];
2908 (void) atomic_binary_function_update (dest, newVals[j], f);
2909 }
2910 else {
2911 // use binary function f
2912 rowVals[offset] = f (rowVals[offset], newVals[j]);
2913 }
     // Start the next search just past the entry that was found.
2914 hint = offset + 1;
2915 ++numValid;
2916 }
2917 }
2918 }
2919 else if (graph.isGloballyIndexed ()) {
2920 // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
2921 // pointer does NOT change its reference count. Thus, this
2922 // code is still thread safe.
2923 if (graph.colMap_.is_null ()) {
2924 // NO input column indices are valid in this case. Either
2925 // the column Map hasn't been set yet (so local indices
2926 // don't exist yet), or the calling process owns no graph
2927 // entries.
2928 return numValid;
2929 }
2930 const map_type& colMap = * (graph.colMap_);
2931 // Get a view of the column indices in the row. This amortizes
2932 // the cost of getting the view over all the entries of inds.
2933 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2934
2935 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2936 for (LO j = 0; j < numElts; ++j) {
2937 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2938 if (gblColInd != GINV) {
2939 const size_t offset =
2940 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2941 gblColInd, hint, sorted);
2942 if (offset != rowInfo.numEntries) {
2943 if (atomic) {
2944 // NOTE (mfh 30 Nov 2015) The commented-out code is
2945 // wrong because another thread may have changed
2946 // rowVals[offset] between those two lines of code.
2947 //
2948 //const ST newVal = f (rowVals[offset], newVals[j]);
2949 //Kokkos::atomic_assign (&rowVals[offset], newVal);
2950
2951 volatile ST* const dest = &rowVals[offset];
2952 (void) atomic_binary_function_update (dest, newVals[j], f);
2953 }
2954 else {
2955 // use binary function f
2956 rowVals[offset] = f (rowVals[offset], newVals[j]);
2957 }
2958 hint = offset + 1;
2959 numValid++;
2960 }
2961 }
2962 }
2963 }
2964 // If the graph is neither locally nor globally indexed on the
2965 // calling process, that means the calling process has no graph
2966 // entries. Thus, none of the input column indices are valid.
2967
2968 return numValid;
2969 }
2970
// Row-storage overload that applies a binary function to entries addressed
// by global column index (invoked from the row-index transformGlobalValues
// entry point).
//
// For each j in [0, numElts) the routine locates inds[j] in the row and,
// if it is present, replaces the stored value with
// f(storedValue, newVals[j]). When `atomic` is true the update goes
// through atomic_binary_function_update; otherwise it is a plain
// read-modify-write.
//
//  - Globally indexed graph: the global index is searched for directly.
//  - Locally indexed graph: each global index is first converted with
//    colMap.getLocalElement; indices that map to invalid are skipped. A
//    null column Map yields a return value of 0.
//  - Neither: nothing is modified and 0 is returned.
//
// Returns the number of input entries that were found and transformed.
//
// NOTE(review): listing line 3025 (the brace closing
// `if (offset != rowInfo.numEntries)` in the globally indexed branch) is
// absent from this text.
2971 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2972 LocalOrdinal
2973 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2974 transformGlobalValues (impl_scalar_type rowVals[],
2975 const crs_graph_type& graph,
2976 const RowInfo& rowInfo,
2977 const GlobalOrdinal inds[],
2978 const impl_scalar_type newVals[],
2979 const LocalOrdinal numElts,
2980 std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2981 const bool atomic)
2982 {
2983 typedef impl_scalar_type ST;
2984 typedef LocalOrdinal LO;
2985 typedef GlobalOrdinal GO;
2986
2987 //if (newVals.extent (0) != inds.extent (0)) {
2988 // The sizes of the input arrays must match.
2989 //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2990 //}
2991 //const LO numElts = static_cast<LO> (inds.extent (0));
2992 const bool sorted = graph.isSorted ();
2993
2994 LO numValid = 0; // number of valid input column indices
2995 size_t hint = 0; // Guess for the current index k into rowVals
2996
2997 if (graph.isGloballyIndexed ()) {
2998 // Get a view of the column indices in the row. This amortizes
2999 // the cost of getting the view over all the entries of inds.
3000 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
3002 for (LO j = 0; j < numElts; ++j) {
3003 const GO gblColInd = inds[j];
3004 const size_t offset =
3005 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3006 gblColInd, hint, sorted);
     // An offset equal to rowInfo.numEntries is treated as "not found".
3007 if (offset != rowInfo.numEntries) {
3008 if (atomic) {
3009 // NOTE (mfh 30 Nov 2015) The commented-out code is
3010 // wrong because another thread may have changed
3011 // rowVals[offset] between those two lines of code.
3012 //
3013 //const ST newVal = f (rowVals[offset], newVals[j]);
3014 //Kokkos::atomic_assign (&rowVals[offset], newVal);
3015
3016 volatile ST* const dest = &rowVals[offset];
3017 (void) atomic_binary_function_update (dest, newVals[j], f);
3018 }
3019 else {
3020 // use binary function f
3021 rowVals[offset] = f (rowVals[offset], newVals[j]);
3022 }
     // Start the next search just past the entry that was found.
3023 hint = offset + 1;
3024 ++numValid;
3026 }
3027 }
3028 else if (graph.isLocallyIndexed ()) {
3029 // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
3030 // pointer does NOT change its reference count. Thus, this
3031 // code is still thread safe.
3032 if (graph.colMap_.is_null ()) {
3033 // NO input column indices are valid in this case. Either the
3034 // column Map hasn't been set yet (so local indices don't
3035 // exist yet), or the calling process owns no graph entries.
3036 return numValid;
3037 }
3038 const map_type& colMap = * (graph.colMap_);
3039 // Get a view of the column indices in the row. This amortizes
3040 // the cost of getting the view over all the entries of inds.
3041 auto colInds = graph.getLocalIndsViewHost (rowInfo);
3043 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
3044 for (LO j = 0; j < numElts; ++j) {
3045 const LO lclColInd = colMap.getLocalElement (inds[j]);
3046 if (lclColInd != LINV) {
3047 const size_t offset =
3048 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3049 lclColInd, hint, sorted);
3050 if (offset != rowInfo.numEntries) {
3051 if (atomic) {
3052 // NOTE (mfh 30 Nov 2015) The commented-out code is
3053 // wrong because another thread may have changed
3054 // rowVals[offset] between those two lines of code.
3055 //
3056 //const ST newVal = f (rowVals[offset], newVals[j]);
3057 //Kokkos::atomic_assign (&rowVals[offset], newVal);
3058
3059 volatile ST* const dest = &rowVals[offset];
3060 (void) atomic_binary_function_update (dest, newVals[j], f);
3061 }
3062 else {
3063 // use binary function f
3064 rowVals[offset] = f (rowVals[offset], newVals[j]);
3065 }
3066 hint = offset + 1;
3067 numValid++;
3068 }
3069 }
3070 }
3071 }
3072 // If the graph is neither locally nor globally indexed on the
3073 // calling process, that means the calling process has no graph
3074 // entries. Thus, none of the input column indices are valid.
3076 return numValid;
3077 }
3078
// Implementation helper for summing values addressed by local column
// index into one row (invoked from the raw-pointer sumIntoLocalValues
// overload).
//
// For each j in [0, numElts) the routine locates inds[j] in the row and,
// if it is present, adds newVals[j] to the matching entry of rowVals. The
// addition uses Kokkos::atomic_add when `atomic` is true and a plain +=
// otherwise.
//
//  - Locally indexed graph: the local index is searched for directly.
//  - Globally indexed graph: each local index is first converted with
//    colMap.getGlobalElement; indices that map to invalid are skipped. A
//    null column Map yields Teuchos::OrdinalTraits<LO>::invalid().
//  - Neither: nothing is modified and 0 is returned.
//
// Returns the number of input entries that were found and summed in
// (or invalid() in the null-column-Map case above).
3079 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3080 LocalOrdinal
3083 const crs_graph_type& graph,
3084 const RowInfo& rowInfo,
3085 const LocalOrdinal inds[],
3086 const impl_scalar_type newVals[],
3087 const LocalOrdinal numElts,
3088 const bool atomic)
3089 {
3090 typedef LocalOrdinal LO;
3091 typedef GlobalOrdinal GO;
3092
3093 const bool sorted = graph.isSorted ();
3094
3095 size_t hint = 0; // Guess for the current index k into rowVals
3096 LO numValid = 0; // number of valid local column indices
3097
3098 if (graph.isLocallyIndexed ()) {
3099 // Get a view of the column indices in the row. This amortizes
3100 // the cost of getting the view over all the entries of inds.
3101 auto colInds = graph.getLocalIndsViewHost (rowInfo);
3102
3103 for (LO j = 0; j < numElts; ++j) {
3104 const LO lclColInd = inds[j];
3105 const size_t offset =
3106 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3107 lclColInd, hint, sorted);
     // An offset equal to rowInfo.numEntries is treated as "not found".
3108 if (offset != rowInfo.numEntries) {
3109 if (atomic) {
3110 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3111 }
3112 else {
3113 rowVals[offset] += newVals[j];
3114 }
     // Start the next search just past the entry that was found.
3115 hint = offset + 1;
3116 ++numValid;
3117 }
3118 }
3119 }
3120 else if (graph.isGloballyIndexed ()) {
3121 if (graph.colMap_.is_null ()) {
3122 return Teuchos::OrdinalTraits<LO>::invalid ();
3123 }
     // Bind by reference, as the sibling routines do. Copying the Map by
     // value is unnecessary work and copies its reference-counted members.
3124 const map_type& colMap = * (graph.colMap_);
3125
3126 // Get a view of the column indices in the row. This amortizes
3127 // the cost of getting the view over all the entries of inds.
3128 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
3129
3130 for (LO j = 0; j < numElts; ++j) {
3131 const GO gblColInd = colMap.getGlobalElement (inds[j]);
3132 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3133 const size_t offset =
3134 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3135 gblColInd, hint, sorted);
3136 if (offset != rowInfo.numEntries) {
3137 if (atomic) {
3138 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3139 }
3140 else {
3141 rowVals[offset] += newVals[j];
3142 }
3143 hint = offset + 1;
3144 ++numValid;
3145 }
3146 }
3147 }
3148 }
3149 // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
3150 // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
3151 // to be neither locally nor globally indexed on a process.
3152 // This means that the graph or matrix has no entries on that
3153 // process. Epetra also works like this. It's related to lazy
3154 // allocation (on first insertion, not at graph / matrix
3155 // construction). Lazy allocation will go away because it is
3156 // not thread scalable.
3158 return numValid;
3159 }
3160
// sumIntoLocalValues, Teuchos::ArrayView overload.
//
// Checks that indices and values hold the same number of entries, then
// forwards their raw pointers and the `atomic` flag to the
// (row, count, values, columns, atomic) overload of sumIntoLocalValues.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in size; otherwise returns whatever the forwarded call returns.
3161 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3162 LocalOrdinal
3164 sumIntoLocalValues (const LocalOrdinal localRow,
3165 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3166 const Teuchos::ArrayView<const Scalar>& values,
3167 const bool atomic)
3168 {
3169 using LO = local_ordinal_type;
3170 const LO numInputEnt = static_cast<LO>(indices.size());
3171 if (static_cast<LO>(values.size()) != numInputEnt) {
     // Mismatched input lengths: report failure before touching the matrix.
3172 return Teuchos::OrdinalTraits<LO>::invalid();
3173 }
3174 const LO* const inputInds = indices.getRawPtr();
3175 const scalar_type* const inputVals = values.getRawPtr();
     // The raw-pointer overload takes the values before the column indices.
3176 return this->sumIntoLocalValues(localRow, numInputEnt,
3177 inputVals, inputInds, atomic);
3178 }
3179
// Kokkos::View overload that forwards to
// sumIntoLocalValues(row, count, values, columns, atomic).
//
// Checks that inputInds and inputVals have the same extent, reinterprets
// the impl_scalar_type data as scalar_type, and forwards the raw pointers
// together with the `atomic` flag.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the extents differ;
// otherwise returns whatever the forwarded call returns.
3180 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3182 local_ordinal_type
3185 const local_ordinal_type localRow,
3186 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3187 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3188 const bool atomic)
3189 {
3190 using LO = local_ordinal_type;
3191 const LO numInputEnt = static_cast<LO>(inputInds.extent(0));
3192 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3193 return Teuchos::OrdinalTraits<LO>::invalid();
3194 }
     // The raw-pointer overload is declared in terms of the user-facing
     // scalar type, so view the impl_scalar_type storage through it.
3195 const scalar_type* inVals =
3196 reinterpret_cast<const scalar_type*>(inputVals.data());
3197 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3198 inputInds.data(), atomic);
3199 }
3200
// sumIntoLocalValues, raw-pointer overload.
//
// Looks up the row via graph.getRowInfo(localRow), obtains a writable host
// view of that row's values, and delegates to sumIntoLocalValuesImpl with
// the numEnt (cols[k], vals[k]) pairs and the `atomic` flag.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo reports an invalid local row;
//  - otherwise the result of sumIntoLocalValuesImpl.
3201 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3202 LocalOrdinal
3204 sumIntoLocalValues (const LocalOrdinal localRow,
3205 const LocalOrdinal numEnt,
3206 const Scalar vals[],
3207 const LocalOrdinal cols[],
3208 const bool atomic)
3209 {
3210 typedef impl_scalar_type IST;
3211 typedef LocalOrdinal LO;
3212
3213 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3214 // Fill must be active and the graph (staticGraph_) must exist.
3215 return Teuchos::OrdinalTraits<LO>::invalid ();
3216 }
3217 const crs_graph_type& graph = * (this->staticGraph_);
3218 const RowInfo rowInfo = graph.getRowInfo (localRow);
3219
3220 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3221 // The calling process does not own this row, so it is not
3222 // allowed to modify its values.
3223 return static_cast<LO> (0);
3224 }
3225 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
     // The Impl routine works on impl_scalar_type, so view the caller's
     // Scalar array through an IST pointer.
3226 const IST* const inputVals = reinterpret_cast<const IST*> (vals);
3227 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3228 cols, inputVals, numEnt, atomic);
3229 }
3230
// Returns a read-only host view of the values belonging to the row
// described by rowinfo.
//
// If the row has no allocated entries (allocSize == 0) or
// valuesUnpacked_wdv is empty, a default-constructed view is returned.
// Otherwise the result comes from valuesUnpacked_wdv.getHostSubview called
// with rowinfo.offset1D, rowinfo.allocSize and Access::ReadOnly.
// NOTE(review): presumably those arguments mean (start offset, length) --
// confirm against the wrapped dual view's getHostSubview.
3231 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3233 values_dualv_type::t_host::const_type
3235 getValuesViewHost (const RowInfo& rowinfo) const
3236 {
3237 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3238 return typename values_dualv_type::t_host::const_type ();
3239 else
3240 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3241 rowinfo.allocSize,
3242 Access::ReadOnly);
3243 }
3244
// Returns a writable host view of the values belonging to the row
// described by rowinfo.
//
// If the row has no allocated entries (allocSize == 0) or
// valuesUnpacked_wdv is empty, a default-constructed view is returned.
// Otherwise the result comes from valuesUnpacked_wdv.getHostSubview called
// with rowinfo.offset1D, rowinfo.allocSize and Access::ReadWrite.
// NOTE(review): presumably those arguments mean (start offset, length) --
// confirm against the wrapped dual view's getHostSubview.
3245 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3247 values_dualv_type::t_host
3249 getValuesViewHostNonConst (const RowInfo& rowinfo)
3250 {
3251 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3252 return typename values_dualv_type::t_host ();
3253 else
3254 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3255 rowinfo.allocSize,
3256 Access::ReadWrite);
3257 }
3258
// Returns a read-only device view of the values belonging to the row
// described by rowinfo.
//
// If the row has no allocated entries (allocSize == 0) or
// valuesUnpacked_wdv is empty, a default-constructed view is returned.
// Otherwise the result comes from valuesUnpacked_wdv.getDeviceSubview
// called with rowinfo.offset1D, rowinfo.allocSize and Access::ReadOnly.
// NOTE(review): presumably those arguments mean (start offset, length) --
// confirm against the wrapped dual view's getDeviceSubview.
3259 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3261 values_dualv_type::t_dev::const_type
3263 getValuesViewDevice (const RowInfo& rowinfo) const
3264 {
3265 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3266 return typename values_dualv_type::t_dev::const_type ();
3267 else
3268 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3269 rowinfo.allocSize,
3270 Access::ReadOnly);
3271 }
3272
// Returns a writable device view of the values belonging to the row
// described by rowinfo.
// NOTE(review): the declarator line is not present in this text;
// presumably this is the non-const counterpart of getValuesViewDevice --
// confirm against the class declaration.
//
// If the row has no allocated entries (allocSize == 0) or
// valuesUnpacked_wdv is empty, a default-constructed view is returned.
// Otherwise the result comes from valuesUnpacked_wdv.getDeviceSubview
// called with rowinfo.offset1D, rowinfo.allocSize and Access::ReadWrite.
3273 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3275 values_dualv_type::t_dev
3278 {
3279 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3280 return typename values_dualv_type::t_dev ();
3281 else
3282 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3283 rowinfo.allocSize,
3284 Access::ReadWrite);
3285 }
3286
3287#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (compiled only under TPETRA_ENABLE_DEPRECATED_CODE):
// returns a read-only Teuchos::ArrayView over the values of the row
// described by rowinfo.
//
// If valuesUnpacked_wdv is empty or the row has no allocated entries, an
// empty ArrayView is returned. Otherwise the view wraps the raw pointer of
// a read-only host subview and has length rowinfo.allocSize.
//
// In debug builds (HAVE_TPETRA_DEBUG) a std::range_error is raised through
// TEUCHOS_TEST_FOR_EXCEPTION when offset1D + allocSize exceeds the storage
// extent.
3288 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3289 Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3291 getView (RowInfo rowinfo) const
3292 {
3295 using Teuchos::ArrayView;
3296 using ST = impl_scalar_type;
3298
3299 if (valuesUnpacked_wdv.extent (0) != 0 && rowinfo.allocSize > 0) {
3300
3301#ifdef HAVE_TPETRA_DEBUG
3302 TEUCHOS_TEST_FOR_EXCEPTION(
3303 rowinfo.offset1D + rowinfo.allocSize > valuesUnpacked_wdv.extent (0),
3304 std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
3305 "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
3306 rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
3307 ") > valuesUnpacked_wdv.extent(0) (" << valuesUnpacked_wdv.extent (0)
3308 << ").");
3309#endif // HAVE_TPETRA_DEBUG
3310
3312 // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3313 // directly, because that first creates a _managed_ subview,
3314 // then returns an unmanaged version of that. That touches the
3315 // reference count, which costs performance in a measurable way.
3316 // Instead, we create a temporary unmanaged view, then create
3317 // the subview from that.
     // NOTE(review): the remark above looks stale -- the code below calls
     // getHostSubview directly and builds no temporary unmanaged view.
3318 // KDDKDD UVM REMOVAL This method is unsafe and deprecated
3319 auto sv = valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3320 rowinfo.allocSize,
3321 Access::ReadOnly);
3322 const ST* const sv_raw = (rowinfo.allocSize == 0) ? nullptr : sv.data ();
3323 return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
3324 }
3325 else {
     // Empty view, spelled with the function's declared element type.
3326 return ArrayView<const ST> ();
3327 }
3328 }
3329#endif // TPETRA_ENABLE_DEPRECATED_CODE
3330
3331
// Copies the column indices and values of one local row into
// caller-provided host views (the error prefix below identifies this as
// getLocalRowCopy).
//
// indices    [out] receives the row's local column indices.
// values     [out] receives the row's values.
// numEntries [out] set to the number of entries in the row.
//
// Raises std::runtime_error through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
// if the matrix has no column Map, or if either output view is smaller
// than the row's entry count.
//
// numEntries is assigned before the row's validity is checked; if
// getRowInfo reports an invalid local row, nothing is copied. For a
// globally indexed graph the stored global indices are converted with
// colMap.getLocalElement while copying.
3332 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3333 void
3336 nonconst_local_inds_host_view_type &indices,
3337 nonconst_values_host_view_type &values,
3338 size_t& numEntries) const
3339 {
3340 using Teuchos::ArrayView;
3341 using Teuchos::av_reinterpret_cast;
3342 const char tfecfFuncName[] = "getLocalRowCopy: ";
3343
3344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3345 (! this->hasColMap (), std::runtime_error,
3346 "The matrix does not have a column Map yet. This means we don't have "
3347 "local indices for columns yet, so it doesn't make sense to call this "
3348 "method. If the matrix doesn't have a column Map yet, you should call "
3349 "fillComplete on it first.");
3350
3351 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3352 const size_t theNumEntries = rowinfo.numEntries;
3353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3354 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3355 static_cast<size_t> (values.size ()) < theNumEntries,
3356 std::runtime_error, "Row with local index " << localRow << " has " <<
3357 theNumEntries << " entry/ies, but indices.size() = " <<
3358 indices.size () << " and values.size() = " << values.size () << ".");
3359 numEntries = theNumEntries; // first side effect
3360
3361 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3362 if (staticGraph_->isLocallyIndexed ()) {
3363 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3364 auto curVals = getValuesViewHost(rowinfo);
3365
3366 for (size_t j = 0; j < theNumEntries; ++j) {
3367 values[j] = curVals[j];
3368 indices[j] = curLclInds(j);
3369 }
3370 }
3371 else if (staticGraph_->isGloballyIndexed ()) {
3372 // Don't call getColMap(), because it touches RCP's reference count.
3373 const map_type& colMap = * (staticGraph_->colMap_);
3374 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3375 auto curVals = getValuesViewHost(rowinfo);
3376
3377 for (size_t j = 0; j < theNumEntries; ++j) {
3378 values[j] = curVals[j];
     // Stored indices are global here; hand back local ones.
3379 indices[j] = colMap.getLocalElement (curGblInds(j));
3380 }
3381 }
3382 }
3383 }
3384
3385#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated Teuchos::ArrayView overload of getLocalRowCopy (compiled only
// under TPETRA_ENABLE_DEPRECATED_CODE).
//
// Copies the column indices (as local indices) and the values of row
// localRow into the caller-supplied ArrayViews.
//
// localRow   [in]  local row index, resolved through the graph's getRowInfo.
// indices    [out] receives the row's local column indices.
// values     [out] receives the row's values.
// numEntries [out] set to the row's entry count before anything is copied.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// has no column Map, or if either ArrayView is shorter than the row.
// Nothing is copied when the graph reports the row as invalid.
3386 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3387 void
3389 getLocalRowCopy (LocalOrdinal localRow,
3390 const Teuchos::ArrayView<LocalOrdinal>& indices,
3391 const Teuchos::ArrayView<Scalar>& values,
3392 size_t& numEntries) const
3393 {
3394 using Teuchos::ArrayView;
3395 using Teuchos::av_reinterpret_cast;
3396 const char tfecfFuncName[] = "getLocalRowCopy: ";
3397
3398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3399 (! this->hasColMap (), std::runtime_error,
3400 "The matrix does not have a column Map yet. This means we don't have "
3401 "local indices for columns yet, so it doesn't make sense to call this "
3402 "method. If the matrix doesn't have a column Map yet, you should call "
3403 "fillComplete on it first.");
3404
3405 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3406 const size_t theNumEntries = rowinfo.numEntries;
// Both output arrays must be able to hold the whole row.
3407 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3408 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3409 static_cast<size_t> (values.size ()) < theNumEntries,
3410 std::runtime_error, "Row with local index " << localRow << " has " <<
3411 theNumEntries << " entry/ies, but indices.size() = " <<
3412 indices.size () << " and values.size() = " << values.size () << ".");
3413 numEntries = theNumEntries; // first side effect
3414
3415 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3416 if (staticGraph_->isLocallyIndexed ()) {
// Stored indices are already local: copy them through unchanged.
3417 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3418 auto curVals = getValuesViewHost(rowinfo);
3419
3420 for (size_t j = 0; j < theNumEntries; ++j) {
3421 values[j] = curVals[j];
3422 indices[j] = curLclInds(j);
3423 }
3424 }
3425 else if (staticGraph_->isGloballyIndexed ()) {
3426 // Don't call getColMap(), because it touches RCP's reference count.
3427 const map_type& colMap = * (staticGraph_->colMap_);
3428 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3429 auto curVals = getValuesViewHost(rowinfo);
3430
// Stored indices are global: translate each one through the column Map.
3431 for (size_t j = 0; j < theNumEntries; ++j) {
3432 values[j] = curVals[j];
3433 indices[j] = colMap.getLocalElement (curGblInds(j));
3434 }
3435 }
3436 }
3437 }
3438#endif
3439
// getGlobalRowCopy (host-view overload; per tfecfFuncName below -- the
// qualified-name lines 3442-3443 are absent from this listing).
//
// Copies the column indices (as global indices) and the values of one row,
// looked up by global row index, into the caller-supplied views.
//
// indices    [out] receives the row's global column indices.
// values     [out] receives the row's values.
// numEntries [out] set to the row's entry count before anything is copied.
//
// Raises std::runtime_error (through the Teuchos test macro) if either
// output view is shorter than the row.  Nothing is copied when the graph
// reports the row as invalid.
3440template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3441void
3444 nonconst_global_inds_host_view_type &indices,
3445 nonconst_values_host_view_type &values,
3446 size_t& numEntries) const
3447 {
3448 using Teuchos::ArrayView;
3449 using Teuchos::av_reinterpret_cast;
3450 const char tfecfFuncName[] = "getGlobalRowCopy: ";
3451
3452 const RowInfo rowinfo =
3453 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3454 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3455 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3456 static_cast<size_t> (indices.size ()) < theNumEntries ||
3457 static_cast<size_t> (values.size ()) < theNumEntries,
3458 std::runtime_error, "Row with global index " << globalRow << " has "
3459 << theNumEntries << " entry/ies, but indices.size() = " <<
3460 indices.size () << " and values.size() = " << values.size () << ".");
3461 numEntries = theNumEntries; // first side effect
3462
3463 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3464 if (staticGraph_->isLocallyIndexed ()) {
// Read the column Map through the graph's member directly.
3465 const map_type& colMap = * (staticGraph_->colMap_);
3466 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3467 auto curVals = getValuesViewHost(rowinfo);
3468
// Stored indices are local: translate each one to a global index.
3469 for (size_t j = 0; j < theNumEntries; ++j) {
3470 values[j] = curVals[j];
3471 indices[j] = colMap.getGlobalElement (curLclInds(j));
3472 }
3473 }
3474 else if (staticGraph_->isGloballyIndexed ()) {
// Stored indices are already global: copy them through unchanged.
3475 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3476 auto curVals = getValuesViewHost(rowinfo);
3477
3478 for (size_t j = 0; j < theNumEntries; ++j) {
3479 values[j] = curVals[j];
3480 indices[j] = curGblInds(j);
3481 }
3482 }
3483 }
3484 }
3485
3486#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated Teuchos::ArrayView overload of getGlobalRowCopy (compiled only
// under TPETRA_ENABLE_DEPRECATED_CODE).
//
// Copies the column indices (as global indices) and the values of the row
// with global index globalRow into the caller-supplied ArrayViews.
//
// globalRow  [in]  global row index, resolved through the graph's
//                  getRowInfoFromGlobalRowIndex.
// indices    [out] receives the row's global column indices.
// values     [out] receives the row's values.
// numEntries [out] set to the row's entry count before anything is copied.
//
// Raises std::runtime_error (through the Teuchos test macro) if either
// ArrayView is shorter than the row.  Nothing is copied when the graph
// reports the row as invalid.
3487 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3488 void
3490 getGlobalRowCopy (GlobalOrdinal globalRow,
3491 const Teuchos::ArrayView<GlobalOrdinal>& indices,
3492 const Teuchos::ArrayView<Scalar>& values,
3493 size_t& numEntries) const
3494 {
3495 using Teuchos::ArrayView;
3496 using Teuchos::av_reinterpret_cast;
3497 const char tfecfFuncName[] = "getGlobalRowCopy: ";
3498
3499 const RowInfo rowinfo =
3500 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3501 const size_t theNumEntries = rowinfo.numEntries;
// Both output arrays must be able to hold the whole row.
3502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3503 static_cast<size_t> (indices.size ()) < theNumEntries ||
3504 static_cast<size_t> (values.size ()) < theNumEntries,
3505 std::runtime_error, "Row with global index " << globalRow << " has "
3506 << theNumEntries << " entry/ies, but indices.size() = " <<
3507 indices.size () << " and values.size() = " << values.size () << ".");
3508 numEntries = theNumEntries; // first side effect
3509
3510 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3511 if (staticGraph_->isLocallyIndexed ()) {
// Read the column Map through the graph's member directly.
3512 const map_type& colMap = * (staticGraph_->colMap_);
3513 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3514 auto curVals = getValuesViewHost(rowinfo);
3515
// Stored indices are local: translate each one to a global index.
3516 for (size_t j = 0; j < theNumEntries; ++j) {
3517 values[j] = curVals[j];
3518 indices[j] = colMap.getGlobalElement (curLclInds(j));
3519 }
3520 }
3521 else if (staticGraph_->isGloballyIndexed ()) {
// Stored indices are already global: copy them through unchanged.
3522 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3523 auto curVals = getValuesViewHost(rowinfo);
3524
3525 for (size_t j = 0; j < theNumEntries; ++j) {
3526 values[j] = curVals[j];
3527 indices[j] = curGblInds(j);
3528 }
3529 }
3530 }
3531 }
3532#endif
3533
// getLocalRowView (host-view overload).
//
// Points the output views at the stored local column indices and values of
// row localRow, without copying.  Both views are requested read-only.
//
// localRow [in]  local row index, resolved through the graph's getRowInfo.
// indices  [out] subview of the graph's unpacked local indices for the row.
// values   [out] subview of the matrix's unpacked values for the row.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// is globally indexed.  An invalid or empty row yields default-constructed
// (empty) views.  With HAVE_TPETRA_DEBUG defined, the resulting sizes are
// cross-checked and std::logic_error is raised on any mismatch.
3534 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3535 void
3537 getLocalRowView(LocalOrdinal localRow,
3538 local_inds_host_view_type &indices,
3539 values_host_view_type &values) const
3540 {
3541 const char tfecfFuncName[] = "getLocalRowView: ";
3543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3544 isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
3545 "its indices as global indices, so you cannot get a view with local "
3546 "column indices. If the matrix has a column Map, you may call "
3547 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3548 "a view with global column indices by calling getGlobalRowCopy().");
3549
3550 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3551 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3552 rowInfo.numEntries > 0) {
// The same offset and length address both the index and the value storage.
3553 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3554 rowInfo.offset1D,
3555 rowInfo.numEntries,
3556 Access::ReadOnly);
3557 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3558 rowInfo.numEntries,
3559 Access::ReadOnly);
3560 }
3561 else {
3562 // This does the right thing (reports an empty row) if the input
3563 // row is invalid.
3564 indices = local_inds_host_view_type();
3565 values = values_host_view_type();
3566 }
3567
3568#ifdef HAVE_TPETRA_DEBUG
// Debug-only consistency checks on the views produced above.
3569 const char suffix[] = ". This should never happen. Please report this "
3570 "bug to the Tpetra developers.";
3571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3572 (static_cast<size_t> (indices.size ()) !=
3573 static_cast<size_t> (values.size ()), std::logic_error,
3574 "At the end of this method, for local row " << localRow << ", "
3575 "indices.size() = " << indices.size () << " != values.size () = "
3576 << values.size () << suffix);
3577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3578 (static_cast<size_t> (indices.size ()) !=
3579 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3580 "At the end of this method, for local row " << localRow << ", "
3581 "indices.size() = " << indices.size () << " != rowInfo.numEntries = "
3582 << rowInfo.numEntries << suffix);
3583 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3584 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3585 (rowInfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3586 "of this method, for local row " << localRow << ", rowInfo.numEntries = "
3587 << rowInfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
3588 expectedNumEntries << suffix);
3589#endif // HAVE_TPETRA_DEBUG
3590 }
3591
3592#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated Teuchos::ArrayView overload of getLocalRowView (compiled only
// under TPETRA_ENABLE_DEPRECATED_CODE).
//
// Points the output ArrayViews at the stored local column indices and
// values of row localRow, without copying.
//
// localRow [in]  local row index, resolved through the graph's getRowInfo.
// indices  [out] first numEntries entries of the graph's view of the row.
// values   [out] first numEntries entries of this->getView for the row,
//                reinterpreted as const Scalar.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// is globally indexed.  Both outputs are reset to Teuchos::null first, so an
// invalid or empty row leaves them null.  With HAVE_TPETRA_DEBUG defined,
// the resulting sizes are cross-checked and std::logic_error is raised on
// any mismatch.
3593 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3594 void
3596 getLocalRowView (LocalOrdinal localRow,
3597 Teuchos::ArrayView<const LocalOrdinal>& indices,
3598 Teuchos::ArrayView<const Scalar>& values) const
3599 {
3600 using Teuchos::ArrayView;
3601 using Teuchos::av_reinterpret_cast;
3602 typedef LocalOrdinal LO;
3603 const char tfecfFuncName[] = "getLocalRowView: ";
3604
3605 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3606 isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
3607 "its indices as global indices, so you cannot get a view with local "
3608 "column indices. If the matrix has a column Map, you may call "
3609 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3610 "a view with global column indices by calling getGlobalRowCopy().");
3611 indices = Teuchos::null;
3612 values = Teuchos::null;
3613 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3614 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3615 rowinfo.numEntries > 0) {
3616 ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
3617 ArrayView<const Scalar> valTmp =
3618 av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
// Trim both temporaries to the row's numEntries.
3619 indices = indTmp (0, rowinfo.numEntries);
3620 values = valTmp (0, rowinfo.numEntries);
3621 }
3622
3623#ifdef HAVE_TPETRA_DEBUG
// Debug-only consistency checks on the views produced above.
3624 const char suffix[] = ". This should never happen. Please report this "
3625 "bug to the Tpetra developers.";
3626 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3627 (static_cast<size_t> (indices.size ()) !=
3628 static_cast<size_t> (values.size ()), std::logic_error,
3629 "At the end of this method, for local row " << localRow << ", "
3630 "indices.size() = " << indices.size () << " != values.size () = "
3631 << values.size () << suffix);
3632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3633 (static_cast<size_t> (indices.size ()) !=
3634 static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3635 "At the end of this method, for local row " << localRow << ", "
3636 "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3637 << rowinfo.numEntries << suffix);
3638 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640 (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3641 "of this method, for local row " << localRow << ", rowinfo.numEntries = "
3642 << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
3643 expectedNumEntries << suffix);
3644#endif // HAVE_TPETRA_DEBUG
3645 }
3646#endif // TPETRA_ENABLE_DEPRECATED_CODE
3647
3648#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated raw-pointer overload of getLocalRowView (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE).
//
// Exposes the stored local column indices and values of row lclRow through
// raw pointers, without copying.
//
// lclRow [in]  local row index, resolved through the graph's getRowInfo.
// numEnt [out] number of entries in the row (0 for an invalid row).
// val    [out] pointer to the row's values (nullptr for an invalid row).
// ind    [out] pointer to the row's local column indices (nullptr for an
//              invalid row).
//
// Returns an error code:
//   0                            success;
//   1                            lclRow is not a valid row on this process;
//   OrdinalTraits<LO>::invalid() the graph is null or globally indexed; the
//                                output arguments are left untouched.
//
// The returned pointers come from local view objects that go out of scope
// when this function returns; see the "unsafe" notes below.
3649 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3650 LocalOrdinal
3652 getLocalRowView (const LocalOrdinal lclRow,
3653 LocalOrdinal& numEnt,
3654 const impl_scalar_type*& val,
3655 const LocalOrdinal*& ind) const
3656 {
3657 typedef LocalOrdinal LO;
3658
3659 // Don't call getCrsGraph(), because that modifies an RCP reference
3660 // count, which is not thread safe. Checking whether an RCP is
3661 // null does NOT modify its reference count, and is therefore
3662 // thread safe. Note that isGloballyIndexed() calls
3663 // getCrsGraph(), so we have to go to the graph directly.
3664 if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
3665 return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3666 }
3667 else {
3668 const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
3669 if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
3670 numEnt = 0; // no valid entries in this row on the calling process
3671 val = nullptr;
3672 ind = nullptr;
3673 // First argument (lclRow) invalid, so make 1 the error code.
3674 return static_cast<LO> (1);
3675 }
3676 else {
3677 numEnt = static_cast<LO> (rowInfo.numEntries);
3678 auto lclColInds = staticGraph_->getLocalIndsViewHost (rowInfo);
3679 // KDDKDD UVM Breaks reference counting; unsafe
3680 ind = lclColInds.data ();
3681
3682 auto values = getValuesViewHost (rowInfo);
3683 // KDDKDD UVM Breaks reference counting; unsafe
3684 val = values.data();
// Success: report error code 0.  The row length is already delivered
// through numEnt; returning it here would make every non-empty row look
// like a failure and a one-entry row indistinguishable from error code 1.
3685 return static_cast<LO> (0);
3686 }
3687 }
3688 }
3689#endif // TPETRA_ENABLE_DEPRECATED_CODE
3690
3691#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated Scalar-typed raw-pointer row view (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE).
//
// Forwards to the getLocalRowView overload that takes
// (lclRow, numEnt, val, ind) and hands its result back unchanged as the
// error code.
//
// lclRow     [in]  local row index.
// numEnt     [out] filled in by the forwarded call.
// lclColInds [out] filled in by the forwarded call.
// vals       [out] the impl_scalar_type pointer from the forwarded call,
//                  reinterpreted as const Scalar*.
3692 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3693 LocalOrdinal
3694 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3695 getLocalRowViewRaw (const LocalOrdinal lclRow,
3696 LocalOrdinal& numEnt,
3697 const LocalOrdinal*& lclColInds,
3698 const Scalar*& vals) const
3699 {
3700 const impl_scalar_type* vals_ist = nullptr;
// Note the argument order: the forwarded overload takes values before indices.
3701 const LocalOrdinal errCode =
3702 this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
3703 vals = reinterpret_cast<const Scalar*> (vals_ist);
3704 return errCode;
3705 }
3706#endif // TPETRA_ENABLE_DEPRECATED_CODE
3707
// getGlobalRowView (host-view overload).
//
// Points the output views at the stored global column indices and values of
// the row with global index globalRow, without copying.  Both views are
// requested read-only.
//
// globalRow [in]  global row index, resolved through the graph's
//                 getRowInfoFromGlobalRowIndex.
// indices   [out] subview of the graph's global indices for the row.
// values    [out] subview of the matrix's unpacked values for the row.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// is locally indexed.  An invalid or empty row yields default-constructed
// (empty) views.  With HAVE_TPETRA_DEBUG defined, the resulting sizes are
// cross-checked and std::logic_error is raised on any mismatch.
3708 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3709 void
3711 getGlobalRowView (GlobalOrdinal globalRow,
3712 global_inds_host_view_type &indices,
3713 values_host_view_type &values) const
3714 {
3715 const char tfecfFuncName[] = "getGlobalRowView: ";
3716
3717 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3718 isLocallyIndexed (), std::runtime_error,
3719 "The matrix is locally indexed, so we cannot return a view of the row "
3720 "with global column indices. Use getGlobalRowCopy() instead.");
3721
3722 // This does the right thing (reports an empty row) if the input
3723 // row is invalid.
3724 const RowInfo rowInfo =
3725 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3726 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3727 rowInfo.numEntries > 0) {
// The same offset and length address both the index and the value storage.
3728 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3729 rowInfo.numEntries,
3730 Access::ReadOnly);
3731 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3732 rowInfo.numEntries,
3733 Access::ReadOnly);
3734 }
3735 else {
3736 indices = global_inds_host_view_type();
3737 values = values_host_view_type();
3738 }
3739
3740#ifdef HAVE_TPETRA_DEBUG
// Debug-only consistency checks on the views produced above.
3741 const char suffix[] = ". This should never happen. Please report this "
3742 "bug to the Tpetra developers.";
3743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3744 (static_cast<size_t> (indices.size ()) !=
3745 static_cast<size_t> (values.size ()), std::logic_error,
3746 "At the end of this method, for global row " << globalRow << ", "
3747 "indices.size() = " << indices.size () << " != values.size () = "
3748 << values.size () << suffix);
3749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3750 (static_cast<size_t> (indices.size ()) !=
3751 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3752 "At the end of this method, for global row " << globalRow << ", "
3753 "indices.size() = " << indices.size () << " != rowInfo.numEntries = "
3754 << rowInfo.numEntries << suffix);
3755 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3757 (rowInfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3758 "of this method, for global row " << globalRow << ", rowInfo.numEntries "
3759 "= " << rowInfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3760 " " << expectedNumEntries << suffix);
3761#endif // HAVE_TPETRA_DEBUG
3762 }
3763
3764#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated Teuchos::ArrayView overload of getGlobalRowView (compiled only
// under TPETRA_ENABLE_DEPRECATED_CODE).
//
// Points the output ArrayViews at the stored global column indices and
// values of the row with global index globalRow, without copying.
//
// globalRow [in]  global row index, resolved through the graph's
//                 getRowInfoFromGlobalRowIndex.
// indices   [out] first numEntries entries of the graph's global view.
// values    [out] first numEntries entries of this->getView for the row,
//                 reinterpreted as const Scalar.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// is locally indexed.  Both outputs are reset to Teuchos::null first, so an
// invalid or empty row leaves them null.  With HAVE_TPETRA_DEBUG defined,
// extra size checks raise std::logic_error on any mismatch.
3765 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3766 void
3768 getGlobalRowView (GlobalOrdinal globalRow,
3769 Teuchos::ArrayView<const GlobalOrdinal>& indices,
3770 Teuchos::ArrayView<const Scalar>& values) const
3771 {
3772 using Teuchos::ArrayView;
3773 using Teuchos::av_reinterpret_cast;
3774 typedef GlobalOrdinal GO;
3775 const char tfecfFuncName[] = "getGlobalRowView: ";
3776
3777 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3778 isLocallyIndexed (), std::runtime_error,
3779 "The matrix is locally indexed, so we cannot return a view of the row "
3780 "with global column indices. Use getGlobalRowCopy() instead.");
3781 indices = Teuchos::null;
3782 values = Teuchos::null;
3783 const RowInfo rowinfo =
3784 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3785 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3786 rowinfo.numEntries > 0) {
3787 ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
3788 ArrayView<const Scalar> valTmp =
3789 av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3790#ifdef HAVE_TPETRA_DEBUG
// Debug-only: the temporaries must be at least numEntries long before trimming.
3791 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3792 (static_cast<size_t> (indTmp.size ()) < rowinfo.numEntries ||
3793 static_cast<size_t> (valTmp.size ()) < rowinfo.numEntries,
3794 std::logic_error, std::endl << "rowinfo.numEntries not accurate. "
3795 << std::endl << "indTmp.size() = " << indTmp.size ()
3796 << ", valTmp.size() = " << valTmp.size ()
3797 << ", rowinfo.numEntries = " << rowinfo.numEntries << ".");
3798#endif // HAVE_TPETRA_DEBUG
// Trim both temporaries to the row's numEntries.
3799 indices = indTmp (0, rowinfo.numEntries);
3800 values = valTmp (0, rowinfo.numEntries);
3801 }
3802
3803#ifdef HAVE_TPETRA_DEBUG
// Debug-only consistency checks on the views produced above.
3804 const char suffix[] = ". This should never happen. Please report this "
3805 "bug to the Tpetra developers.";
3806 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3807 (static_cast<size_t> (indices.size ()) !=
3808 static_cast<size_t> (values.size ()), std::logic_error,
3809 "At the end of this method, for global row " << globalRow << ", "
3810 "indices.size() = " << indices.size () << " != values.size () = "
3811 << values.size () << suffix);
3812 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3813 (static_cast<size_t> (indices.size ()) !=
3814 static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3815 "At the end of this method, for global row " << globalRow << ", "
3816 "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3817 << rowinfo.numEntries << suffix);
3818 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3819 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3820 (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3821 "of this method, for global row " << globalRow << ", rowinfo.numEntries "
3822 "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3823 " " << expectedNumEntries << suffix);
3824#endif // HAVE_TPETRA_DEBUG
3825 }
3826#endif // TPETRA_ENABLE_DEPRECATED_CODE
3827
// scale: scales the matrix's stored values by alpha, in place.
//
// alpha [in] scaling factor; converted to impl_scalar_type before use.
//
// Raises std::runtime_error (through the Teuchos test macro) unless fill is
// active.  Does nothing when the graph's indices are not allocated, or when
// there are no local rows or no local entries.
3828 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3829 void
3831 scale (const Scalar& alpha)
3832 {
3833 const char tfecfFuncName[] = "scale: ";
3834 const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3835
3836 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3837 ! isFillActive (), std::runtime_error,
3838 "Fill must be active before you may call this method. "
3839 "Please call resumeFill() to make fill active.");
3840
3841 const size_t nlrs = staticGraph_->getNodeNumRows ();
3842 const size_t numEntries = staticGraph_->getNodeNumEntries ();
3843 if (! staticGraph_->indicesAreAllocated () ||
3844 nlrs == 0 || numEntries == 0) {
3845 // do nothing
3846 }
3847 else {
// The same view is passed as both output and input of KokkosBlas::scal.
// NOTE(review): this uses valuesPacked_wdv, whereas setAllToScalar writes
// valuesUnpacked_wdv -- confirm the packed view is the intended target
// while fill is active.
3849 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3850 KokkosBlas::scal(vals, theAlpha, vals);
3851
3852 }
3853 }
3854
// setAllToScalar: overwrites every stored value with alpha.
//
// alpha [in] value to assign; converted to impl_scalar_type before use.
//
// Raises std::runtime_error (through the Teuchos test macro) unless fill is
// active.  Does nothing when the graph's indices are not allocated or there
// are no local entries.
3855 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3856 void
3858 setAllToScalar (const Scalar& alpha)
3859 {
3860 const char tfecfFuncName[] = "setAllToScalar: ";
3861 const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3863 ! isFillActive (), std::runtime_error,
3864 "Fill must be active before you may call this method. "
3865 "Please call resumeFill() to make fill active.");
3866
3867 // replace all values in the matrix
3868 // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
3869 // however, if there are no valid entries, we can short-circuit
3870 // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
3871 const size_t numEntries = staticGraph_->getNodeNumEntries();
3872 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3873 // do nothing
3874 }
3875 else {
// Fill the whole unpacked device view; it is requested with OverwriteAll.
3876 Kokkos::deep_copy (valuesUnpacked_wdv.getDeviceView(Access::OverwriteAll),
3877 theAlpha);
3878 }
3879 }
3880
// setAllValues (Kokkos-view overload): installs a complete row-pointer /
// column-index / value triple as the matrix's storage.
//
// rowPointers   [in] row offsets, forwarded to myGraph_->setAllIndices.
// columnIndices [in] column indices, forwarded to myGraph_->setAllIndices.
// values        [in] matrix values; wrapped in values_wdv_type and used as
//                    both the packed and the unpacked value storage.
//
// Raises std::invalid_argument if columnIndices and values differ in size;
// std::runtime_error if myGraph_ is null or setAllIndices throws a
// std::exception (its message is appended); std::logic_error if the graph's
// local graph dimensions afterwards do not match the inputs.
// On success the storage status becomes STORAGE_1D_PACKED and
// checkInternalState() is called.
3881 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3882 void
3884 setAllValues (const typename local_graph_device_type::row_map_type& rowPointers,
3885 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3886 const typename local_matrix_device_type::values_type& values)
3887 {
3888 const char tfecfFuncName[] = "setAllValues: ";
3889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3890 (columnIndices.size () != values.size (), std::invalid_argument,
3891 "columnIndices.size() = " << columnIndices.size () << " != values.size()"
3892 " = " << values.size () << ".");
3893 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3894 (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
3895
// Re-raise any std::exception from the graph as std::runtime_error carrying
// this method's name prefix.
3896 try {
3897 myGraph_->setAllIndices (rowPointers, columnIndices);
3898 }
3899 catch (std::exception &e) {
3900 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3901 (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
3902 "exception: " << e.what ());
3903 }
3904 // Make sure that myGraph_ now has a local graph. It may not be
3905 // fillComplete yet, so it's important to check. We don't care
3906 // whether setAllIndices() did a shallow copy or a deep copy, so a
3907 // good way to check is to compare dimensions.
3908 auto lclGraph = myGraph_->getLocalGraphDevice ();
3909 const size_t numEnt = lclGraph.entries.extent (0);
3910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3911 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3912 numEnt != static_cast<size_t> (columnIndices.extent (0)),
3913 std::logic_error, "myGraph_->setAllIndices() did not correctly create "
3914 "local graph. Please report this bug to the Tpetra developers.");
3915
// Packed and unpacked storage are both assigned from the same wrapped view.
3916 valuesPacked_wdv = values_wdv_type(values);
3917 valuesUnpacked_wdv = valuesPacked_wdv;
3918
3919 // FIXME (22 Jun 2016) I would very much like to get rid of
3920 // k_values1D_ at some point. I find it confusing to have all
3921 // these extra references lying around.
3922// k_values1D_ = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3923
3924 // Storage MUST be packed, since the interface doesn't give any
3925 // way to indicate any extra space at the end of each row.
3926 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3927
3928 checkInternalState ();
3929 }
3930
// setAllValues (Teuchos::ArrayRCP overload): converts the three host arrays
// to Kokkos views and forwards to the Kokkos-view overload of setAllValues.
//
// ptr [in] row offsets as size_t; copied into the graph's native row-offset
//          type with Tpetra::Details::copyOffsets.
// ind [in] local column indices; deep-copied into a view for device_type.
// val [in] values; reinterpreted as impl_scalar_type, then deep-copied.
//
// Raises std::logic_error if the converted offsets view differs in length
// from the input.
//
// NOTE(review): the closing brace of this function (listing line 3968) is
// absent from this listing.
3931 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3932 void
3934 setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
3935 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3936 const Teuchos::ArrayRCP<Scalar>& val)
3937 {
3938 using Kokkos::Compat::getKokkosViewDeepCopy;
3939 using Teuchos::ArrayRCP;
3940 using Teuchos::av_reinterpret_cast;
3941 typedef device_type DT;
3942 typedef impl_scalar_type IST;
3943 typedef typename local_graph_device_type::row_map_type row_map_type;
3944 //typedef typename row_map_type::non_const_value_type row_offset_type;
3945 const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3946
3947 // The row offset type may depend on the execution space. It may
3948 // not necessarily be size_t. If it's not, we need to make a deep
3949 // copy. We need to make a deep copy anyway so that Kokkos can
3950 // own the memory. Regardless, ptrIn gets the copy.
3951 typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
// Unmanaged host view wrapping the caller's raw offsets array.
3952 Kokkos::View<const size_t*,
3953 typename row_map_type::array_layout,
3954 Kokkos::HostSpace,
3955 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3956 ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
3957
3958 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3959 (ptrNative.extent (0) != ptrSizeT.extent (0),
3960 std::logic_error, "ptrNative.extent(0) = " <<
3961 ptrNative.extent (0) << " != ptrSizeT.extent(0) = "
3962 << ptrSizeT.extent (0) << ". Please report this bug to the "
3963 "Tpetra developers.");
3964
3965 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3966 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3967 this->setAllValues (ptrNative, indIn, valIn);
3969
// getLocalDiagOffsets (Teuchos::ArrayRCP overload): asks the graph for the
// per-row diagonal offsets and stores them in a host array.
//
// offsets [in/out] grown to the local row count if it is shorter; its first
//                  lclNumRows entries are then written.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// has no graph.
//
// NOTE(review): listing line 4001 (between the HostSpace branch and the
// "else" below) is absent from this listing; presumably it is the brace
// closing the "if" block.
3970 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3971 void
3973 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
3974 {
3975 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
3976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3977 (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
3978
3979 // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
3980 // this method in CrsGraph too, so don't call it (otherwise build
3981 // warnings will show up and annoy users). Instead, copy results
3982 // in and out, if the memory space requires it.
3983
3984 const size_t lclNumRows = staticGraph_->getNodeNumRows ();
3985 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3986 offsets.resize (lclNumRows);
3987 }
3988
3989 // The input ArrayRCP must always be a host pointer. Thus, if
3990 // device_type::memory_space is Kokkos::HostSpace, it's OK for us
3991 // to write to that allocation directly as a Kokkos::View.
3992 typedef typename device_type::memory_space memory_space;
3993 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3994 // It is always syntactically correct to assign a raw host
3995 // pointer to a device View, so this code will compile correctly
3996 // even if this branch never runs.
3997 typedef Kokkos::View<size_t*, device_type,
3998 Kokkos::MemoryUnmanaged> output_type;
3999 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
4000 staticGraph_->getLocalDiagOffsets (offsetsOut);
4002 else {
// Otherwise compute into a temporary device_type view, then deep_copy it
// into an unmanaged host view over the caller's array.
4003 Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
4004 staticGraph_->getLocalDiagOffsets (offsetsTmp);
4005 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
4006 Kokkos::MemoryUnmanaged> output_type;
4007 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
4008 Kokkos::deep_copy (offsetsOut, offsetsTmp);
4009 }
4010 }
4011
// getLocalDiagCopy, one-argument form (per tfecfFuncName below): fills the
// Vector "diag" with the matrix's diagonal entries.
//
// NOTE(review): listing lines 4014-4015 (qualified name and parameter list),
// 4022, 4060 and 4066-4067 (the body of the final "else") are absent from
// this listing, so the not-fill-complete path cannot be described from here.
//
// Raises std::runtime_error (through the Teuchos test macro) if the matrix
// has no graph or no column Map.  Returns without doing anything when the
// row Map or its communicator is null.  With HAVE_TPETRA_DEBUG defined, also
// requires diag's Map to be compatible with the row Map.
4012 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4013 void
4016 {
4017 using Teuchos::ArrayRCP;
4018 using Teuchos::ArrayView;
4019 using Teuchos::av_reinterpret_cast;
4020 const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
4021 typedef local_ordinal_type LO;
4023
4024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4025 staticGraph_.is_null (), std::runtime_error,
4026 "This method requires that the matrix have a graph.");
4027 auto rowMapPtr = this->getRowMap ();
4028 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
4029 // Processes on which the row Map or its communicator is null
4030 // don't participate. Users shouldn't even call this method on
4031 // those processes.
4032 return;
4033 }
4034 auto colMapPtr = this->getColMap ();
4035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4036 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
4037 "This method requires that the matrix have a column Map.");
4038 const map_type& rowMap = * rowMapPtr;
4039 const map_type& colMap = * colMapPtr;
4040 const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4041
4042#ifdef HAVE_TPETRA_DEBUG
4043 // isCompatible() requires an all-reduce, and thus this check
4044 // should only be done in debug mode.
4045 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4046 ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4047 "The input Vector's Map must be compatible with the CrsMatrix's row "
4048 "Map. You may check this by using Map's isCompatible method: "
4049 "diag.getMap ()->isCompatible (A.getRowMap ());");
4050#endif // HAVE_TPETRA_DEBUG
4051
4052 if (this->isFillComplete ()) {
// Fill-complete path: hand the device view of diag, the local row and
// column Maps, and the local device matrix to getDiagCopyWithoutOffsets;
// its return value is deliberately discarded.
4053 const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
4054 // 1-D subview of the first (and only) column of D_lcl.
4055 const auto D_lcl_1d =
4056 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
4057
4058 const auto lclRowMap = rowMap.getLocalMap ();
4059 const auto lclColMap = colMap.getLocalMap ();
4061 (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
4062 lclColMap,
4063 getLocalMatrixDevice ());
4064 }
4065 else {
4068 }
4069 }
4070
// getLocalDiagCopy with precomputed offsets in a Kokkos::View (per
// tfecfFuncName below; listing lines 4073-4074, holding the qualified name
// and the first parameter, are absent from this listing).
//
// Fills "diag" with the diagonal entries by passing a 1-D device subview of
// it, the offsets view, and the local device matrix to
// KokkosSparse::getDiagCopy.
//
// offsets [in] per-row diagonal offsets (unmanaged view on device_type).
//
// With HAVE_TPETRA_DEBUG defined, raises std::runtime_error if diag's Map is
// not compatible with the matrix's row Map.
4071 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4072 void
4075 const Kokkos::View<const size_t*, device_type,
4076 Kokkos::MemoryUnmanaged>& offsets) const
4077 {
4078 typedef LocalOrdinal LO;
4079
4080#ifdef HAVE_TPETRA_DEBUG
4081 const char tfecfFuncName[] = "getLocalDiagCopy: ";
4082 const map_type& rowMap = * (this->getRowMap ());
4083 // isCompatible() requires an all-reduce, and thus this check
4084 // should only be done in debug mode.
4085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4086 ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4087 "The input Vector's Map must be compatible with (in the sense of Map::"
4088 "isCompatible) the CrsMatrix's row Map.");
4089#endif // HAVE_TPETRA_DEBUG
4090
4091 // For now, we fill the Vector on the host and sync to device.
4092 // Later, we may write a parallel kernel that works entirely on
4093 // device.
4094 //
4095 // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
4096 // we write a device kernel, it will not need to assume UVM.
// NOTE(review): the code below requests a device view and calls
// KokkosSparse::getDiagCopy, so the "fill on the host" remarks above look
// out of date -- confirm.
4097
4098 auto D_lcl = diag.getLocalViewDevice (Access::OverwriteAll);
4099 const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4100 // Get 1-D subview of the first (and only) column of D_lcl.
4101 auto D_lcl_1d =
4102 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
4103
4104 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
4105 getLocalMatrixDevice ());
4106 }
4107
// getLocalDiagCopy with precomputed offsets in a Teuchos::ArrayView (per
// tfecfFuncName below; listing lines 4110-4111, holding the qualified name
// and the first parameter, are absent from this listing).
//
// Fills the host view of "diag" row by row.  A row whose offset equals
// OrdinalTraits<size_t>::invalid() gets STS::zero(); any other row gets the
// packed value at (row start + offset).
//
// offsets [in] per-row offset of the diagonal entry within the row; wrapped
//              in an unmanaged host view.
//
// With HAVE_TPETRA_DEBUG defined, raises std::runtime_error if diag's Map is
// not compatible with the matrix's row Map.
4108 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4109 void
4112 const Teuchos::ArrayView<const size_t>& offsets) const
4113 {
4114 using LO = LocalOrdinal;
4115 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
4116 using IST = impl_scalar_type;
4117
4118#ifdef HAVE_TPETRA_DEBUG
4119 const char tfecfFuncName[] = "getLocalDiagCopy: ";
4120 const map_type& rowMap = * (this->getRowMap ());
4121 // isCompatible() requires an all-reduce, and thus this check
4122 // should only be done in debug mode.
4123 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4124 ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4125 "The input Vector's Map must be compatible with (in the sense of Map::"
4126 "isCompatible) the CrsMatrix's row Map.");
4127#endif // HAVE_TPETRA_DEBUG
4128
4129 // See #1510. In case diag has already been marked modified on
4130 // device, we need to clear that flag, since the code below works
4131 // on host.
4132 //diag.clear_sync_state ();
4133
4134 // For now, we fill the Vector on the host and sync to device.
4135 // Later, we may write a parallel kernel that works entirely on
4136 // device.
4137 auto lclVecHost = diag.getLocalViewHost(Access::OverwriteAll);
4138 // 1-D subview of the first (and only) column of lclVecHost.
4139 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
4140
4141 using host_offsets_view_type =
4142 Kokkos::View<const size_t*, Kokkos::HostSpace,
4143 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
4144 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
4145 // Find the diagonal entries and put them in lclVecHost1d.
4146 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4147 const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4148 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
4149
4150 auto rowPtrsPackedHost = staticGraph_->rowPtrsPacked_host_;
4151 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// One parallel iteration per local row, run on the default host execution
// space.
4152 Kokkos::parallel_for
4153 ("Tpetra::CrsMatrix::getLocalDiagCopy",
4154 range_type (0, myNumRows),
4155 [&, INV, h_offsets] (const LO lclRow) { // Value capture is a workaround for cuda + gcc-7.2 compiler bug w/c++14
4156 lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
4157 if (h_offsets[lclRow] != INV) {
4158 auto curRowOffset = rowPtrsPackedHost (lclRow);
4159 lclVecHost1d(lclRow) =
4160 static_cast<IST> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
4161 }
4162 });
4163 //diag.sync_device ();
4164 }
4165
4166
// leftScale: scale the fill-complete local matrix using the entries of
// the Vector x, by delegating to leftScaleLocalCrsMatrix.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the declaration of `x` are not present here.
//
// x's Map must be the same as either the range Map or the row Map:
//   - range Map, and the graph has an Export object: x is first
//     redistributed into a temporary Vector on the row Map;
//   - range Map without an Export object, or row Map: x is used as is.
//
// Throws std::invalid_argument if x's Map matches neither Map, and
// std::runtime_error if the matrix is not fill complete. The Map
// checks (and any redistribution of x) happen before the fill-complete
// check.
4167 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4168 void
4171 {
4172 using ::Tpetra::Details::ProfilingRegion;
4173 using Teuchos::ArrayRCP;
4174 using Teuchos::ArrayView;
4175 using Teuchos::null;
4176 using Teuchos::RCP;
4177 using Teuchos::rcp;
4178 using Teuchos::rcpFromRef;
4180 const char tfecfFuncName[] = "leftScale: ";
4181
4182 ProfilingRegion region ("Tpetra::CrsMatrix::leftScale");
4183
     // xp ends up pointing either at x itself (non-owning) or at a
     // temporary Vector distributed by the row Map.
4184 RCP<const vec_type> xp;
4185 if (this->getRangeMap ()->isSameAs (* (x.getMap ()))) {
4186 // Take from Epetra: If we have a non-trivial exporter, we must
4187 // import elements that are permuted or are on other processors.
4188 auto exporter = this->getCrsGraphRef ().getExporter ();
4189 if (exporter.get () != nullptr) {
4190 RCP<vec_type> tempVec (new vec_type (this->getRowMap ()));
4191 tempVec->doImport (x, *exporter, REPLACE); // reverse mode
4192 xp = tempVec;
4193 }
4194 else {
4195 xp = rcpFromRef (x);
4196 }
4197 }
4198 else if (this->getRowMap ()->isSameAs (* (x.getMap ()))) {
4199 xp = rcpFromRef (x);
4200 }
4201 else {
4202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4203 (true, std::invalid_argument, "x's Map must be the same as "
4204 "either the row Map or the range Map of the CrsMatrix.");
4205 }
4206
4207 if (this->isFillComplete()) {
     // Only the first column of xp's local device view is used.
4208 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
4209 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4211 leftScaleLocalCrsMatrix (getLocalMatrixDevice (),
4212 x_lcl_1d, false, false);
4213 }
4214 else {
4215 // 6/2020 Disallow leftScale of non-fillComplete matrices #7446
4216 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4217 (true, std::runtime_error, "CrsMatrix::leftScale requires matrix to be"
4218 " fillComplete");
4219 }
4220 }
4221
// rightScale: scale the fill-complete local matrix using the entries
// of the Vector x, by delegating to rightScaleLocalCrsMatrix.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the declaration of `x` are not present here.
//
// x's Map must be the same as either the domain Map or the column Map:
//   - domain Map, and the graph has an Import object: x is first
//     imported into a temporary Vector on the column Map;
//   - domain Map without an Import object, or column Map: x is used
//     as is.
//
// Throws std::runtime_error if x's Map matches neither Map, or if the
// matrix is not fill complete. The Map checks (and any Import of x)
// happen before the fill-complete check.
4222 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4223 void
4226 {
4227 using ::Tpetra::Details::ProfilingRegion;
4228 using Teuchos::ArrayRCP;
4229 using Teuchos::ArrayView;
4230 using Teuchos::null;
4231 using Teuchos::RCP;
4232 using Teuchos::rcp;
4233 using Teuchos::rcpFromRef;
4235 const char tfecfFuncName[] = "rightScale: ";
4236
4237 ProfilingRegion region ("Tpetra::CrsMatrix::rightScale");
4238
     // xp ends up pointing either at x itself (non-owning) or at a
     // temporary Vector distributed by the column Map.
4239 RCP<const vec_type> xp;
4240 if (this->getDomainMap ()->isSameAs (* (x.getMap ()))) {
4241 // Take from Epetra: If we have a non-trivial importer, we must
4242 // import elements that are permuted or are on other processors.
4243 auto importer = this->getCrsGraphRef ().getImporter ();
4244 if (importer.get () != nullptr) {
4245 RCP<vec_type> tempVec (new vec_type (this->getColMap ()));
4246 tempVec->doImport (x, *importer, REPLACE);
4247 xp = tempVec;
4248 }
4249 else {
4250 xp = rcpFromRef (x);
4251 }
4252 }
4253 else if (this->getColMap ()->isSameAs (* (x.getMap ()))) {
4254 xp = rcpFromRef (x);
4255 } else {
4256 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4257 (true, std::runtime_error, "x's Map must be the same as "
4258 "either the domain Map or the column Map of the CrsMatrix.");
4259 }
4260
4261 if (this->isFillComplete()) {
     // Only the first column of xp's local device view is used.
4262 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
4263 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4265 rightScaleLocalCrsMatrix (getLocalMatrixDevice (),
4266 x_lcl_1d, false, false);
4267 }
4268 else {
4269 // 6/2020 Disallow rightScale of non-fillComplete matrices #7446
4270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4271 (true, std::runtime_error, "CrsMatrix::rightScale requires matrix to be"
4272 " fillComplete");
4273 }
4274 }
4275
// Return the Frobenius norm of the matrix: the square root of the sum,
// over all processes in the communicator, of |value|^2 for every
// locally stored entry.
//
// The cached value frobNorm_ is returned when it is valid; the value
// -1 marks the cache as invalid. When the cache is invalid the norm is
// recomputed, which includes an all-reduce. The result is stored back
// into frobNorm_ only if the matrix is fill complete, since otherwise
// the values might still change.
//
// NOTE(review): the listing lines that carry the return type and the
// class qualifier are not present here.
4276 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4279 getFrobeniusNorm () const
4280 {
4281 using Teuchos::ArrayView;
4282 using Teuchos::outArg;
4283 using Teuchos::REDUCE_SUM;
4284 using Teuchos::reduceAll;
4285
4286 // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
4287 // local part of this computation. It could make sense to put
4288 // this operation in the Kokkos::CrsMatrix.
4289
4290 // check the cache first
4291 mag_type frobNorm = frobNorm_;
4292 if (frobNorm == -STM::one ()) {
4293 mag_type mySum = STM::zero ();
4294 if (getNodeNumEntries() > 0) {
4295 if (isStorageOptimized ()) {
4296 // "Optimized" storage is packed storage. That means we can
4297 // iterate in one pass through the 1-D values array.
4298 const size_t numEntries = getNodeNumEntries ();
4299 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
4300 for (size_t k = 0; k < numEntries; ++k) {
4301 auto val = values[k];
4302 // Note (etp 06 Jan 2015) We need abs() here for composite types
4303 // (in general, if mag_type is on the left-hand-side, we need
4304 // abs() on the right-hand-side)
4305 const mag_type val_abs = STS::abs (val);
4306 mySum += val_abs * val_abs;
4307 }
4308 }
4309 else {
     // Storage is not packed, so visit each row and only its
     // numEntries valid entries.
4310 const LocalOrdinal numRows =
4311 static_cast<LocalOrdinal> (this->getNodeNumRows ());
4312 for (LocalOrdinal r = 0; r < numRows; ++r) {
     // Read the row info through staticGraph_ rather than myGraph_:
     // myGraph_ is null when the matrix was given a const graph, and
     // such a matrix can still reach this branch.
4313 const RowInfo rowInfo = staticGraph_->getRowInfo (r);
4314 const size_t numEntries = rowInfo.numEntries;
4315 auto A_r = this->getValuesViewHost(rowInfo);
4316 for (size_t k = 0; k < numEntries; ++k) {
4317 const impl_scalar_type val = A_r[k];
4318 const mag_type val_abs = STS::abs (val);
4319 mySum += val_abs * val_abs;
4320 }
4321 }
4322 }
4323 }
     // Sum the local contributions over all processes. Processes with
     // no local entries contribute zero but still take part.
4324 mag_type totalSum = STM::zero ();
4325 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
4326 mySum, outArg (totalSum));
4327 frobNorm = STM::sqrt (totalSum);
4328 }
4329 if (isFillComplete ()) {
4330 // Only cache the result if the matrix is fill complete.
4331 // Otherwise, the values might still change. resumeFill clears
4332 // the cache.
4333 frobNorm_ = frobNorm;
4334 }
4335 return frobNorm;
4336 }
4337
// Replace the matrix's column Map by forwarding newColMap to the owned
// graph's replaceColMap.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix has a
// const graph that it is not allowed to modify.
4338 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4339 void
4341 replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4342 {
4343 const char tfecfFuncName[] = "replaceColMap: ";
4344 // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
4345 // Then replacing the column Map might mean that we need to
4346 // reindex the column indices.
4347 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4348 myGraph_.is_null (), std::runtime_error,
4349 "This method does not work if the matrix has a const graph. The whole "
4350 "idea of a const graph is that you are not allowed to change it, but "
4351 "this method necessarily must modify the graph, since the graph owns "
4352 "the matrix's column Map.");
4353 myGraph_->replaceColMap (newColMap);
4354 }
4355
// Reindex the graph's column indices against newColMap and, if
// requested, sort each row's column indices together with the matrix
// values.
//
// graph: graph to reindex; if null, the matrix's owned graph
//   (myGraph_) is used instead.
// newColMap, newImport: forwarded to CrsGraph::reindexColumns.
// sortEachRow: if true, and the graph is locally indexed and not
//   already sorted after reindexing, sort every row here and mark the
//   graph's indices as sorted.
//
// Throws std::invalid_argument if graph is null and the matrix does
// not own its graph.
4356 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4357 void
4359 reindexColumns (crs_graph_type* const graph,
4360 const Teuchos::RCP<const map_type>& newColMap,
4361 const Teuchos::RCP<const import_type>& newImport,
4362 const bool sortEachRow)
4363 {
4364 const char tfecfFuncName[] = "reindexColumns: ";
4365 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4366 graph == nullptr && myGraph_.is_null (), std::invalid_argument,
4367 "The input graph is null, but the matrix does not own its graph.");
4368
4369 crs_graph_type& theGraph = (graph == nullptr) ? *myGraph_ : *graph;
4370 const bool sortGraph = false; // we'll sort graph & matrix together below
4371
4372 theGraph.reindexColumns (newColMap, newImport, sortGraph);
4373
4374 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
4375 const LocalOrdinal lclNumRows =
4376 static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
4377
4378 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4379
4380 const RowInfo rowInfo = theGraph.getRowInfo (row);
4381 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
4382 auto vals = this->getValuesViewHostNonConst (rowInfo);
4383
     // Sort only the first rowInfo.numEntries column indices of
     // the row; vals is passed along so it is reordered with them.
4384 sort2 (lclColInds.data (),
4385 lclColInds.data () + rowInfo.numEntries,
4386 vals.data ());
4387 }
4388 theGraph.indicesAreSorted_ = true;
4389 }
4390 }
4391
// Replace the matrix's domain Map by forwarding newDomainMap to the
// owned graph's replaceDomainMap.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix has a
// const graph that it is not allowed to modify.
4392 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4393 void
4395 replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4396 {
4397 const char tfecfFuncName[] = "replaceDomainMap: ";
4398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4399 myGraph_.is_null (), std::runtime_error,
4400 "This method does not work if the matrix has a const graph. The whole "
4401 "idea of a const graph is that you are not allowed to change it, but this"
4402 " method necessarily must modify the graph, since the graph owns the "
4403 "matrix's domain Map and Import objects.");
4404 myGraph_->replaceDomainMap (newDomainMap);
4405 }
4406
// Replace the matrix's domain Map and Import object together by
// forwarding both arguments to the owned graph's
// replaceDomainMapAndImporter.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix has a
// const graph that it is not allowed to modify.
4407 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4408 void
4410 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4411 Teuchos::RCP<const import_type>& newImporter)
4412 {
4413 const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
4414 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4415 myGraph_.is_null (), std::runtime_error,
4416 "This method does not work if the matrix has a const graph. The whole "
4417 "idea of a const graph is that you are not allowed to change it, but this"
4418 " method necessarily must modify the graph, since the graph owns the "
4419 "matrix's domain Map and Import objects.");
4420 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
4421 }
4422
// Replace the matrix's range Map by forwarding newRangeMap to the
// owned graph's replaceRangeMap.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix has a
// const graph that it is not allowed to modify. The message names the
// range Map and Export objects, which are what this method affects.
4423 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4424 void
4426 replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4427 {
4428 const char tfecfFuncName[] = "replaceRangeMap: ";
4429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4430 myGraph_.is_null (), std::runtime_error,
4431 "This method does not work if the matrix has a const graph. The whole "
4432 "idea of a const graph is that you are not allowed to change it, but this"
4433 " method necessarily must modify the graph, since the graph owns the "
4434 "matrix's range Map and Export objects.");
4435 myGraph_->replaceRangeMap (newRangeMap);
4436 }
4437
// Replace the matrix's range Map and Export object together by
// forwarding both arguments to the owned graph's
// replaceRangeMapAndExporter.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix has a
// const graph that it is not allowed to modify. The message names the
// range Map and Export objects, which are what this method affects.
4438 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4439 void
4441 replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4442 Teuchos::RCP<const export_type>& newExporter)
4443 {
4444 const char tfecfFuncName[] = "replaceRangeMapAndExporter: ";
4445 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4446 myGraph_.is_null (), std::runtime_error,
4447 "This method does not work if the matrix has a const graph. The whole "
4448 "idea of a const graph is that you are not allowed to change it, but this"
4449 " method necessarily must modify the graph, since the graph owns the "
4450 "matrix's range Map and Export objects.");
4451 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
4452 }
4453
// Append (column index, value) pairs for global row globalRow to the
// nonlocals_ container, creating the row's entry if it does not exist.
//
// globalRow: key into nonlocals_.
// indices: global column indices to append; its size decides how many
//   pairs are appended.
// values: values to append; values[k] is read for every k below
//   indices.size(), and no check is made here that the two sizes match.
//
// Nothing is sorted or merged here; repeated column indices are simply
// appended again.
4454 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4455 void
4457 insertNonownedGlobalValues (const GlobalOrdinal globalRow,
4458 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4459 const Teuchos::ArrayView<const Scalar>& values)
4460 {
4461 using Teuchos::Array;
4462 typedef GlobalOrdinal GO;
4463 typedef typename Array<GO>::size_type size_type;
4464
4465 const size_type numToInsert = indices.size ();
4466 // Add the new data to the list of nonlocals.
4467 // This creates the arrays if they don't exist yet.
4468 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4469 Array<GO>& curRowInds = curRow.first;
4470 Array<Scalar>& curRowVals = curRow.second;
     // Reserve once up front so that the push_back calls below do
     // not reallocate.
4471 const size_type newCapacity = curRowInds.size () + numToInsert;
4472 curRowInds.reserve (newCapacity);
4473 curRowVals.reserve (newCapacity);
4474 for (size_type k = 0; k < numToInsert; ++k) {
4475 curRowInds.push_back (indices[k]);
4476 curRowVals.push_back (values[k]);
4477 }
4478 }
4479
// globalAssemble: communicate the entries stored in nonlocals_ into
// this matrix, then empty nonlocals_.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the method name are not present here; the name is taken from
// tfecfFuncName below.
//
// Outline of the body:
//   - all-reduce, and return early if no process has nonlocal rows;
//   - sort each nonlocal row by column index and merge duplicates;
//   - build a temporary matrix over a Map of the nonlocal rows;
//   - if the row Map is one to one, Export (ADD) straight into *this;
//     otherwise Export into a one-to-one temporary matrix and then
//     Import (ADD) from that into *this;
//   - clear nonlocals_;
//   - all-reduce, and throw std::runtime_error if the Export was not
//     locally complete on some process.
//
// Throws std::runtime_error if fill is not active.
4480 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4481 void
4484 {
4485 using Details::Behavior;
4487 using Teuchos::Comm;
4488 using Teuchos::outArg;
4489 using Teuchos::RCP;
4490 using Teuchos::rcp;
4491 using Teuchos::REDUCE_MAX;
4492 using Teuchos::REDUCE_MIN;
4493 using Teuchos::reduceAll;
4494 using std::endl;
4496 //typedef LocalOrdinal LO;
4497 typedef GlobalOrdinal GO;
4498 typedef typename Teuchos::Array<GO>::size_type size_type;
4499 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
4500 ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble");
4501
4502 const bool verbose = Behavior::verbose("CrsMatrix");
     // prefix is only set (and only dereferenced) when verbose is true.
4503 std::unique_ptr<std::string> prefix;
4504 if (verbose) {
4505 prefix = this->createPrefix("CrsMatrix", "globalAssemble");
4506 std::ostringstream os;
4507 os << *prefix << "nonlocals_.size()=" << nonlocals_.size()
4508 << endl;
4509 std::cerr << os.str();
4510 }
4511 RCP<const Comm<int> > comm = getComm ();
4512
4513 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4514 (! isFillActive (), std::runtime_error, "Fill must be active before "
4515 "you may call this method.");
4516
4517 const size_t myNumNonlocalRows = nonlocals_.size ();
4518
4519 // If no processes have nonlocal rows, then we don't have to do
4520 // anything. Checking this is probably cheaper than constructing
4521 // the Map of nonlocal rows (see below) and noticing that it has
4522 // zero global entries.
4523 {
4524 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4525 int someoneHasNonlocalRows = 0;
4526 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4527 outArg (someoneHasNonlocalRows));
4528 if (someoneHasNonlocalRows == 0) {
4529 return; // no process has nonlocal rows, so nothing to do
4530 }
4531 }
4532
4533 // 1. Create a list of the "nonlocal" rows on each process. This
4534 // requires iterating over nonlocals_, so while we do this,
4535 // deduplicate the entries and get a count for each nonlocal
4536 // row on this process.
4537 // 2. Construct a new row Map corresponding to those rows. This
4538 // Map is likely overlapping. We know that the Map is not
4539 // empty on all processes, because the above all-reduce and
4540 // return exclude that case.
4541
4542 RCP<const map_type> nonlocalRowMap;
4543 // Keep this for CrsGraph's constructor, so we can use StaticProfile.
4544 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4545 {
4546 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4547 size_type curPos = 0;
4548 for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4549 ++mapIter, ++curPos) {
4550 myNonlocalGblRows[curPos] = mapIter->first;
4551 // Get the values and column indices by reference, since we
4552 // intend to change them in place (that's what "erase" does).
4553 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4554 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4555
4556 // Sort both arrays jointly, using the column indices as keys,
4557 // then merge them jointly. "Merge" here adds values
4558 // corresponding to the same column indices. The first 2 args
4559 // of merge2 are output arguments that work just like the
4560 // return value of std::unique.
4561 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4562 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4563 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4564 merge2 (gblCols_newEnd, vals_newEnd,
4565 gblCols.begin (), gblCols.end (),
4566 vals.begin (), vals.end ());
4567 gblCols.erase (gblCols_newEnd, gblCols.end ());
4568 vals.erase (vals_newEnd, vals.end ());
4569 numEntPerNonlocalRow[curPos] = gblCols.size ();
4570 }
4571
4572 // Currently, Map requires that its indexBase be the global min
4573 // of all its global indices. Map won't compute this for us, so
4574 // we must do it. If our process has no nonlocal rows, set the
4575 // "min" to the max possible GO value. This ensures that if
4576 // some process has at least one nonlocal row, then it will pick
4577 // that up as the min. We know that at least one process has a
4578 // nonlocal row, since the all-reduce and return at the top of
4579 // this method excluded that case.
4580 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4581 {
4582 auto iter = std::min_element (myNonlocalGblRows.begin (),
4583 myNonlocalGblRows.end ());
4584 if (iter != myNonlocalGblRows.end ()) {
4585 myMinNonlocalGblRow = *iter;
4586 }
4587 }
4588 GO gblMinNonlocalGblRow = 0;
4589 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4590 outArg (gblMinNonlocalGblRow));
4591 const GO indexBase = gblMinNonlocalGblRow;
4592 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4593 nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4594 }
4595
4596 // 3. Use the values and column indices for each nonlocal row, as
4597 // stored in nonlocals_, to construct a CrsMatrix corresponding
4598 // to nonlocal rows. We may use StaticProfile, since we have
4599 // exact counts of the number of entries in each nonlocal row.
4600
4601 if (verbose) {
4602 std::ostringstream os;
4603 os << *prefix << "Create nonlocal matrix" << endl;
4604 std::cerr << os.str();
4605 }
4606 RCP<crs_matrix_type> nonlocalMatrix =
4607 rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow (),
4608 StaticProfile));
4609 {
4610 size_type curPos = 0;
4611 for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4612 ++mapIter, ++curPos) {
4613 const GO gblRow = mapIter->first;
4614 // Get values & column indices by ref, just to avoid copy.
4615 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4616 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4617 //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
4618 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4619 }
4620 }
4621 // There's no need to fill-complete the nonlocals matrix.
4622 // We just use it as a temporary container for the Export.
4623
4624 // 4. If the original row Map is one to one, then we can Export
4625 // directly from nonlocalMatrix into this. Otherwise, we have
4626 // to create a temporary matrix with a one-to-one row Map,
4627 // Export into that, then Import from the temporary matrix into
4628 // *this.
4629
4630 auto origRowMap = this->getRowMap ();
4631 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4632
     // Set to 0 below if the Export object built from the nonlocal row
     // Map reports that it is not locally complete.
4633 int isLocallyComplete = 1; // true by default
4634
4635 if (origRowMapIsOneToOne) {
4636 if (verbose) {
4637 std::ostringstream os;
4638 os << *prefix << "Original row Map is 1-to-1" << endl;
4639 std::cerr << os.str();
4640 }
4641 export_type exportToOrig (nonlocalRowMap, origRowMap);
4642 if (! exportToOrig.isLocallyComplete ()) {
4643 isLocallyComplete = 0;
4644 }
4645 if (verbose) {
4646 std::ostringstream os;
4647 os << *prefix << "doExport from nonlocalMatrix" << endl;
4648 std::cerr << os.str();
4649 }
4650 this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
4651 // We're done at this point!
4652 }
4653 else {
4654 if (verbose) {
4655 std::ostringstream os;
4656 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
4657 std::cerr << os.str();
4658 }
4659 // If you ask a Map whether it is one to one, it does some
4660 // communication and stashes intermediate results for later use
4661 // by createOneToOne. Thus, calling createOneToOne doesn't cost
4662 // much more than the original cost of calling isOneToOne.
4663 auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
4664 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4665 if (! exportToOneToOne.isLocallyComplete ()) {
4666 isLocallyComplete = 0;
4667 }
4668
4669 // Create a temporary matrix with the one-to-one row Map.
4670 //
4671 // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
4672 // each row, to avoid reallocation during the Export operation.
4673 if (verbose) {
4674 std::ostringstream os;
4675 os << *prefix << "Create & doExport into 1-to-1 matrix"
4676 << endl;
4677 std::cerr << os.str();
4678 }
4679 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4680 // Export from matrix of nonlocals into the temp one-to-one matrix.
4681 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4682 Tpetra::ADD);
4683
4684 // We don't need the matrix of nonlocals anymore, so get rid of
4685 // it, to keep the memory high-water mark down.
4686 if (verbose) {
4687 std::ostringstream os;
4688 os << *prefix << "Free nonlocalMatrix" << endl;
4689 std::cerr << os.str();
4690 }
4691 nonlocalMatrix = Teuchos::null;
4692
4693 // Import from the one-to-one matrix to the original matrix.
4694 if (verbose) {
4695 std::ostringstream os;
4696 os << *prefix << "doImport from 1-to-1 matrix" << endl;
4697 std::cerr << os.str();
4698 }
4699 import_type importToOrig (oneToOneRowMap, origRowMap);
4700 this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
4701 }
4702
4703 // It's safe now to clear out nonlocals_, since we've already
4704 // committed side effects to *this. The standard idiom for
4705 // clearing a Container like std::map, is to swap it with an empty
4706 // Container and let the swapped Container fall out of scope.
4707 if (verbose) {
4708 std::ostringstream os;
4709 os << *prefix << "Free nonlocals_ (std::map)" << endl;
4710 std::cerr << os.str();
4711 }
4712 decltype (nonlocals_) newNonlocals;
4713 std::swap (nonlocals_, newNonlocals);
4714
4715 // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
4716 // don't like throwing an exception here. A local return value
4717 // would likely be more useful to users. However, if users find
4718 // themselves exercising nonlocal inserts often, then they are
4719 // probably novice users who need the help. See GitHub Issues
4720 // #603 and #601 (esp. the latter) for discussion.
4721
4722 int isGloballyComplete = 0; // output argument of reduceAll
4723 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4724 outArg (isGloballyComplete));
4725 TEUCHOS_TEST_FOR_EXCEPTION
4726 (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
4727 "you called insertGlobalValues with a global row index which is not in "
4728 "the matrix's row Map on any process in its communicator.");
4729 }
4730
// Put the matrix back into a state where it is not fill complete.
//
// If the matrix owns its graph, params is forwarded to the graph's
// resumeFill; a static (nonowned) graph is left untouched. The cached
// global constants are then cleared and fillComplete_ is set to false.
4731 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4732 void
4734 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
4735 {
4736 if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
4737 myGraph_->resumeFill (params);
4738 }
4739 clearGlobalConstants ();
4740 fillComplete_ = false;
4741 }
4742
// Intentionally empty; see the comment in the body.
// NOTE(review): the listing lines that carry the class qualifier and
// the method name are not present here; the name computeGlobalConstants
// is inferred from the body comment.
4743 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4744 void
4747 {
4748 // This method doesn't do anything. The analogous method in
4749 // CrsGraph does actually compute something.
4750 //
4751 // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
4752 // setting it to -1), but computeGlobalConstants() does _not_
4753 // compute the Frobenius norm; this is done on demand in
4754 // getFrobeniusNorm(), and the result is cached there.
4755 }
4756
// Report whether global constants are available. The matrix keeps no
// state of its own for this; the answer comes from the graph.
4757 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4758 bool
4760 haveGlobalConstants() const {
4761 return getCrsGraphRef ().haveGlobalConstants ();
4762 }
4763
// Invalidate the cached Frobenius norm by storing the sentinel -1 in
// frobNorm_.
// NOTE(review): the listing lines that carry the class qualifier, the
// method name and the opening brace are not present here; the name
// clearGlobalConstants is inferred from the body comment.
4764 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4765 void
4768 // We use -1 to indicate that the Frobenius norm needs to be
4769 // recomputed, since the values might change between now and the
4770 // next fillComplete call.
4771 //
4772 // Oddly enough, clearGlobalConstants() clears frobNorm_, but
4773 // computeGlobalConstants() does _not_ compute the Frobenius norm;
4774 // this is done on demand in getFrobeniusNorm(), and the result is
4775 // cached there.
4776 frobNorm_ = -STM::one ();
4777 }
4778
// fillComplete overload without explicit domain and range Maps. It
// picks the Maps and delegates to the three-argument fillComplete.
//
// params: forwarded unchanged to the three-argument overload.
//
// Map selection:
//   - static graph that is already fill complete: the graph's own
//     domain and range Maps;
//   - otherwise: the graph's row Map, used as both domain and range
//     Map.
//
// Throws std::logic_error if getCrsGraph() returns null.
4779 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4780 void
4782 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
4783 {
4784 const char tfecfFuncName[] = "fillComplete(params): ";
4785
4786 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4787 (this->getCrsGraph ().is_null (), std::logic_error,
4788 "getCrsGraph() returns null. This should not happen at this point. "
4789 "Please report this bug to the Tpetra developers.");
4790
4791 const crs_graph_type& graph = this->getCrsGraphRef ();
4792 if (this->isStaticGraph () && graph.isFillComplete ()) {
4793 // If this matrix's graph is fill complete and the user did not
4794 // supply a domain or range Map, use the graph's domain and
4795 // range Maps.
4796 this->fillComplete (graph.getDomainMap (), graph.getRangeMap (), params);
4797 }
4798 else { // assume that user's row Map is the domain and range Map
4799 Teuchos::RCP<const map_type> rangeMap = graph.getRowMap ();
4800 Teuchos::RCP<const map_type> domainMap = rangeMap;
4801 this->fillComplete (domainMap, rangeMap, params);
4802 }
4803 }
4804
4805 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4806 void
4808 fillComplete (const Teuchos::RCP<const map_type>& domainMap,
4809 const Teuchos::RCP<const map_type>& rangeMap,
4810 const Teuchos::RCP<Teuchos::ParameterList>& params)
4811 {
4812 using Details::Behavior;
4814 using Teuchos::ArrayRCP;
4815 using Teuchos::RCP;
4816 using Teuchos::rcp;
4817 using std::endl;
4818 const char tfecfFuncName[] = "fillComplete: ";
4819 ProfilingRegion regionFillComplete
4820 ("Tpetra::CrsMatrix::fillComplete");
4821 const bool verbose = Behavior::verbose("CrsMatrix");
4822 std::unique_ptr<std::string> prefix;
4823 if (verbose) {
4824 prefix = this->createPrefix("CrsMatrix", "fillComplete(dom,ran,p)");
4825 std::ostringstream os;
4826 os << *prefix << endl;
4827 std::cerr << os.str ();
4828 }
4830 "Tpetra::CrsMatrix::fillCompete",
4831 "fillCompete");
4832
4833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4834 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4835 "Matrix fill state must be active (isFillActive() "
4836 "must be true) before you may call fillComplete().");
4837 const int numProcs = this->getComm ()->getSize ();
4838
4839 //
4840 // Read parameters from the input ParameterList.
4841 //
4842 {
4843 Details::ProfilingRegion region_fc("Tpetra::CrsMatrix::fillCompete", "ParameterList");
4844
4845 // If true, the caller promises that no process did nonlocal
4846 // changes since the last call to fillComplete.
4847 bool assertNoNonlocalInserts = false;
4848 // If true, makeColMap sorts remote GIDs (within each remote
4849 // process' group).
4850 bool sortGhosts = true;
4851
4852 if (! params.is_null ()) {
4853 assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
4854 assertNoNonlocalInserts);
4855 if (params->isParameter ("sort column map ghost gids")) {
4856 sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
4857 }
4858 else if (params->isParameter ("Sort column Map ghost GIDs")) {
4859 sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
4860 }
4861 }
4862 // We also don't need to do global assembly if there is only one
4863 // process in the communicator.
4864 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4865 // This parameter only matters if this matrix owns its graph.
4866 if (! this->myGraph_.is_null ()) {
4867 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4868 }
4869
4870 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4871 if (this->hasColMap ()) { // use local indices
4872 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4873 }
4874 else { // no column Map, so use global indices
4875 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4876 }
4877 }
4878 // Global assemble, if we need to. This call only costs a single
4879 // all-reduce if we didn't need global assembly after all.
4880 if (needGlobalAssemble) {
4881 this->globalAssemble ();
4882 }
4883 else {
4884 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4885 (numProcs == 1 && nonlocals_.size() > 0,
4886 std::runtime_error, "Cannot have nonlocal entries on a serial run. "
4887 "An invalid entry (i.e., with row index not in the row Map) must have "
4888 "been submitted to the CrsMatrix.");
4889 }
4890 }
4891 if (this->isStaticGraph ()) {
4892 Details::ProfilingRegion region_isg("Tpetra::CrsMatrix::fillCompete", "isStaticGraph");
4893 // FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
4894 // checks below only in debug mode. It would be nicer to do a
4895 // local check, then propagate the error state in a deferred
4896 // way, whenever communication happens. That would reduce the
4897 // cost of checking, to the point where it may make sense to
4898 // enable it even in release mode.
4899#ifdef HAVE_TPETRA_DEBUG
4900 // FIXME (mfh 18 Jun 2014) This check for correctness of the
4901 // input Maps incurs a penalty of two all-reduces for the
4902 // otherwise optimal const graph case.
4903 //
4904 // We could turn these (max) 2 all-reduces into (max) 1, by
4905 // fusing them. We could do this by adding a "locallySameAs"
4906 // method to Map, which would return one of four states:
4907 //
4908 // a. Certainly globally the same
4909 // b. Certainly globally not the same
4910 // c. Locally the same
4911 // d. Locally not the same
4912 //
4913 // The first two states don't require further communication.
4914 // The latter two states require an all-reduce to communicate
4915 // globally, but we only need one all-reduce, since we only need
4916 // to check whether at least one of the Maps is wrong.
4917 const bool domainMapsMatch =
4918 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4919 const bool rangeMapsMatch =
4920 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4921
4922 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4923 (! domainMapsMatch, std::runtime_error,
4924 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4925 "The graph cannot be changed because it was given to the CrsMatrix "
4926 "constructor as const. You can fix this by passing in the graph's "
4927 "domain Map and range Map to the matrix's fillComplete call.");
4928
4929 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4930 (! rangeMapsMatch, std::runtime_error,
4931 "The CrsMatrix's range Map does not match the graph's range Map. "
4932 "The graph cannot be changed because it was given to the CrsMatrix "
4933 "constructor as const. You can fix this by passing in the graph's "
4934 "domain Map and range Map to the matrix's fillComplete call.");
4935#endif // HAVE_TPETRA_DEBUG
4936
4937 // The matrix does _not_ own the graph, and the graph's
4938 // structure is already fixed, so just fill the local matrix.
4939 this->fillLocalMatrix (params);
4940 }
4941 else {
4942 Details::ProfilingRegion region_insg("Tpetra::CrsMatrix::fillCompete", "isNotStaticGraph");
4943 // Set the graph's domain and range Maps. This will clear the
4944 // Import if the domain Map has changed (is a different
4945 // pointer), and the Export if the range Map has changed (is a
4946 // different pointer).
4947 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4948
4949 // Make the graph's column Map, if necessary.
4950 Teuchos::Array<int> remotePIDs (0);
4951 const bool mustBuildColMap = ! this->hasColMap ();
4952 if (mustBuildColMap) {
4953 this->myGraph_->makeColMap (remotePIDs);
4954 }
4955
4956 // Make indices local, if necessary. The method won't do
4957 // anything if the graph is already locally indexed.
4958 const std::pair<size_t, std::string> makeIndicesLocalResult =
4959 this->myGraph_->makeIndicesLocal(verbose);
4960 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
4961 // the error state to makeImportExport or
4962 // computeGlobalConstants, which may do all-reduces and thus may
4963 // have the opportunity to communicate that error state.
4964 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4965 (makeIndicesLocalResult.first != 0, std::runtime_error,
4966 makeIndicesLocalResult.second);
4967
4968 const bool sorted = this->myGraph_->isSorted ();
4969 const bool merged = this->myGraph_->isMerged ();
4970 this->sortAndMergeIndicesAndValues (sorted, merged);
4971
4972 // Make Import and Export objects, if they haven't been made
4973 // already. If we made a column Map above, reuse information
4974 // from that process to avoid communiation in the Import setup.
4975 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4976
4977 // The matrix _does_ own the graph, so fill the local graph at
4978 // the same time as the local matrix.
4979 this->fillLocalGraphAndMatrix (params);
4980
4981 const bool callGraphComputeGlobalConstants = params.get () == nullptr ||
4982 params->get ("compute global constants", true);
4983 if (callGraphComputeGlobalConstants) {
4984 this->myGraph_->computeGlobalConstants ();
4985 }
4986 else {
4987 this->myGraph_->computeLocalConstants ();
4988 }
4989 this->myGraph_->fillComplete_ = true;
4990 this->myGraph_->checkInternalState ();
4991 }
4992
4993 {
4994 Details::ProfilingRegion region_ccgc(
4995 "Tpetra::CrsMatrix::fillCompete", "callComputeGlobalConstamnts"
4996 );
4997 const bool callComputeGlobalConstants = params.get () == nullptr ||
4998 params->get ("compute global constants", true);
4999 if (callComputeGlobalConstants) {
5000 this->computeGlobalConstants ();
5001 }
5002 }
5003
5004 // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
5005
5006 this->fillComplete_ = true; // Now we're fill complete!
5007 {
5008 Details::ProfilingRegion region_cis(
5009 "Tpetra::CrsMatrix::fillCompete", "checkInternalState"
5010 );
5011 this->checkInternalState ();
5012 }
5013 }
5014
// Fill-complete this matrix using caller-supplied domain/range Maps and
// Import/Export objects.
//
// Visible steps:
//   1. Throw std::runtime_error unless isFillActive() is true and
//      isFillComplete() is false; throw std::logic_error if myGraph_ is
//      null.
//   2. Forward all five arguments to myGraph_->expertStaticFillComplete().
//   3. Call computeGlobalConstants(), unless params is nonnull and its
//      "compute global constants" entry (queried with default true) is
//      false.
//   4. Call fillLocalGraphAndMatrix(params), set fillComplete_ = true, and
//      run checkInternalState().
//
// When HAVE_TPETRA_MMM_TIMINGS is defined, each phase is wrapped in a
// Teuchos::TimeMonitor whose name is built from the "Timer Label" entry of
// params (empty label if params is null).
//
// NOTE(review): this listing skips the line between "void" and the function
// name (presumably the class qualifier) -- confirm against the real file.
5015 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5016 void
5018 expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
5019 const Teuchos::RCP<const map_type> & rangeMap,
5020 const Teuchos::RCP<const import_type>& importer,
5021 const Teuchos::RCP<const export_type>& exporter,
5022 const Teuchos::RCP<Teuchos::ParameterList> &params)
5023 {
5024#ifdef HAVE_TPETRA_MMM_TIMINGS
5025 std::string label;
5026 if(!params.is_null())
5027 label = params->get("Timer Label",label);
5028 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
5029 using Teuchos::TimeMonitor;
5030
5031 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-all")));
5032#endif
5033
// Preconditions: fill must be active, not yet complete, and the matrix
// must own its graph (myGraph_ nonnull).
5034 const char tfecfFuncName[] = "expertStaticFillComplete: ";
5035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
5036 std::runtime_error, "Matrix fill state must be active (isFillActive() "
5037 "must be true) before calling fillComplete().");
5038 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5039 myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
5040
5041 {
5042#ifdef HAVE_TPETRA_MMM_TIMINGS
5043 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-Graph")));
5044#endif
5045 // We will presume globalAssemble is not needed, so we do the ESFC on the graph
5046 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
5047 }
5048
// Global constants are computed by default; a caller opts out by passing
// "compute global constants" = false in params.
5049 const bool callComputeGlobalConstants = params.get () == nullptr ||
5050 params->get ("compute global constants", true);
5051 if (callComputeGlobalConstants) {
5052 this->computeGlobalConstants ();
5053 }
5054
5055 {
5056#ifdef HAVE_TPETRA_MMM_TIMINGS
5057 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-fLGAM")));
5058#endif
5059 // Fill the local graph and matrix
5060 fillLocalGraphAndMatrix (params);
5061 }
5062 // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
5063
5064 // Now we're fill complete!
5065 fillComplete_ = true;
5066
5067 // Sanity checks at the end.
5068#ifdef HAVE_TPETRA_DEBUG
5069 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
5070 ": We're at the end of fillComplete(), but isFillActive() is true. "
5071 "Please report this bug to the Tpetra developers.");
// This check fires when isFillComplete() is false, so the message names
// that condition (it previously repeated the isFillActive() text).
5072 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
5073 ": We're at the end of fillComplete(), but isFillComplete() is false. "
5074 "Please report this bug to the Tpetra developers.");
5075#endif // HAVE_TPETRA_DEBUG
5076 {
5077#ifdef HAVE_TPETRA_MMM_TIMINGS
5078 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS")));
5079#endif
5080
5081 checkInternalState();
5082 }
5083 }
5084
// Merge runs of adjacent equal column indices within one row, in place.
//
// Parameters:
//   rowLen - number of entries in the row.
//   cols   - the row's column indices; rewritten in place.
//   vals   - the row's values, parallel to cols; rewritten in place.
//
// Each entry is compared only with the most recently kept entry, so only
// adjacent duplicates are merged. The values of a run of duplicates are
// summed (+=) into the slot of the kept entry. Entries past the returned
// count are not cleared.
//
// Returns newend - beg, i.e. the number of entries kept (0 when rowLen is 0).
//
// NOTE(review): this listing skips the line holding the return type and
// class qualifier; the caller stores the result in a size_t -- confirm.
5085 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5087 mergeRowIndicesAndValues (size_t rowLen, LocalOrdinal* cols, impl_scalar_type* vals)
5088 {
5089 impl_scalar_type* rowValueIter = vals;
5090 // beg,end define a half-exclusive interval over which to iterate.
5091 LocalOrdinal* beg = cols;
5092 LocalOrdinal* end = cols + rowLen;
// newend / vend track the last kept index / value (write cursors);
// cur / vcur are the read cursors, starting at the second entry.
5093 LocalOrdinal* newend = beg;
5094 if (beg != end) {
5095 LocalOrdinal* cur = beg + 1;
5096 impl_scalar_type* vcur = rowValueIter + 1;
5097 impl_scalar_type* vend = rowValueIter;
// Redundant: cur already equals beg + 1 from its initialization above.
5098 cur = beg+1;
5099 while (cur != end) {
5100 if (*cur != *newend) {
5101 // new entry; save it
5102 ++newend;
5103 ++vend;
5104 (*newend) = (*cur);
5105 (*vend) = (*vcur);
5106 }
5107 else {
5108 // old entry; merge it
5109 //(*vend) = f (*vend, *vcur);
5110 (*vend) += *vcur;
5111 }
5112 ++cur;
5113 ++vcur;
5114 }
5115 ++newend; // one past the last entry, per typical [beg,end) semantics
5116 }
5117 return newend - beg;
5118 }
5119
// Sort and/or merge the column indices and values of every local row.
//
// Parameters:
//   sorted - true if the rows are already sorted; when false, each row is
//            sorted with sort2 (indices and values together).
//   merged - true if the rows are already merged; when false, each row is
//            passed to mergeRowIndicesAndValues and its stored row length
//            is updated.
//
// Does nothing when both flags are true. Otherwise throws
// std::runtime_error if the matrix has a static graph, and std::logic_error
// if myGraph_ is null or storage is already optimized. On success the
// graph's indicesAreSorted_ / noRedundancies_ flags are set for whichever
// operation was performed.
//
// NOTE(review): this listing skips the line between "void" and the function
// name (presumably the class qualifier) -- confirm against the real file.
5120 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5121 void
5123 sortAndMergeIndicesAndValues (const bool sorted, const bool merged)
5124 {
5125 using ::Tpetra::Details::ProfilingRegion;
5126 typedef LocalOrdinal LO;
5127 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5128 host_execution_space;
5129 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5130 const char tfecfFuncName[] = "sortAndMergeIndicesAndValues: ";
5131 ProfilingRegion regionSAM ("Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
5132
5133 if (! sorted || ! merged) {
5134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5135 (this->isStaticGraph (), std::runtime_error, "Cannot sort or merge with "
5136 "\"static\" (const) graph, since the matrix does not own the graph.");
5137 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5138 (this->myGraph_.is_null (), std::logic_error, "myGraph_ is null, but "
5139 "this matrix claims ! isStaticGraph(). "
5140 "Please report this bug to the Tpetra developers.");
5141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5142 (this->isStorageOptimized (), std::logic_error, "It is invalid to call "
5143 "this method if the graph's storage has already been optimized. "
5144 "Please report this bug to the Tpetra developers.");
5145
5146 crs_graph_type& graph = * (this->myGraph_);
5147 const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
// Receives the reduction result (sum over rows of removed duplicates).
// It is not read again in this function.
5148 size_t totalNumDups = 0;
5149 {
5150 //Accessing host unpacked (4-array CRS) local matrix.
5151 auto rowBegins_ = graph.rowPtrsUnpacked_host_;
5152 auto rowLengths_ = graph.k_numRowEntries_;
5153 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
5154 auto cols_ = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
// One iteration per local row, run on the host execution space. Each
// iteration touches only its own row's slice [rowBegin, rowBegin+rowLen).
5155 Kokkos::parallel_reduce ("sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
5156 [=] (const LO lclRow, size_t& numDups) {
5157 size_t rowBegin = rowBegins_(lclRow);
5158 size_t rowLen = rowLengths_(lclRow);
5159 LO* cols = cols_.data() + rowBegin;
5160 impl_scalar_type* vals = vals_.data() + rowBegin;
5161 if (! sorted) {
5162 sort2 (cols, cols + rowLen, vals);
5163 }
5164 if (! merged) {
5165 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
5166 rowLengths_(lclRow) = newRowLength;
5167 numDups += rowLen - newRowLength;
5168 }
5169 }, totalNumDups);
5170 }
5171 if (! sorted) {
5172 graph.indicesAreSorted_ = true; // we just sorted every row
5173 }
5174 if (! merged) {
5175 graph.noRedundancies_ = true; // we just merged every row
5176 }
5177 }
5178 }
5179
// Non-transpose sparse matrix-multivector product:
//   Y_in = beta * Y_in + alpha * A * X_in.
//
// Outline of the visible body:
//   - alpha == 0: only scale (or zero) Y_in by beta and return.
//   - Bring X_in into a column Map multivector (X_colMap): by Import when
//     the graph has an importer, otherwise by reference, or by a
//     constant-stride copy if X_in is not constant stride.
//   - With an exporter: multiply into the row Map multivector with beta = 0,
//     then zero or scale Y_in and doExport with ADD_ASSIGN.
//   - Without an exporter: multiply directly into Y_in, or through the
//     row Map multivector if Y_in is not constant stride or is the same
//     object as X_colMap.
//   - If Y_in is replicated (not distributed, more than one process), beta
//     is zeroed on ranks > 0 and Y_in.reduce() is called at the end.
//
// NOTE(review): this listing skips the lines that hold the function name
// and its first parameters (the body uses X_in and Y_in), and one line at
// the top of the body. apply() calls this->applyNonTranspose (X, Y, alpha,
// beta), so this is presumably applyNonTranspose -- confirm against the
// real file.
5180 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5181 void
5185 Scalar alpha,
5186 Scalar beta) const
5187 {
5189 using Teuchos::RCP;
5190 using Teuchos::rcp;
5191 using Teuchos::rcp_const_cast;
5192 using Teuchos::rcpFromRef;
5193 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5194 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
5195
5196 // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
5197 // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
5198 // failing only for the Kokkos refactor version of Tpetra. It's a
5199 // good idea regardless to have the bypass.
5200 if (alpha == ZERO) {
5201 if (beta == ZERO) {
5202 Y_in.putScalar (ZERO);
5203 } else if (beta != ONE) {
5204 Y_in.scale (beta);
5205 }
5206 return;
5207 }
5208
5209 // It's possible that X is a view of Y or vice versa. We don't
5210 // allow this (apply() requires that X and Y not alias one
5211 // another), but it's helpful to detect and work around this case.
5212 // We don't try to to detect the more subtle cases (e.g., one is a
5213 // subview of the other, but their initial pointers differ). We
5214 // only need to do this if this matrix's Import is trivial;
5215 // otherwise, we don't actually apply the operator from X into Y.
5216
5217 RCP<const import_type> importer = this->getGraph ()->getImporter ();
5218 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5219
5220 // If beta == 0, then the output MV will be overwritten; none of
5221 // its entries should be read. (Sparse BLAS semantics say that we
5222 // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
5223 // This matters if we need to do an Export operation; see below.
// Note: this is captured before beta may be zeroed for the replicated
// case below, so it reflects the caller's beta.
5224 const bool Y_is_overwritten = (beta == ZERO);
5225
5226 // We treat the case of a replicated MV output specially.
5227 const bool Y_is_replicated =
5228 (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
5229
5230 // This is part of the special case for replicated MV output.
5231 // We'll let each process do its thing, but do an all-reduce at
5232 // the end to sum up the results. Setting beta=0 on all processes
5233 // but Proc 0 makes the math work out for the all-reduce. (This
5234 // assumes that the replicated data is correctly replicated, so
5235 // that the data are the same on all processes.)
5236 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5237 beta = ZERO;
5238 }
5239
5240 // Temporary MV for Import operation. After the block of code
5241 // below, this will be an (Imported if necessary) column Map MV
5242 // ready to give to localApply(...).
5243 RCP<const MV> X_colMap;
5244 if (importer.is_null ()) {
5245 if (! X_in.isConstantStride ()) {
5246 // Not all sparse mat-vec kernels can handle an input MV with
5247 // nonconstant stride correctly, so we have to copy it in that
5248 // case into a constant stride MV. To make a constant stride
5249 // copy of X_in, we force creation of the column (== domain)
5250 // Map MV (if it hasn't already been created, else fetch the
5251 // cached copy). This avoids creating a new MV each time.
5252 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
5253 Tpetra::deep_copy (*X_colMapNonConst, X_in);
5254 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5255 }
5256 else {
5257 // The domain and column Maps are the same, so do the local
5258 // multiply using the domain Map input MV X_in.
5259 X_colMap = rcpFromRef (X_in);
5260 }
5261 }
5262 else { // need to Import source (multi)vector
5263 ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply: Import");
5264
5265 // We're doing an Import anyway, which will copy the relevant
5266 // elements of the domain Map MV X_in into a separate column Map
5267 // MV. Thus, we don't have to worry whether X_in is constant
5268 // stride.
5269 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
5270
5271 // Import from the domain Map MV to the column Map MV.
5272 X_colMapNonConst->doImport (X_in, *importer, INSERT);
5273 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5274 }
5275
5276 // Temporary MV for doExport (if needed), or for copying a
5277 // nonconstant stride output MV into a constant stride MV. This
5278 // is null if we don't need the temporary MV, that is, if the
5279 // Export is trivial (null).
5280 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
5281
5282 // If we have a nontrivial Export object, we must perform an
5283 // Export. In that case, the local multiply result will go into
5284 // the row Map multivector. We don't have to make a
5285 // constant-stride version of Y_in in this case, because we had to
5286 // make a constant stride Y_rowMap MV and do an Export anyway.
5287 if (! exporter.is_null ()) {
5288 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
5289 {
5290 ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply: Export");
5291
5292 // If we're overwriting the output MV Y_in completely (beta ==
5293 // 0), then make sure that it is filled with zeros before we
5294 // do the Export. Otherwise, the ADD combine mode will use
5295 // data in Y_in, which is supposed to be zero.
5296 if (Y_is_overwritten) {
5297 Y_in.putScalar (ZERO);
5298 }
5299 else {
5300 // Scale output MV by beta, so that doExport sums in the
5301 // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
5302 Y_in.scale (beta);
5303 }
5304 // Do the Export operation.
5305 Y_in.doExport (*Y_rowMap, *exporter, ADD_ASSIGN);
5306 }
5307 }
5308 else { // Don't do an Export: row Map and range Map are the same.
5309 //
5310 // If Y_in does not have constant stride, or if the column Map
5311 // MV aliases Y_in, then we can't let the kernel write directly
5312 // to Y_in. Instead, we have to use the cached row (== range)
5313 // Map MV as temporary storage.
5314 //
5315 // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5316 // the user passed in the same MultiVector for both X and Y. It
5317 // won't detect whether one MultiVector views the other. We
5318 // should also check the MultiVectors' raw data pointers.
5319 if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
5320 // Force creating the MV if it hasn't been created already.
5321 // This will reuse a previously created cached MV.
5322 Y_rowMap = getRowMapMultiVector (Y_in, true);
5323
5324 // If beta == 0, we don't need to copy Y_in into Y_rowMap,
5325 // since we're overwriting it anyway.
5326 if (beta != ZERO) {
5327 Tpetra::deep_copy (*Y_rowMap, Y_in);
5328 }
5329 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
5330 Tpetra::deep_copy (Y_in, *Y_rowMap);
5331 }
5332 else {
5333 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
5334 }
5335 }
5336
5337 // If the range Map is a locally replicated Map, sum up
5338 // contributions from each process. We set beta = 0 on all
5339 // processes but Proc 0 initially, so this will handle the scaling
5340 // factor beta correctly.
5341 if (Y_is_replicated) {
5342 ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply: Reduce Y");
5343 Y_in.reduce ();
5344 }
5345 }
5346
// Transpose (or conjugate-transpose, per mode) sparse matrix-multivector
// product: Y_in = beta * Y_in + alpha * op(A) * X_in.
//
// Outline of the visible body:
//   - alpha == 0: zero Y_in (beta == 0) or scale it by beta, then return.
//   - Roles of the graph's Import/Export are swapped relative to the
//     non-transpose case: the exporter is used to *import* X_in into
//     exportMV_, and the importer is used to *export* the local result
//     from importMV_ into Y_in with ADD_ASSIGN.
//   - importMV_ / exportMV_ are cached members, re-created here when absent
//     or when their number of vectors differs from X_in's.
//   - Without an importer, the multiply writes into Y_in directly, or into a
//     deep copy when Y_in is not constant stride or is the same object as X.
//   - If Y_in is not distributed, beta is zeroed on ranks > 0 and
//     Y_in.reduce() is called at the end.
//
// NOTE(review): this listing skips the lines that hold the function name
// and its first parameters (the body uses X_in and Y_in), and one line at
// the top of the body. apply() calls this->applyTranspose (X, Y, mode,
// alpha, beta), so this is presumably applyTranspose -- confirm.
5347 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5348 void
5352 const Teuchos::ETransp mode,
5353 Scalar alpha,
5354 Scalar beta) const
5355 {
5357 using Teuchos::null;
5358 using Teuchos::RCP;
5359 using Teuchos::rcp;
5360 using Teuchos::rcp_const_cast;
5361 using Teuchos::rcpFromRef;
5362 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5363
5364 // Take shortcuts for alpha == 0.
5365 if (alpha == ZERO) {
5366 // Follow the Sparse BLAS convention by ignoring both the matrix
5367 // and X_in, in this case.
5368 if (beta == ZERO) {
5369 // Follow the Sparse BLAS convention by overwriting any Inf or
5370 // NaN values in Y_in, in this case.
5371 Y_in.putScalar (ZERO);
5372 }
5373 else {
5374 Y_in.scale (beta);
5375 }
5376 return;
5377 }
5378
5379 const size_t numVectors = X_in.getNumVectors ();
5380
5381 // We don't allow X_in and Y_in to alias one another. It's hard
5382 // to check this, because advanced users could create views from
5383 // raw pointers. However, if X_in and Y_in reference the same
5384 // object, we will do the user a favor by copying X into new
5385 // storage (with a warning). We only need to do this if we have
5386 // trivial importers; otherwise, we don't actually apply the
5387 // operator from X into Y.
5388 RCP<const import_type> importer = this->getGraph ()->getImporter ();
5389 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5390 // access X indirectly, in case we need to create temporary storage
5391 RCP<const MV> X;
5392
5393 // some parameters for below
// NOTE(review): unlike the non-transpose path, this test does not also
// require getComm ()->getSize () != 1 -- confirm whether that difference
// is intended.
5394 const bool Y_is_replicated = ! Y_in.isDistributed ();
5395 const bool Y_is_overwritten = (beta == ZERO);
5396 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5397 beta = ZERO;
5398 }
5399
5400 // The kernels do not allow input or output with nonconstant stride.
5401 if (! X_in.isConstantStride () && importer.is_null ()) {
5402 X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
5403 } else {
5404 X = rcpFromRef (X_in); // Reference to X_in
5405 }
5406
5407 // Set up temporary multivectors for Import and/or Export.
5408 if (importer != Teuchos::null) {
5409 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
5410 importMV_ = null;
5411 }
5412 if (importMV_ == null) {
5413 importMV_ = rcp (new MV (this->getColMap (), numVectors));
5414 }
5415 }
5416 if (exporter != Teuchos::null) {
5417 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
5418 exportMV_ = null;
5419 }
5420 if (exportMV_ == null) {
5421 exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
5422 }
5423 }
5424
5425 // If we have a non-trivial exporter, we must import elements that
5426 // are permuted or are on other processors.
5427 if (! exporter.is_null ()) {
5428 ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply (transpose): Import");
5429 exportMV_->doImport (X_in, *exporter, INSERT);
5430 X = exportMV_; // multiply out of exportMV_
5431 }
5432
5433 // If we have a non-trivial importer, we must export elements that
5434 // are permuted or belong to other processors. We will compute
5435 // solution into the to-be-exported MV; get a view.
5436 if (importer != Teuchos::null) {
5437 ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply (transpose): Export");
5438
5439 // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
5440 // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
5441 // back and figure out why this helps. importMV_ SHOULD be
5442 // completely overwritten in the localApply(...) call
5443 // below, because beta == ZERO there.
5444 importMV_->putScalar (ZERO);
5445 // Do the local computation.
5446 this->localApply (*X, *importMV_, mode, alpha, ZERO);
5447
// Prepare Y_in so that the ADD_ASSIGN export below yields
// beta*Y_in + (exported product).
5448 if (Y_is_overwritten) {
5449 Y_in.putScalar (ZERO);
5450 } else {
5451 Y_in.scale (beta);
5452 }
5453 Y_in.doExport (*importMV_, *importer, ADD_ASSIGN);
5454 }
5455 // otherwise, multiply into Y
5456 else {
5457 // can't multiply in-situ; can't multiply into non-strided multivector
5458 //
5459 // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5460 // the user passed in the same MultiVector for both X and Y. It
5461 // won't detect whether one MultiVector views the other. We
5462 // should also check the MultiVectors' raw data pointers.
5463 if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
5464 // Make a deep copy of Y_in, into which to write the multiply result.
5465 MV Y (Y_in, Teuchos::Copy);
5466 this->localApply (*X, Y, mode, alpha, beta);
5467 Tpetra::deep_copy (Y_in, Y);
5468 } else {
5469 this->localApply (*X, Y_in, mode, alpha, beta);
5470 }
5471 }
5472
5473 // If the range Map is a locally replicated map, sum the
5474 // contributions from each process. (That's why we set beta=0
5475 // above for all processes but Proc 0.)
5476 if (Y_is_replicated) {
5477 ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply (transpose): Reduce Y");
5478 Y_in.reduce ();
5479 }
5480 }
5481
// Local (on-process) sparse matrix-multivector product on device views:
// hands X's read-only device view and Y's read-write device view to the
// local multiply operator with the given mode, alpha, and beta.
//
// When Tpetra::Details::Behavior::debug () is true, the inputs are
// validated first and std::runtime_error is thrown if:
//   - X and Y have different numbers of vectors;
//   - local lengths do not match the column/row Maps (NO_TRANS: X vs.
//     column Map, Y vs. row Map; otherwise X vs. row Map, Y vs. column Map);
//   - the matrix is not fill complete;
//   - X or Y is not constant stride;
//   - X and Y share the same non-null device data pointer.
//
// Kernel choice: applyImbalancedRows is used when (max entries in any local
// row) - (local entries / local rows, integer division) reaches
// Behavior::rowImbalanceThreshold (); otherwise the plain apply kernel is
// used. The imbalance is taken as 0 when there are no local rows.
//
// NOTE(review): this listing skips the lines that hold the function name
// and its first parameters (the body uses X and Y), and one line at the top
// of the body. The name is taken from tfecfFuncName ("localApply: ").
5482 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5483 void
5487 const Teuchos::ETransp mode,
5488 const Scalar& alpha,
5489 const Scalar& beta) const
5490 {
5492 using Teuchos::NO_TRANS;
5493 ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localApply");
5494
5495 auto X_lcl = X.getLocalViewDevice(Access::ReadOnly);
5496 auto Y_lcl = Y.getLocalViewDevice(Access::ReadWrite);
5497 auto matrix_lcl = getLocalMultiplyOperator();
5498
5499 const bool debug = ::Tpetra::Details::Behavior::debug ();
5500 if (debug) {
5501 const char tfecfFuncName[] = "localApply: ";
5502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5503 (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
5504 "X.getNumVectors() = " << X.getNumVectors () << " != "
5505 "Y.getNumVectors() = " << Y.getNumVectors () << ".");
5506 const bool transpose = (mode != Teuchos::NO_TRANS);
5507 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5508 (! transpose && X.getLocalLength () !=
5509 getColMap ()->getNodeNumElements (), std::runtime_error,
5510 "NO_TRANS case: X has the wrong number of local rows. "
5511 "X.getLocalLength() = " << X.getLocalLength () << " != "
5512 "getColMap()->getNodeNumElements() = " <<
5513 getColMap ()->getNodeNumElements () << ".");
5514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5515 (! transpose && Y.getLocalLength () !=
5516 getRowMap ()->getNodeNumElements (), std::runtime_error,
5517 "NO_TRANS case: Y has the wrong number of local rows. "
5518 "Y.getLocalLength() = " << Y.getLocalLength () << " != "
5519 "getRowMap()->getNodeNumElements() = " <<
5520 getRowMap ()->getNodeNumElements () << ".");
5521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5522 (transpose && X.getLocalLength () !=
5523 getRowMap ()->getNodeNumElements (), std::runtime_error,
5524 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5525 "rows. X.getLocalLength() = " << X.getLocalLength ()
5526 << " != getRowMap()->getNodeNumElements() = "
5527 << getRowMap ()->getNodeNumElements () << ".");
// This check is on Y, so the message names Y (it previously said "X").
5528 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5529 (transpose && Y.getLocalLength () !=
5530 getColMap ()->getNodeNumElements (), std::runtime_error,
5531 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
5532 "rows. Y.getLocalLength() = " << Y.getLocalLength ()
5533 << " != getColMap()->getNodeNumElements() = "
5534 << getColMap ()->getNodeNumElements () << ".");
5535 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5536 (! isFillComplete (), std::runtime_error, "The matrix is not "
5537 "fill complete. You must call fillComplete() (possibly with "
5538 "domain and range Map arguments) without an intervening "
5539 "resumeFill() call before you may call this method.");
5540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5541 (! X.isConstantStride () || ! Y.isConstantStride (),
5542 std::runtime_error, "X and Y must be constant stride.");
5543 // If the two pointers are null, then they don't alias one
5544 // another, even though they are equal.
5545 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5546 (X_lcl.data () == Y_lcl.data () && X_lcl.data () != nullptr,
5547 std::runtime_error, "X and Y may not alias one another.");
5548 }
5549
// Imbalance = longest local row minus the average row length (integer
// division); guarded against division by zero when there are no rows.
5550 LocalOrdinal nrows = getNodeNumRows();
5551 LocalOrdinal maxRowImbalance = 0;
5552 if(nrows != 0)
5553 maxRowImbalance = getNodeMaxNumRowEntries() - (getNodeNumEntries() / nrows);
5554
5555 if(size_t(maxRowImbalance) >= Tpetra::Details::Behavior::rowImbalanceThreshold())
5556 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5557 else
5558 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
5559 }
5560
// Public matrix-multivector product entry point; dispatches on mode.
//
// Throws std::runtime_error if the matrix is not fill complete.
// mode == Teuchos::NO_TRANS forwards to applyNonTranspose (X, Y, alpha,
// beta). Any other mode zeroes Y first when beta == 0, then forwards to
// applyTranspose (X, Y, mode, alpha, beta).
//
// NOTE(review): this listing skips the lines that hold the function name
// and its first parameters (the body uses X and Y), and one line at the top
// of the body. The name is taken from fnName ("Tpetra::CrsMatrix::apply").
5561 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5562 void
5566 Teuchos::ETransp mode,
5567 Scalar alpha,
5568 Scalar beta) const
5569 {
5571 const char fnName[] = "Tpetra::CrsMatrix::apply";
5572
5573 TEUCHOS_TEST_FOR_EXCEPTION
5574 (! isFillComplete (), std::runtime_error,
5575 fnName << ": Cannot call apply() until fillComplete() "
5576 "has been called.");
5577
5578 if (mode == Teuchos::NO_TRANS) {
5579 ProfilingRegion regionNonTranspose (fnName);
5580 this->applyNonTranspose (X, Y, alpha, beta);
5581 }
5582 else {
5583 ProfilingRegion regionTranspose ("Tpetra::CrsMatrix::apply (transpose)");
5584
5585 // Thyra was implicitly assuming that Y gets set to zero (or is
5586 // overwritten) when beta==0. This was not the case with transpose in a
5587 // multithreaded environment, where a multiplication with subsequent
5588 // atomic_adds is used, since 0 is effectively not special-cased. Doing
5589 // the explicit set to zero here catches cases where Y is NaN or Inf.
5590 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5591 if (beta == ZERO) {
5592 Y.putScalar (ZERO);
5593 }
5594 this->applyTranspose (X, Y, mode, alpha, beta);
5595 }
5596 }
5597
5598
// Create a new CrsMatrix with scalar type T that uses this matrix's graph
// and whose values are converted copies of this matrix's values.
//
// Throws std::runtime_error if this matrix is not fill complete.
// Returns an RCP to the new matrix, on which fillComplete has already been
// called with this matrix's domain and range Maps.
//
// NOTE(review): this listing skips the line before the function name
// (presumably the class qualifier) and the line just before the
// copyConvert call (presumably what brings copyConvert into scope) --
// confirm against the real file.
5599 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5600 template<class T>
5601 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5603 convert () const
5604 {
5605 using Teuchos::RCP;
5606 typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
5607 const char tfecfFuncName[] = "convert: ";
5608
5609 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5610 (! this->isFillComplete (), std::runtime_error, "This matrix (the source "
5611 "of the conversion) is not fill complete. You must first call "
5612 "fillComplete() (possibly with the domain and range Map) without an "
5613 "intervening call to resumeFill(), before you may call this method.");
5614
// The new matrix is constructed from this matrix's graph object.
5615 RCP<output_matrix_type> newMatrix
5616 (new output_matrix_type (this->getCrsGraph ()));
5617 // Copy old values into new values. impl_scalar_type and T may
5618 // differ, so we can't use Kokkos::deep_copy.
5620 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5621 this->getLocalMatrixDevice ().values);
5622 // Since newMatrix has a static (const) graph, the graph already has
5623 // a column Map, and Import and Export objects already exist (if
5624 // applicable). Thus, calling fillComplete is cheap.
5625 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5626
5627 return newMatrix;
5628 }
5629
5630
// Debug-only consistency check of the matrix's internal state.
//
// Does nothing unless Tpetra::Details::Behavior::debug ("CrsGraph") is
// true. Otherwise throws std::logic_error if any of these hold:
//   - staticGraph_ is null;
//   - myGraph_ is nonnull but differs from staticGraph_;
//   - the matrix is fill complete but its graph is not;
//   - the graph's indices are allocated with nonzero allocation size and
//     there is at least one local row, yet valuesUnpacked_wdv is empty.
//
// NOTE(review): this listing skips the line between "void" and the function
// name (presumably the class qualifier) -- confirm against the real file.
5631 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5632 void
5634 checkInternalState () const
5635 {
// NOTE(review): the debug key is "CrsGraph" even though this checks a
// matrix -- confirm that this is intentional.
5636 const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
5637 if (debug) {
5638 const char tfecfFuncName[] = "checkInternalState: ";
5639 const char err[] = "Internal state is not consistent. "
5640 "Please report this bug to the Tpetra developers.";
5641
5642 // This version of the graph (RCP<const crs_graph_type>) must
5643 // always be nonnull.
5644 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645 (staticGraph_.is_null (), std::logic_error, err);
5646 // myGraph == null means that the matrix has a const ("static")
5647 // graph. Otherwise, the matrix has a dynamic graph (it owns its
5648 // graph).
5649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5650 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5651 std::logic_error, err);
5652 // if matrix is fill complete, then graph must be fill complete
5653 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5654 (isFillComplete () && ! staticGraph_->isFillComplete (),
5655 std::logic_error, err << " Specifically, the matrix is fill complete, "
5656 "but its graph is NOT fill complete.");
5657 // if the graph's indices are allocated with nonzero allocation size and
5658 // there are local rows, then the unpacked values must not be empty
5659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5660 (staticGraph_->indicesAreAllocated () &&
5661 staticGraph_->getNodeAllocationSize() > 0 &&
5662 staticGraph_->getNodeNumRows() > 0 &&
5663 valuesUnpacked_wdv.extent (0) == 0,
5664 std::logic_error, err);
5665 }
5666 }
5667
// Build a one-line, brace-delimited summary of this matrix.
//
// The string starts with "Tpetra::CrsMatrix (Kokkos refactor): {", then the
// object label if it is nonempty, then the fill-complete state and global
// dimensions. The global number of entries is included only when the
// matrix is fill complete.
//
// NOTE(review): this listing skips the line between "std::string" and the
// function name (presumably the class qualifier) -- confirm.
5668 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5669 std::string
5671 description () const
5672 {
5673 std::ostringstream os;
5674
5675 os << "Tpetra::CrsMatrix (Kokkos refactor): {";
5676 if (this->getObjectLabel () != "") {
5677 os << "Label: \"" << this->getObjectLabel () << "\", ";
5678 }
5679 if (isFillComplete ()) {
5680 os << "isFillComplete: true"
5681 << ", global dimensions: [" << getGlobalNumRows () << ", "
5682 << getGlobalNumCols () << "]"
5683 << ", global number of entries: " << getGlobalNumEntries ()
5684 << "}";
5685 }
5686 else {
// The entry count is omitted on this path.
5687 os << "isFillComplete: false"
5688 << ", global dimensions: [" << getGlobalNumRows () << ", "
5689 << getGlobalNumCols () << "]}";
5690 }
5691 return os.str ();
5692 }
5693
5694 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5695 void
5697 describe (Teuchos::FancyOStream &out,
5698 const Teuchos::EVerbosityLevel verbLevel) const
5699 {
5700 using std::endl;
5701 using std::setw;
5702 using Teuchos::ArrayView;
5703 using Teuchos::Comm;
5704 using Teuchos::RCP;
5705 using Teuchos::TypeNameTraits;
5706 using Teuchos::VERB_DEFAULT;
5707 using Teuchos::VERB_NONE;
5708 using Teuchos::VERB_LOW;
5709 using Teuchos::VERB_MEDIUM;
5710 using Teuchos::VERB_HIGH;
5711 using Teuchos::VERB_EXTREME;
5712
5713 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5714
5715 if (vl == VERB_NONE) {
5716 return; // Don't print anything at all
5717 }
5718
5719 // By convention, describe() always begins with a tab.
5720 Teuchos::OSTab tab0 (out);
5721
5722 RCP<const Comm<int> > comm = this->getComm();
5723 const int myRank = comm->getRank();
5724 const int numProcs = comm->getSize();
5725 size_t width = 1;
5726 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5727 ++width;
5728 }
5729 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5730
5731 // none: print nothing
5732 // low: print O(1) info from node 0
5733 // medium: print O(P) info, num entries per process
5734 // high: print O(N) info, num entries per row
5735 // extreme: print O(NNZ) info: print indices and values
5736 //
5737 // for medium and higher, print constituent objects at specified verbLevel
5738 if (myRank == 0) {
5739 out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5740 }
5741 Teuchos::OSTab tab1 (out);
5742
5743 if (myRank == 0) {
5744 if (this->getObjectLabel () != "") {
5745 out << "Label: \"" << this->getObjectLabel () << "\", ";
5746 }
5747 {
5748 out << "Template parameters:" << endl;
5749 Teuchos::OSTab tab2 (out);
5750 out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
5751 << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5752 << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5753 << "Node: " << TypeNameTraits<Node>::name () << endl;
5754 }
5755 if (isFillComplete()) {
5756 out << "isFillComplete: true" << endl
5757 << "Global dimensions: [" << getGlobalNumRows () << ", "
5758 << getGlobalNumCols () << "]" << endl
5759 << "Global number of entries: " << getGlobalNumEntries () << endl
5760 << endl << "Global max number of entries in a row: "
5761 << getGlobalMaxNumRowEntries () << endl;
5762 }
5763 else {
5764 out << "isFillComplete: false" << endl
5765 << "Global dimensions: [" << getGlobalNumRows () << ", "
5766 << getGlobalNumCols () << "]" << endl;
5767 }
5768 }
5769
5770 if (vl < VERB_MEDIUM) {
5771 return; // all done!
5772 }
5773
5774 // Describe the row Map.
5775 if (myRank == 0) {
5776 out << endl << "Row Map:" << endl;
5777 }
5778 if (getRowMap ().is_null ()) {
5779 if (myRank == 0) {
5780 out << "null" << endl;
5781 }
5782 }
5783 else {
5784 if (myRank == 0) {
5785 out << endl;
5786 }
5787 getRowMap ()->describe (out, vl);
5788 }
5789
5790 // Describe the column Map.
5791 if (myRank == 0) {
5792 out << "Column Map: ";
5793 }
5794 if (getColMap ().is_null ()) {
5795 if (myRank == 0) {
5796 out << "null" << endl;
5797 }
5798 } else if (getColMap () == getRowMap ()) {
5799 if (myRank == 0) {
5800 out << "same as row Map" << endl;
5801 }
5802 } else {
5803 if (myRank == 0) {
5804 out << endl;
5805 }
5806 getColMap ()->describe (out, vl);
5807 }
5808
5809 // Describe the domain Map.
5810 if (myRank == 0) {
5811 out << "Domain Map: ";
5812 }
5813 if (getDomainMap ().is_null ()) {
5814 if (myRank == 0) {
5815 out << "null" << endl;
5816 }
5817 } else if (getDomainMap () == getRowMap ()) {
5818 if (myRank == 0) {
5819 out << "same as row Map" << endl;
5820 }
5821 } else if (getDomainMap () == getColMap ()) {
5822 if (myRank == 0) {
5823 out << "same as column Map" << endl;
5824 }
5825 } else {
5826 if (myRank == 0) {
5827 out << endl;
5828 }
5829 getDomainMap ()->describe (out, vl);
5830 }
5831
5832 // Describe the range Map.
5833 if (myRank == 0) {
5834 out << "Range Map: ";
5835 }
5836 if (getRangeMap ().is_null ()) {
5837 if (myRank == 0) {
5838 out << "null" << endl;
5839 }
5840 } else if (getRangeMap () == getDomainMap ()) {
5841 if (myRank == 0) {
5842 out << "same as domain Map" << endl;
5843 }
5844 } else if (getRangeMap () == getRowMap ()) {
5845 if (myRank == 0) {
5846 out << "same as row Map" << endl;
5847 }
5848 } else {
5849 if (myRank == 0) {
5850 out << endl;
5851 }
5852 getRangeMap ()->describe (out, vl);
5853 }
5854
5855 // O(P) data
5856 for (int curRank = 0; curRank < numProcs; ++curRank) {
5857 if (myRank == curRank) {
5858 out << "Process rank: " << curRank << endl;
5859 Teuchos::OSTab tab2 (out);
5860 if (! staticGraph_->indicesAreAllocated ()) {
5861 out << "Graph indices not allocated" << endl;
5862 }
5863 else {
5864 out << "Number of allocated entries: "
5865 << staticGraph_->getNodeAllocationSize () << endl;
5866 }
5867 out << "Number of entries: " << getNodeNumEntries () << endl
5868 << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
5869 << endl;
5870 }
5871 // Give output time to complete by executing some barriers.
5872 comm->barrier ();
5873 comm->barrier ();
5874 comm->barrier ();
5875 }
5876
5877 if (vl < VERB_HIGH) {
5878 return; // all done!
5879 }
5880
5881 // O(N) and O(NNZ) data
5882 for (int curRank = 0; curRank < numProcs; ++curRank) {
5883 if (myRank == curRank) {
5884 out << std::setw(width) << "Proc Rank"
5885 << std::setw(width) << "Global Row"
5886 << std::setw(width) << "Num Entries";
5887 if (vl == VERB_EXTREME) {
5888 out << std::setw(width) << "(Index,Value)";
5889 }
5890 out << endl;
5891 for (size_t r = 0; r < getNodeNumRows (); ++r) {
5892 const size_t nE = getNumEntriesInLocalRow(r);
5893 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5894 out << std::setw(width) << myRank
5895 << std::setw(width) << gid
5896 << std::setw(width) << nE;
5897 if (vl == VERB_EXTREME) {
5898 if (isGloballyIndexed()) {
5899 global_inds_host_view_type rowinds;
5900 values_host_view_type rowvals;
5901 getGlobalRowView (gid, rowinds, rowvals);
5902 for (size_t j = 0; j < nE; ++j) {
5903 out << " (" << rowinds[j]
5904 << ", " << rowvals[j]
5905 << ") ";
5906 }
5907 }
5908 else if (isLocallyIndexed()) {
5909 local_inds_host_view_type rowinds;
5910 values_host_view_type rowvals;
5911 getLocalRowView (r, rowinds, rowvals);
5912 for (size_t j=0; j < nE; ++j) {
5913 out << " (" << getColMap()->getGlobalElement(rowinds[j])
5914 << ", " << rowvals[j]
5915 << ") ";
5916 }
5917 } // globally or locally indexed
5918 } // vl == VERB_EXTREME
5919 out << endl;
5920 } // for each row r on this process
5921 } // if (myRank == curRank)
5922
5923 // Give output time to complete
5924 comm->barrier ();
5925 comm->barrier ();
5926 comm->barrier ();
5927 } // for each process p
5928 }
5929
5930 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5931 bool
5933 checkSizes (const SrcDistObject& source)
5934 {
5935 // It's not clear what kind of compatibility checks on sizes can
5936 // be performed here. Epetra_CrsGraph doesn't check any sizes for
5937 // compatibility.
5938
5939 // Currently, the source object must be a RowMatrix with the same
5940 // four template parameters as the target CrsMatrix. We might
5941 // relax this requirement later.
5942 const row_matrix_type* srcRowMat =
5943 dynamic_cast<const row_matrix_type*> (&source);
5944 return (srcRowMat != nullptr);
5945 }
5946
5947 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5948 void
5951 const typename crs_graph_type::padding_type& padding,
5952 const bool verbose)
5953 {
5956 using std::endl;
5957 using LO = local_ordinal_type;
5958 using row_ptrs_type =
5959 typename local_graph_device_type::row_map_type::non_const_type;
5960 using range_policy =
5961 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5962 const char tfecfFuncName[] = "applyCrsPadding";
5963 const char suffix[] =
5964 ". Please report this bug to the Tpetra developers.";
5965 ProfilingRegion regionCAP("Tpetra::CrsMatrix::applyCrsPadding");
5966
5967 std::unique_ptr<std::string> prefix;
5968 if (verbose) {
5969 prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
5970 std::ostringstream os;
5971 os << *prefix << "padding: ";
5972 padding.print(os);
5973 os << endl;
5974 std::cerr << os.str();
5975 }
5976 const int myRank = ! verbose ? -1 : [&] () {
5977 auto map = this->getMap();
5978 if (map.is_null()) {
5979 return -1;
5980 }
5981 auto comm = map->getComm();
5982 if (comm.is_null()) {
5983 return -1;
5984 }
5985 return comm->getRank();
5986 } ();
5987
5988 // NOTE (mfh 29 Jan 2020) This allocates the values array.
5989 if (! myGraph_->indicesAreAllocated()) {
5990 if (verbose) {
5991 std::ostringstream os;
5992 os << *prefix << "Call allocateIndices" << endl;
5993 std::cerr << os.str();
5994 }
5995 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5996 }
5997
5998 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
5999 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
6000 // size needs to increase. That should be the job of
6001 // padCrsArrays.
6002
6003 // Making copies here because rowPtrsUnpacked_ has a const type. Otherwise, we
6004 // would use it directly.
6005
6006 if (verbose) {
6007 std::ostringstream os;
6008 os << *prefix << "Allocate row_ptrs_beg: "
6009 << myGraph_->rowPtrsUnpacked_host_.extent(0) << endl;
6010 std::cerr << os.str();
6011 }
6012 using Kokkos::view_alloc;
6013 using Kokkos::WithoutInitializing;
6014 row_ptrs_type row_ptr_beg(
6015 view_alloc("row_ptr_beg", WithoutInitializing),
6016 myGraph_->rowPtrsUnpacked_dev_.extent(0));
6017 Kokkos::deep_copy(row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
6018
6019 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
6020 size_t(row_ptr_beg.extent(0) - 1);
6021 if (verbose) {
6022 std::ostringstream os;
6023 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
6024 std::cerr << os.str();
6025 }
6026 row_ptrs_type row_ptr_end(
6027 view_alloc("row_ptr_end", WithoutInitializing), N);
6028
6029 row_ptrs_type num_row_entries_d;
6030
6031 const bool refill_num_row_entries =
6032 myGraph_->k_numRowEntries_.extent(0) != 0;
6033
6034 if (refill_num_row_entries) { // unpacked storage
6035 // We can't assume correct *this capture until C++17, and it's
6036 // likely more efficient just to capture what we need anyway.
6037 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
6038 myGraph_->k_numRowEntries_);
6039 Kokkos::parallel_for
6040 ("Fill end row pointers", range_policy(0, N),
6041 KOKKOS_LAMBDA (const size_t i) {
6042 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
6043 });
6044 }
6045 else {
6046 // FIXME (mfh 04 Feb 2020) Fix padCrsArrays so that if packed
6047 // storage, we don't need row_ptr_end to be separate allocation;
6048 // could just have it alias row_ptr_beg+1.
6049 Kokkos::parallel_for
6050 ("Fill end row pointers", range_policy(0, N),
6051 KOKKOS_LAMBDA (const size_t i) {
6052 row_ptr_end(i) = row_ptr_beg(i+1);
6053 });
6054 }
6055
6056 if (myGraph_->isGloballyIndexed()) {
6057 padCrsArrays(row_ptr_beg, row_ptr_end,
6058 myGraph_->gblInds_wdv,
6059 valuesUnpacked_wdv, padding, myRank, verbose);
6060 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
6061 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
6062 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6063 (newValuesLen != newColIndsLen, std::logic_error,
6064 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
6065 << " != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
6066 << suffix);
6067 }
6068 else {
6069 padCrsArrays(row_ptr_beg, row_ptr_end,
6070 myGraph_->lclIndsUnpacked_wdv,
6071 valuesUnpacked_wdv, padding, myRank, verbose);
6072 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
6073 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
6074 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6075 (newValuesLen != newColIndsLen, std::logic_error,
6076 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
6077 << " != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
6078 << suffix);
6079 }
6080
6081 if (refill_num_row_entries) {
6082 Kokkos::parallel_for
6083 ("Fill num entries", range_policy(0, N),
6084 KOKKOS_LAMBDA (const size_t i) {
6085 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
6086 });
6087 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
6088 }
6089
6090 if (verbose) {
6091 std::ostringstream os;
6092 os << *prefix << "Assign myGraph_->rowPtrsUnpacked_; "
6093 << "old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
6094 << ", new size: " << row_ptr_beg.extent(0) << endl;
6095 std::cerr << os.str();
6096 TEUCHOS_ASSERT( myGraph_->rowPtrsUnpacked_host_.extent(0) ==
6097 row_ptr_beg.extent(0) );
6098 }
6099 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
6100 }
6101
6102 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6103 void
6104 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6105 copyAndPermuteStaticGraph(
6106 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6107 const size_t numSameIDs,
6108 const LocalOrdinal permuteToLIDs[],
6109 const LocalOrdinal permuteFromLIDs[],
6110 const size_t numPermutes)
6111 {
6112 using Details::ProfilingRegion;
6113 using Teuchos::Array;
6114 using Teuchos::ArrayView;
6115 using std::endl;
6116 using LO = LocalOrdinal;
6117 using GO = GlobalOrdinal;
6118 const char tfecfFuncName[] = "copyAndPermuteStaticGraph";
6119 const char suffix[] =
6120 " Please report this bug to the Tpetra developers.";
6121 ProfilingRegion regionCAP
6122 ("Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
6123
6124 const bool debug = Details::Behavior::debug("CrsGraph");
6125 const bool verbose = Details::Behavior::verbose("CrsGraph");
6126 std::unique_ptr<std::string> prefix;
6127 if (verbose) {
6128 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6129 std::ostringstream os;
6130 os << *prefix << "Start" << endl;
6131 }
6132 const char* const prefix_raw =
6133 verbose ? prefix.get()->c_str() : nullptr;
6134
6135 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6136 //
6137 // Copy the first numSame row from source to target (this matrix).
6138 // This involves copying rows corresponding to LIDs [0, numSame-1].
6139 //
6140 const map_type& srcRowMap = * (srcMat.getRowMap ());
6141 nonconst_global_inds_host_view_type rowInds;
6142 nonconst_values_host_view_type rowVals;
6143 const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6144 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6145 // Global ID for the current row index in the source matrix.
6146 // The first numSameIDs GIDs in the two input lists are the
6147 // same, so sourceGID == targetGID in this case.
6148 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6149 const GO targetGID = sourceGID;
6150
6151 ArrayView<const GO>rowIndsConstView;
6152 ArrayView<const Scalar> rowValsConstView;
6153
6154 if (sourceIsLocallyIndexed) {
6155 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6156 if (rowLength > static_cast<size_t> (rowInds.size())) {
6157 Kokkos::resize(rowInds,rowLength);
6158 Kokkos::resize(rowVals,rowLength);
6159 }
6160 // Resizing invalidates an Array's views, so we must make new
6161 // ones, even if rowLength hasn't changed.
6162 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((size_t)0, rowLength));
6163 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((size_t)0, rowLength));
6164
6165 // The source matrix is locally indexed, so we have to get a
6166 // copy. Really it's the GIDs that have to be copied (because
6167 // they have to be converted from LIDs).
6168 size_t checkRowLength = 0;
6169 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
6170 rowValsView, checkRowLength);
6171 if (debug) {
6172 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6173 (rowLength != checkRowLength, std::logic_error, "For "
6174 "global row index " << sourceGID << ", the source "
6175 "matrix's getNumEntriesInGlobalRow returns a row length "
6176 "of " << rowLength << ", but getGlobalRowCopy reports "
6177 "a row length of " << checkRowLength << "." << suffix);
6178 }
6179
6180 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6181 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6182 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6183 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6184 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6185 rowIndsView.data(), rowIndsView.extent(0),
6186 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6187 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6188 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6189 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6190 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6191 // KDDKDD UVM TEMPORARY: KokkosView interface
6192 }
6193 else { // source matrix is globally indexed.
6194 global_inds_host_view_type rowIndsView;
6195 values_host_view_type rowValsView;
6196 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6197 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6198 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6199 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6200 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6201 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6202 rowIndsView.data(), rowIndsView.extent(0),
6203 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6204 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6205 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6206 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6207 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6208 // KDDKDD UVM TEMPORARY: KokkosView interface
6209
6210 }
6211
6212 // Applying a permutation to a matrix with a static graph
6213 // means REPLACE-ing entries.
6214 combineGlobalValues(targetGID, rowIndsConstView,
6215 rowValsConstView, REPLACE,
6216 prefix_raw, debug, verbose);
6217 }
6218
6219 if (verbose) {
6220 std::ostringstream os;
6221 os << *prefix << "Do permutes" << endl;
6222 }
6223
6224 const map_type& tgtRowMap = * (this->getRowMap ());
6225 for (size_t p = 0; p < numPermutes; ++p) {
6226 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6227 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6228
6229 ArrayView<const GO> rowIndsConstView;
6230 ArrayView<const Scalar> rowValsConstView;
6231
6232 if (sourceIsLocallyIndexed) {
6233 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6234 if (rowLength > static_cast<size_t> (rowInds.size ())) {
6235 Kokkos::resize(rowInds,rowLength);
6236 Kokkos::resize(rowVals,rowLength);
6237 }
6238 // Resizing invalidates an Array's views, so we must make new
6239 // ones, even if rowLength hasn't changed.
6240 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((size_t)0, rowLength));
6241 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((size_t)0, rowLength));
6242
6243 // The source matrix is locally indexed, so we have to get a
6244 // copy. Really it's the GIDs that have to be copied (because
6245 // they have to be converted from LIDs).
6246 size_t checkRowLength = 0;
6247 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6248 rowValsView, checkRowLength);
6249 if (debug) {
6250 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6251 (rowLength != checkRowLength, std::logic_error, "For "
6252 "source matrix global row index " << sourceGID << ", "
6253 "getNumEntriesInGlobalRow returns a row length of " <<
6254 rowLength << ", but getGlobalRowCopy a row length of "
6255 << checkRowLength << "." << suffix);
6256 }
6257
6258 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6259 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6260 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6261 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6262 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6263 rowIndsView.data(), rowIndsView.extent(0),
6264 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6265 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6266 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6267 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6268 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6269 // KDDKDD UVM TEMPORARY: KokkosView interface
6270 }
6271 else {
6272 global_inds_host_view_type rowIndsView;
6273 values_host_view_type rowValsView;
6274 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6275 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6276 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6277 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6278 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6279 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6280 rowIndsView.data(), rowIndsView.extent(0),
6281 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6282 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6283 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6284 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6285 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6286 // KDDKDD UVM TEMPORARY: KokkosView interface
6287 }
6288
6289 combineGlobalValues(targetGID, rowIndsConstView,
6290 rowValsConstView, REPLACE,
6291 prefix_raw, debug, verbose);
6292 }
6293
6294 if (verbose) {
6295 std::ostringstream os;
6296 os << *prefix << "Done" << endl;
6297 }
6298 }
6299
6300 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6301 void
6302 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6303 copyAndPermuteNonStaticGraph(
6304 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6305 const size_t numSameIDs,
6306 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
6307 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
6308 const size_t numPermutes)
6309 {
6310 using Details::ProfilingRegion;
6311 using Teuchos::Array;
6312 using Teuchos::ArrayView;
6313 using std::endl;
6314 using LO = LocalOrdinal;
6315 using GO = GlobalOrdinal;
6316 const char tfecfFuncName[] = "copyAndPermuteNonStaticGraph";
6317 const char suffix[] =
6318 " Please report this bug to the Tpetra developers.";
6319 ProfilingRegion regionCAP
6320 ("Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
6321
6322 const bool debug = Details::Behavior::debug("CrsGraph");
6323 const bool verbose = Details::Behavior::verbose("CrsGraph");
6324 std::unique_ptr<std::string> prefix;
6325 if (verbose) {
6326 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6327 std::ostringstream os;
6328 os << *prefix << "Start" << endl;
6329 }
6330 const char* const prefix_raw =
6331 verbose ? prefix.get()->c_str() : nullptr;
6332
6333 {
6334 using row_graph_type = RowGraph<LO, GO, Node>;
6335 const row_graph_type& srcGraph = *(srcMat.getGraph());
6336 auto padding =
6337 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
6338 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
6339 applyCrsPadding(*padding, verbose);
6340 }
6341 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6342 //
6343 // Copy the first numSame row from source to target (this matrix).
6344 // This involves copying rows corresponding to LIDs [0, numSame-1].
6345 //
6346 const map_type& srcRowMap = * (srcMat.getRowMap ());
6347 const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6348 using gids_type = nonconst_global_inds_host_view_type;
6349 using vals_type = nonconst_values_host_view_type;
6350 gids_type rowInds;
6351 vals_type rowVals;
6352 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6353 // Global ID for the current row index in the source matrix.
6354 // The first numSameIDs GIDs in the two input lists are the
6355 // same, so sourceGID == targetGID in this case.
6356 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6357 const GO targetGID = sourceGID;
6358
6359 ArrayView<const GO> rowIndsConstView;
6360 ArrayView<const Scalar> rowValsConstView;
6361
6362 if (sourceIsLocallyIndexed) {
6363
6364 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6365 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
6366 Kokkos::resize(rowInds,rowLength);
6367 Kokkos::resize(rowVals,rowLength);
6368 }
6369 // Resizing invalidates an Array's views, so we must make new
6370 // ones, even if rowLength hasn't changed.
6371 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((size_t)0, rowLength));
6372 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((size_t)0, rowLength));
6373
6374 // The source matrix is locally indexed, so we have to get a
6375 // copy. Really it's the GIDs that have to be copied (because
6376 // they have to be converted from LIDs).
6377 size_t checkRowLength = 0;
6378 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
6379 checkRowLength);
6380 if (debug) {
6381 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6382 (rowLength != checkRowLength, std::logic_error, ": For "
6383 "global row index " << sourceGID << ", the source "
6384 "matrix's getNumEntriesInGlobalRow returns a row length "
6385 "of " << rowLength << ", but getGlobalRowCopy reports "
6386 "a row length of " << checkRowLength << "." << suffix);
6387 }
6388 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6389 rowValsConstView = Teuchos::ArrayView<const Scalar>(reinterpret_cast<Scalar *>(rowValsView.data()), rowLength);
6390 }
6391 else { // source matrix is globally indexed.
6392 global_inds_host_view_type rowIndsView;
6393 values_host_view_type rowValsView;
6394 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6395
6396 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6397 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6398 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6399 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6400 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6401 rowIndsView.data(), rowIndsView.extent(0),
6402 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6403 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6404 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6405 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6406 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6407 // KDDKDD UVM TEMPORARY: KokkosView interface
6408 }
6409
6410 // Combine the data into the target matrix.
6411 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6412 rowValsConstView, prefix_raw, debug, verbose);
6413 }
6414
6415 if (verbose) {
6416 std::ostringstream os;
6417 os << *prefix << "Do permutes" << endl;
6418 }
6419 const LO* const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
6420 const LO* const permuteToLIDs = permuteToLIDs_dv.view_host().data();
6421
6422 const map_type& tgtRowMap = * (this->getRowMap ());
6423 for (size_t p = 0; p < numPermutes; ++p) {
6424 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6425 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6426
6427 ArrayView<const GO> rowIndsConstView;
6428 ArrayView<const Scalar> rowValsConstView;
6429
6430 if (sourceIsLocallyIndexed) {
6431 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6432 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
6433 Kokkos::resize(rowInds,rowLength);
6434 Kokkos::resize(rowVals,rowLength);
6435 }
6436 // Resizing invalidates an Array's views, so we must make new
6437 // ones, even if rowLength hasn't changed.
6438 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((size_t)0, rowLength));
6439 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((size_t)0, rowLength));
6440
6441 // The source matrix is locally indexed, so we have to get a
6442 // copy. Really it's the GIDs that have to be copied (because
6443 // they have to be converted from LIDs).
6444 size_t checkRowLength = 0;
6445 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6446 rowValsView, checkRowLength);
6447 if (debug) {
6448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6449 (rowLength != checkRowLength, std::logic_error, "For "
6450 "source matrix global row index " << sourceGID << ", "
6451 "getNumEntriesInGlobalRow returns a row length of " <<
6452 rowLength << ", but getGlobalRowCopy a row length of "
6453 << checkRowLength << "." << suffix);
6454 }
6455 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6456 rowValsConstView = Teuchos::ArrayView<const Scalar>(reinterpret_cast<Scalar *>(rowValsView.data()), rowLength);
6457 }
6458 else {
6459 global_inds_host_view_type rowIndsView;
6460 values_host_view_type rowValsView;
6461 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6462
6463 // KDDKDD UVM TEMPORARY: refactor combineGlobalValues to take
6464 // KDDKDD UVM TEMPORARY: Kokkos::View instead of ArrayView
6465 // KDDKDD UVM TEMPORARY: For now, wrap the view in ArrayViews
6466 // KDDKDD UVM TEMPORARY: Should be safe because we hold the KokkosViews
6467 rowIndsConstView = Teuchos::ArrayView<const GO> ( // BAD BAD BAD
6468 rowIndsView.data(), rowIndsView.extent(0),
6469 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6470 rowValsConstView = Teuchos::ArrayView<const Scalar> ( // BAD BAD BAD
6471 reinterpret_cast<const Scalar*>(rowValsView.data()), rowValsView.extent(0),
6472 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6473 // KDDKDD UVM TEMPORARY: Add replace, sum, transform methods with
6474 // KDDKDD UVM TEMPORARY: KokkosView interface
6475 }
6476
6477 // Combine the data into the target matrix.
6478 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6479 rowValsConstView, prefix_raw, debug, verbose);
6480 }
6481
6482 if (verbose) {
6483 std::ostringstream os;
6484 os << *prefix << "Done" << endl;
6485 }
6486 }
6487
6488 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6489 void
6490 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6491 copyAndPermute(
6492 const SrcDistObject& srcObj,
6493 const size_t numSameIDs,
6494 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6495 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6496 const CombineMode /*CM*/)
6497 {
6498 using Details::Behavior;
6500 using Details::ProfilingRegion;
6501 using std::endl;
6502
6503 // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6504 const char tfecfFuncName[] = "copyAndPermute: ";
6505 ProfilingRegion regionCAP("Tpetra::CrsMatrix::copyAndPermute");
6506
6507 const bool verbose = Behavior::verbose("CrsMatrix");
6508 std::unique_ptr<std::string> prefix;
6509 if (verbose) {
6510 prefix = this->createPrefix("CrsMatrix", "copyAndPermute");
6511 std::ostringstream os;
6512 os << *prefix << endl
6513 << *prefix << " numSameIDs: " << numSameIDs << endl
6514 << *prefix << " numPermute: " << permuteToLIDs.extent(0)
6515 << endl
6516 << *prefix << " "
6517 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
6518 << endl
6519 << *prefix << " "
6520 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
6521 << endl
6522 << *prefix << " "
6523 << "isStaticGraph: " << (isStaticGraph() ? "true" : "false")
6524 << endl;
6525 std::cerr << os.str ();
6526 }
6527
6528 const auto numPermute = permuteToLIDs.extent (0);
6529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6530 (numPermute != permuteFromLIDs.extent (0),
6531 std::invalid_argument, "permuteToLIDs.extent(0) = "
6532 << numPermute << "!= permuteFromLIDs.extent(0) = "
6533 << permuteFromLIDs.extent (0) << ".");
6534
6535 // This dynamic cast should succeed, because we've already tested
6536 // it in checkSizes().
6537 using RMT = RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
6538 const RMT& srcMat = dynamic_cast<const RMT&> (srcObj);
6539 if (isStaticGraph ()) {
6540 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6541 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6542 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6543 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6544
6545 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6546 permuteToLIDs_h.data(),
6547 permuteFromLIDs_h.data(),
6548 numPermute);
6549 }
6550 else {
6551 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6552 permuteFromLIDs, numPermute);
6553 }
6554
6555 if (verbose) {
6556 std::ostringstream os;
6557 os << *prefix << "Done" << endl;
6558 std::cerr << os.str();
6559 }
6560 }
6561
6562 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6563 void
6564 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6565 packAndPrepare
6566 (const SrcDistObject& source,
6567 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6568 Kokkos::DualView<char*, buffer_device_type>& exports,
6569 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6570 size_t& constantNumPackets)
6571 {
6572 using Details::Behavior;
6574 using Details::ProfilingRegion;
6575 using Teuchos::outArg;
6576 using Teuchos::REDUCE_MAX;
6577 using Teuchos::reduceAll;
6578 using std::endl;
6579 typedef LocalOrdinal LO;
6580 typedef GlobalOrdinal GO;
6581 const char tfecfFuncName[] = "packAndPrepare: ";
6582 ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepare");
6583
6584 const bool debug = Behavior::debug("CrsMatrix");
6585 const bool verbose = Behavior::verbose("CrsMatrix");
6586
6587 // Processes on which the communicator is null should not participate.
6588 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6589 if (pComm.is_null ()) {
6590 return;
6591 }
6592 const Teuchos::Comm<int>& comm = *pComm;
6593 const int myRank = comm.getSize ();
6594
6595 std::unique_ptr<std::string> prefix;
6596 if (verbose) {
6597 prefix = this->createPrefix("CrsMatrix", "packAndPrepare");
6598 std::ostringstream os;
6599 os << *prefix << "Start" << endl
6600 << *prefix << " "
6601 << dualViewStatusToString (exportLIDs, "exportLIDs")
6602 << endl
6603 << *prefix << " "
6604 << dualViewStatusToString (exports, "exports")
6605 << endl
6606 << *prefix << " "
6607 << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
6608 << endl;
6609 std::cerr << os.str ();
6610 }
6611
6612 // Attempt to cast the source object to CrsMatrix. If successful,
6613 // use the source object's packNew() method to pack its data for
6614 // communication. Otherwise, attempt to cast to RowMatrix; if
6615 // successful, use the source object's pack() method. Otherwise,
6616 // the source object doesn't have the right type.
6617 //
6618 // FIXME (mfh 30 Jun 2013, 11 Sep 2017) We don't even need the
6619 // RowMatrix to have the same Node type. Unfortunately, we don't
6620 // have a way to ask if the RowMatrix is "a RowMatrix with any
6621 // Node type," since RowMatrix doesn't have a base class. A
6622 // hypothetical RowMatrixBase<Scalar, LO, GO> class, which does
6623 // not currently exist, would satisfy this requirement.
6624 //
6625 // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
6626 // type doesn't technically need to match the target object's
6627 // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
6628 // and GO need not be the same, as long as there is no overflow of
6629 // the indices. However, checking for index overflow is global
6630 // and therefore undesirable.
6631
6632 std::ostringstream msg; // for collecting error messages
6633 int lclBad = 0; // to be set below
6634
6635 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6636 const crs_matrix_type* srcCrsMat =
6637 dynamic_cast<const crs_matrix_type*> (&source);
6638 if (srcCrsMat != nullptr) {
6639 if (verbose) {
6640 std::ostringstream os;
6641 os << *prefix << "Source matrix same (CrsMatrix) type as target; "
6642 "calling packNew" << endl;
6643 std::cerr << os.str ();
6644 }
6645 try {
6646 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6647 constantNumPackets);
6648 }
6649 catch (std::exception& e) {
6650 lclBad = 1;
6651 msg << "Proc " << myRank << ": " << e.what () << std::endl;
6652 }
6653 }
6654 else {
6655 using Kokkos::HostSpace;
6656 using Kokkos::subview;
6657 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6658 using range_type = Kokkos::pair<size_t, size_t>;
6659
6660 if (verbose) {
6661 std::ostringstream os;
6662 os << *prefix << "Source matrix NOT same (CrsMatrix) type as target"
6663 << endl;
6664 std::cerr << os.str ();
6665 }
6666
6667 const row_matrix_type* srcRowMat =
6668 dynamic_cast<const row_matrix_type*> (&source);
6669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6670 (srcRowMat == nullptr, std::invalid_argument,
6671 "The source object of the Import or Export operation is neither a "
6672 "CrsMatrix (with the same template parameters as the target object), "
6673 "nor a RowMatrix (with the same first four template parameters as the "
6674 "target object).");
6675
6676 // For the RowMatrix case, we need to convert from
6677 // Kokkos::DualView to Teuchos::Array*. This doesn't need to be
6678 // so terribly efficient, since packing a non-CrsMatrix
6679 // RowMatrix for Import/Export into a CrsMatrix is not a
6680 // critical case. Thus, we may allocate Teuchos::Array objects
6681 // here and copy to and from Kokkos::*View.
6682
6683 // View exportLIDs's host data as a Teuchos::ArrayView.
6684 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6685 auto exportLIDs_h = exportLIDs.view_host ();
6686 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6687 exportLIDs_h.size ());
6688
6689 // pack() will allocate exports_a as needed. We'll copy back
6690 // into exports (after (re)allocating exports if needed) below.
6691 Teuchos::Array<char> exports_a;
6692
6693 // View exportLIDs' host data as a Teuchos::ArrayView. We don't
6694 // need to sync, since we're doing write-only access, but we do
6695 // need to mark the DualView as modified on host.
6696
6697 numPacketsPerLID.clear_sync_state (); // write-only access
6698 numPacketsPerLID.modify_host ();
6699 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6700 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6701 numPacketsPerLID_h.size ());
6702
6703 // Invoke RowMatrix's legacy pack() interface, using above
6704 // Teuchos::Array* objects.
6705 try {
6706 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6707 constantNumPackets);
6708 }
6709 catch (std::exception& e) {
6710 lclBad = 1;
6711 msg << "Proc " << myRank << ": " << e.what () << std::endl;
6712 }
6713
6714 // Allocate 'exports', and copy exports_a back into it.
6715 const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
6716 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6717 const std::string oldLabel = exports.d_view.label ();
6718 const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
6719 exports = exports_type (newLabel, newAllocSize);
6720 }
6721 // It's safe to assume that we're working on host anyway, so
6722 // just keep exports sync'd to host.
6723 // ignore current device contents
6724 exports.modify_host();
6725
6726 auto exports_h = exports.view_host ();
6727 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6728
6729 // Kokkos::deep_copy needs a Kokkos::View input, so turn
6730 // exports_a into a nonowning Kokkos::View first before copying.
6731 typedef typename exports_type::t_host::execution_space HES;
6732 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6733 Kokkos::View<const char*, host_device_type>
6734 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6735 Kokkos::deep_copy (exports_h_sub, exports_a_kv);
6736 }
6737
6738 if (debug) {
6739 int gblBad = 0; // output argument; to be set below
6740 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6741 if (gblBad != 0) {
6742 Tpetra::Details::gathervPrint (std::cerr, msg.str (), comm);
6743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6744 (true, std::logic_error, "packNew() or pack() threw an exception on "
6745 "one or more participating processes.");
6746 }
6747 }
6748 else {
6749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6750 (lclBad != 0, std::logic_error, "packNew threw an exception on one "
6751 "or more participating processes. Here is this process' error "
6752 "message: " << msg.str ());
6753 }
6754
6755 if (verbose) {
6756 std::ostringstream os;
6757 os << *prefix << "packAndPrepare: Done!" << endl
6758 << *prefix << " "
6759 << dualViewStatusToString (exportLIDs, "exportLIDs")
6760 << endl
6761 << *prefix << " "
6762 << dualViewStatusToString (exports, "exports")
6763 << endl
6764 << *prefix << " "
6765 << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
6766 << endl;
6767 std::cerr << os.str ();
6768 }
6769 }
6770
6771 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6772 size_t
6773 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6774 packRow (char exports[],
6775 const size_t offset,
6776 const size_t numEnt,
6777 const GlobalOrdinal gidsIn[],
6778 const impl_scalar_type valsIn[],
6779 const size_t numBytesPerValue) const
6780 {
6781 using Kokkos::View;
6782 using Kokkos::subview;
6784 typedef LocalOrdinal LO;
6785 typedef GlobalOrdinal GO;
6786 typedef impl_scalar_type ST;
6787
6788 if (numEnt == 0) {
6789 // Empty rows always take zero bytes, to ensure sparsity.
6790 return 0;
6791 }
6792
6793 const GO gid = 0; // packValueCount wants this
6794 const LO numEntLO = static_cast<size_t> (numEnt);
6795
6796 const size_t numEntBeg = offset;
6797 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
6798 const size_t gidsBeg = numEntBeg + numEntLen;
6799 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6800 const size_t valsBeg = gidsBeg + gidsLen;
6801 const size_t valsLen = numEnt * numBytesPerValue;
6802
6803 char* const numEntOut = exports + numEntBeg;
6804 char* const gidsOut = exports + gidsBeg;
6805 char* const valsOut = exports + valsBeg;
6806
6807 size_t numBytesOut = 0;
6808 int errorCode = 0;
6809 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
6810
6811 {
6812 Kokkos::pair<int, size_t> p;
6813 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6814 errorCode += p.first;
6815 numBytesOut += p.second;
6816
6817 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6818 errorCode += p.first;
6819 numBytesOut += p.second;
6820 }
6821
6822 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6823 TEUCHOS_TEST_FOR_EXCEPTION
6824 (numBytesOut != expectedNumBytes, std::logic_error, "packRow: "
6825 "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
6826 << expectedNumBytes << ".");
6827 TEUCHOS_TEST_FOR_EXCEPTION
6828 (errorCode != 0, std::runtime_error, "packRow: "
6829 "PackTraits::packArray returned a nonzero error code");
6830
6831 return numBytesOut;
6832 }
6833
6834 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6835 size_t
6836 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6837 unpackRow (GlobalOrdinal gidsOut[],
6838 impl_scalar_type valsOut[],
6839 const char imports[],
6840 const size_t offset,
6841 const size_t numBytes,
6842 const size_t numEnt,
6843 const size_t numBytesPerValue)
6844 {
6845 using Kokkos::View;
6846 using Kokkos::subview;
6848 typedef LocalOrdinal LO;
6849 typedef GlobalOrdinal GO;
6850 typedef impl_scalar_type ST;
6851
6852 Details::ProfilingRegion region_upack_row(
6853 "Tpetra::CrsMatrix::unpackRow",
6854 "Import/Export"
6855 );
6856
6857 if (numBytes == 0) {
6858 // Rows with zero bytes should always have zero entries.
6859 if (numEnt != 0) {
6860 const int myRank = this->getMap ()->getComm ()->getRank ();
6861 TEUCHOS_TEST_FOR_EXCEPTION
6862 (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
6863 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6864 "number of entries to unpack (as reported by numPacketsPerLID) "
6865 "for this row numEnt=" << numEnt << " != 0.");
6866 }
6867 return 0;
6868 }
6869
6870 if (numEnt == 0 && numBytes != 0) {
6871 const int myRank = this->getMap ()->getComm ()->getRank ();
6872 TEUCHOS_TEST_FOR_EXCEPTION
6873 (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
6874 "unpackRow: The number of entries to unpack (as reported by "
6875 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6876 "numBytes=" << numBytes << " != 0.");
6877 }
6878
6879 const GO gid = 0; // packValueCount wants this
6880 const LO lid = 0; // packValueCount wants this
6881
6882 const size_t numEntBeg = offset;
6883 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
6884 const size_t gidsBeg = numEntBeg + numEntLen;
6885 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6886 const size_t valsBeg = gidsBeg + gidsLen;
6887 const size_t valsLen = numEnt * numBytesPerValue;
6888
6889 const char* const numEntIn = imports + numEntBeg;
6890 const char* const gidsIn = imports + gidsBeg;
6891 const char* const valsIn = imports + valsBeg;
6892
6893 size_t numBytesOut = 0;
6894 int errorCode = 0;
6895 LO numEntOut;
6896 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
6897 if (static_cast<size_t> (numEntOut) != numEnt ||
6898 numEntOut == static_cast<LO> (0)) {
6899 const int myRank = this->getMap ()->getComm ()->getRank ();
6900 std::ostringstream os;
6901 os << "(Proc " << myRank << ") CrsMatrix::unpackRow: ";
6902 bool firstErrorCondition = false;
6903 if (static_cast<size_t> (numEntOut) != numEnt) {
6904 os << "Number of entries from numPacketsPerLID numEnt=" << numEnt
6905 << " does not equal number of entries unpacked from imports "
6906 "buffer numEntOut=" << numEntOut << ".";
6907 firstErrorCondition = true;
6908 }
6909 if (numEntOut == static_cast<LO> (0)) {
6910 if (firstErrorCondition) {
6911 os << " Also, ";
6912 }
6913 os << "Number of entries unpacked from imports buffer numEntOut=0, "
6914 "but number of bytes to unpack for this row numBytes=" << numBytes
6915 << " != 0. This should never happen, since packRow should only "
6916 "ever pack rows with a nonzero number of entries. In this case, "
6917 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6918 << ".";
6919 }
6920 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str ());
6921 }
6922
6923 {
6924 Kokkos::pair<int, size_t> p;
6925 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6926 errorCode += p.first;
6927 numBytesOut += p.second;
6928
6929 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6930 errorCode += p.first;
6931 numBytesOut += p.second;
6932 }
6933
6934 TEUCHOS_TEST_FOR_EXCEPTION
6935 (numBytesOut != numBytes, std::logic_error, "unpackRow: numBytesOut = "
6936 << numBytesOut << " != numBytes = " << numBytes << ".");
6937
6938 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6939 TEUCHOS_TEST_FOR_EXCEPTION
6940 (numBytesOut != expectedNumBytes, std::logic_error, "unpackRow: "
6941 "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
6942 << expectedNumBytes << ".");
6943
6944 TEUCHOS_TEST_FOR_EXCEPTION
6945 (errorCode != 0, std::runtime_error, "unpackRow: "
6946 "PackTraits::unpackArray returned a nonzero error code");
6947
6948 return numBytesOut;
6949 }
6950
6951 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6952 void
6953 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6954 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6955 size_t& totalNumEntries,
6956 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs) const
6957 {
6958 using Details::Behavior;
6960 using std::endl;
6961 typedef impl_scalar_type IST;
6962 typedef LocalOrdinal LO;
6963 typedef GlobalOrdinal GO;
6964 //const char tfecfFuncName[] = "allocatePackSpaceNew: ";
6965
6966 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
6967 // output to std::cerr on every MPI process. This is unwise for
6968 // runs with large numbers of MPI processes.
6969 const bool verbose = Behavior::verbose("CrsMatrix");
6970 std::unique_ptr<std::string> prefix;
6971 if (verbose) {
6972 prefix = this->createPrefix("CrsMatrix", "allocatePackSpaceNew");
6973 std::ostringstream os;
6974 os << *prefix << "Before:"
6975 << endl
6976 << *prefix << " "
6977 << dualViewStatusToString (exports, "exports")
6978 << endl
6979 << *prefix << " "
6980 << dualViewStatusToString (exportLIDs, "exportLIDs")
6981 << endl;
6982 std::cerr << os.str ();
6983 }
6984
6985 // The number of export LIDs must fit in LocalOrdinal, assuming
6986 // that the LIDs are distinct and valid on the calling process.
6987 const LO numExportLIDs = static_cast<LO> (exportLIDs.extent (0));
6988
6989 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6990 auto exportLIDs_h = exportLIDs.view_host ();
6991
6992 // Count the total number of matrix entries to send.
6993 totalNumEntries = 0;
6994 for (LO i = 0; i < numExportLIDs; ++i) {
6995 const LO lclRow = exportLIDs_h[i];
6996 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6997 // FIXME (mfh 25 Jan 2015) We should actually report invalid row
6998 // indices as an error. Just consider them nonowned for now.
6999 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
7000 curNumEntries = 0;
7001 }
7002 totalNumEntries += curNumEntries;
7003 }
7004
7005 // FIXME (mfh 24 Feb 2013, 24 Mar 2017) This code is only correct
7006 // if sizeof(IST) is a meaningful representation of the amount of
7007 // data in a Scalar instance. (LO and GO are always built-in
7008 // integer types.)
7009 //
7010 // Allocate the exports array. It does NOT need padding for
7011 // alignment, since we use memcpy to write to / read from send /
7012 // receive buffers.
7013 const size_t allocSize =
7014 static_cast<size_t> (numExportLIDs) * sizeof (LO) +
7015 totalNumEntries * (sizeof (IST) + sizeof (GO));
7016 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
7017 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
7018
7019 const std::string oldLabel = exports.d_view.label ();
7020 const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7021 exports = exports_type (newLabel, allocSize);
7022 }
7023
7024 if (verbose) {
7025 std::ostringstream os;
7026 os << *prefix << "After:"
7027 << endl
7028 << *prefix << " "
7029 << dualViewStatusToString (exports, "exports")
7030 << endl
7031 << *prefix << " "
7032 << dualViewStatusToString (exportLIDs, "exportLIDs")
7033 << endl;
7034 std::cerr << os.str ();
7035 }
7036 }
7037
7038 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7039 void
7041 packNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7042 Kokkos::DualView<char*, buffer_device_type>& exports,
7043 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7044 size_t& constantNumPackets) const
7045 {
7046 // The call to packNew in packAndPrepare catches and handles any exceptions.
7047 Details::ProfilingRegion region_pack_new("Tpetra::CrsMatrix::packNew", "Import/Export");
7048 if (this->isStaticGraph ()) {
7050 packCrsMatrixNew (*this, exports, numPacketsPerLID, exportLIDs,
7051 constantNumPackets);
7052 }
7053 else {
7054 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
7055 constantNumPackets);
7056 }
7057 }
7058
7059 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7060 void
7062 packNonStaticNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7063 Kokkos::DualView<char*, buffer_device_type>& exports,
7064 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7065 size_t& constantNumPackets) const
7066 {
7067 using Details::Behavior;
7069 using Details::PackTraits;
7071 using Kokkos::View;
7072 using std::endl;
7073 using LO = LocalOrdinal;
7074 using GO = GlobalOrdinal;
7075 using ST = impl_scalar_type;
7076 const char tfecfFuncName[] = "packNonStaticNew: ";
7077
7078 const bool verbose = Behavior::verbose("CrsMatrix");
7079 std::unique_ptr<std::string> prefix;
7080 if (verbose) {
7081 prefix = this->createPrefix("CrsMatrix", "packNonStaticNew");
7082 std::ostringstream os;
7083 os << *prefix << "Start" << endl;
7084 std::cerr << os.str ();
7085 }
7086
7087 const size_t numExportLIDs = static_cast<size_t> (exportLIDs.extent (0));
7088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7089 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
7090 std::invalid_argument, "exportLIDs.size() = " << numExportLIDs
7091 << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7092 << ".");
7093
7094 // Setting this to zero tells the caller to expect a possibly
7095 // different ("nonconstant") number of packets per local index
7096 // (i.e., a possibly different number of entries per row).
7097 constantNumPackets = 0;
7098
7099 // The pack buffer 'exports' enters this method possibly
7100 // unallocated. Do the first two parts of "Count, allocate, fill,
7101 // compute."
7102 size_t totalNumEntries = 0;
7103 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
7104 const size_t bufSize = static_cast<size_t> (exports.extent (0));
7105
7106 // Write-only host access
7107 exports.clear_sync_state();
7108 exports.modify_host();
7109 auto exports_h = exports.view_host ();
7110 if (verbose) {
7111 std::ostringstream os;
7112 os << *prefix << "After marking exports as modified on host, "
7113 << dualViewStatusToString (exports, "exports") << endl;
7114 std::cerr << os.str ();
7115 }
7116
7117 // Read-only host access
7118 auto exportLIDs_h = exportLIDs.view_host ();
7119
7120 // Write-only host access
7121 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->clear_sync_state();
7122 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->modify_host();
7123 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7124
7125 // Compute the number of "packets" (in this case, bytes) per
7126 // export LID (in this case, local index of the row to send), and
7127 // actually pack the data.
7128 auto maxRowNumEnt = this->getNodeMaxNumRowEntries();
7129
7130
7131 // Temporary buffer for global column indices.
7132 typename global_inds_host_view_type::non_const_type gidsIn_k;
7133 if (this->isLocallyIndexed()) { // Need storage for Global IDs
7134 gidsIn_k =
7135 typename global_inds_host_view_type::non_const_type("packGids",
7136 maxRowNumEnt);
7137 }
7138
7139 size_t offset = 0; // current index into 'exports' array.
7140 for (size_t i = 0; i < numExportLIDs; ++i) {
7141 const LO lclRow = exportLIDs_h[i];
7142
7143 size_t numBytes;
7144 size_t numEnt;
7145 numEnt = this->getNumEntriesInLocalRow (lclRow);
7146
7147 // Only pack this row's data if it has a nonzero number of
7148 // entries. We can do this because receiving processes get the
7149 // number of packets, and will know that zero packets means zero
7150 // entries.
7151 if (numEnt == 0) {
7152 numPacketsPerLID_h[i] = 0;
7153 continue;
7154 }
7155
7156 if (this->isLocallyIndexed ()) {
7157 typename global_inds_host_view_type::non_const_type gidsIn;
7158 values_host_view_type valsIn;
7159 // If the matrix is locally indexed on the calling process, we
7160 // have to use its column Map (which it _must_ have in this
7161 // case) to convert to global indices.
7162 local_inds_host_view_type lidsIn;
7163 this->getLocalRowView (lclRow, lidsIn, valsIn);
7164 const map_type& colMap = * (this->getColMap ());
7165 for (size_t k = 0; k < numEnt; ++k) {
7166 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
7167 }
7168 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
7169
7170 const size_t numBytesPerValue =
7171 PackTraits<ST>::packValueCount (valsIn[0]);
7172 numBytes = this->packRow (exports_h.data (), offset, numEnt,
7173 gidsIn.data (), valsIn.data (),
7174 numBytesPerValue);
7175 }
7176 else if (this->isGloballyIndexed ()) {
7177 global_inds_host_view_type gidsIn;
7178 values_host_view_type valsIn;
7179 // If the matrix is globally indexed on the calling process,
7180 // then we can use the column indices directly. However, we
7181 // have to get the global row index. The calling process must
7182 // have a row Map, since otherwise it shouldn't be participating
7183 // in packing operations.
7184 const map_type& rowMap = * (this->getRowMap ());
7185 const GO gblRow = rowMap.getGlobalElement (lclRow);
7186 this->getGlobalRowView (gblRow, gidsIn, valsIn);
7187
7188 const size_t numBytesPerValue =
7189 PackTraits<ST>::packValueCount (valsIn[0]);
7190 numBytes = this->packRow (exports_h.data (), offset, numEnt,
7191 gidsIn.data (), valsIn.data (),
7192 numBytesPerValue);
7193 }
7194 // mfh 11 Sep 2017: Currently, if the matrix is neither globally
7195 // nor locally indexed, then it has no entries. Therefore,
7196 // there is nothing to pack. No worries!
7197
7198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7199 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
7200 "First invalid offset into 'exports' pack buffer at index i = " << i
7201 << ". exportLIDs_h[i]: " << exportLIDs_h[i] << ", bufSize: " <<
7202 bufSize << ", offset: " << offset << ", numBytes: " << numBytes <<
7203 ".");
7204 // numPacketsPerLID_h[i] is the number of "packets" in the
7205 // current local row i. Packet=char (really "byte") so use the
7206 // number of bytes of the packed data for that row.
7207 numPacketsPerLID_h[i] = numBytes;
7208 offset += numBytes;
7209 }
7210
7211 if (verbose) {
7212 std::ostringstream os;
7213 os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
7214 << *prefix << " "
7215 << dualViewStatusToString (exports, "exports")
7216 << endl
7217 << *prefix << " "
7218 << dualViewStatusToString (exportLIDs, "exportLIDs")
7219 << endl;
7220 std::cerr << os.str ();
7221 }
7222 }
7223
7224 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7225 LocalOrdinal
7226 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7227 combineGlobalValuesRaw(const LocalOrdinal lclRow,
7228 const LocalOrdinal numEnt,
7229 const impl_scalar_type vals[],
7230 const GlobalOrdinal cols[],
7231 const Tpetra::CombineMode combMode,
7232 const char* const prefix,
7233 const bool debug,
7234 const bool verbose)
7235 {
7236 using GO = GlobalOrdinal;
7237
7238 // mfh 23 Mar 2017: This branch is not thread safe in a debug
7239 // build, due to use of Teuchos::ArrayView; see #229.
7240 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
7241 Teuchos::ArrayView<const GO> cols_av
7242 (numEnt == 0 ? nullptr : cols, numEnt);
7243 Teuchos::ArrayView<const Scalar> vals_av
7244 (numEnt == 0 ? nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
7245
7246 // FIXME (mfh 23 Mar 2017) This is a work-around for less common
7247 // combine modes. combineGlobalValues throws on error; it does
7248 // not return an error code. Thus, if it returns, it succeeded.
7249 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
7250 prefix, debug, verbose);
7251 return numEnt;
7252 }
7253
7254 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7255 void
7256 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7257 combineGlobalValues(
7258 const GlobalOrdinal globalRowIndex,
7259 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
7260 const Teuchos::ArrayView<const Scalar>& values,
7261 const Tpetra::CombineMode combineMode,
7262 const char* const prefix,
7263 const bool debug,
7264 const bool verbose)
7265 {
7266 const char tfecfFuncName[] = "combineGlobalValues: ";
7267
7268 if (isStaticGraph ()) {
7269 // INSERT doesn't make sense for a static graph, since you
7270 // aren't allowed to change the structure of the graph.
7271 // However, all the other combine modes work.
7272 if (combineMode == ADD) {
7273 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
7274 }
7275 else if (combineMode == REPLACE) {
7276 replaceGlobalValues (globalRowIndex, columnIndices, values);
7277 }
7278 else if (combineMode == ABSMAX) {
7279 using ::Tpetra::Details::AbsMax;
7280 AbsMax<Scalar> f;
7281 this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
7282 columnIndices,
7283 values, f);
7284 }
7285 else if (combineMode == INSERT) {
7286 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7287 (isStaticGraph() && combineMode == INSERT,
7288 std::invalid_argument, "INSERT combine mode is forbidden "
7289 "if the matrix has a static (const) graph (i.e., was "
7290 "constructed with the CrsMatrix constructor that takes a "
7291 "const CrsGraph pointer).");
7292 }
7293 else {
7294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7295 (true, std::logic_error, "Invalid combine mode; should "
7296 "never get here! "
7297 "Please report this bug to the Tpetra developers.");
7298 }
7299 }
7300 else { // The matrix has a dynamic graph.
7301 if (combineMode == ADD || combineMode == INSERT) {
7302 // For a dynamic graph, all incoming column indices are
7303 // inserted into the target graph. Duplicate indices will
7304 // have their values summed. In this context, ADD and INSERT
7305 // are equivalent. We need to call insertGlobalValues()
7306 // anyway if the column indices don't yet exist in this row,
7307 // so we just call insertGlobalValues() for both cases.
7308 insertGlobalValuesFilteredChecked(globalRowIndex,
7309 columnIndices, values, prefix, debug, verbose);
7310 }
7311 // FIXME (mfh 14 Mar 2012):
7312 //
7313 // Implementing ABSMAX or REPLACE for a dynamic graph would
7314 // require modifying assembly to attach a possibly different
7315 // combine mode to each inserted (i, j, A_ij) entry. For
7316 // example, consider two different Export operations to the same
7317 // target CrsMatrix, the first with ABSMAX combine mode and the
7318 // second with REPLACE. This isn't a common use case, so we
7319 // won't mess with it for now.
7320 else if (combineMode == ABSMAX) {
7321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7322 ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
7323 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
7324 "implemented.");
7325 }
7326 else if (combineMode == REPLACE) {
7327 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7328 ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
7329 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
7330 "implemented.");
7331 }
7332 else {
7333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7334 true, std::logic_error, "Should never get here! Please report this "
7335 "bug to the Tpetra developers.");
7336 }
7337 }
7338 }
7339
7340 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7341 void
7344 (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
7345 Kokkos::DualView<char*, buffer_device_type> imports,
7346 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7347 const size_t constantNumPackets,
7348 const CombineMode combineMode)
7349 {
7350 using Details::Behavior;
7353 using std::endl;
7354 const char tfecfFuncName[] = "unpackAndCombine: ";
7355 ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombine");
7356
7357 const bool debug = Behavior::debug("CrsMatrix");
7358 const bool verbose = Behavior::verbose("CrsMatrix");
7359 constexpr int numValidModes = 5;
7360 const CombineMode validModes[numValidModes] =
7362 const char* validModeNames[numValidModes] =
7363 {"ADD", "REPLACE", "ABSMAX", "INSERT", "ZERO"};
7364
7365 std::unique_ptr<std::string> prefix;
7366 if (verbose) {
7367 prefix = this->createPrefix("CrsMatrix", "unpackAndCombine");
7368 std::ostringstream os;
7369 os << *prefix << "Start:" << endl
7370 << *prefix << " "
7371 << dualViewStatusToString (importLIDs, "importLIDs")
7372 << endl
7373 << *prefix << " "
7374 << dualViewStatusToString (imports, "imports")
7375 << endl
7376 << *prefix << " "
7377 << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7378 << endl
7379 << *prefix << " constantNumPackets: " << constantNumPackets
7380 << endl
7381 << *prefix << " combineMode: " << combineModeToString (combineMode)
7382 << endl;
7383 std::cerr << os.str ();
7384 }
7385
7386 if (debug) {
7387 if (std::find (validModes, validModes+numValidModes, combineMode) ==
7388 validModes+numValidModes) {
7389 std::ostringstream os;
7390 os << "Invalid combine mode. Valid modes are {";
7391 for (int k = 0; k < numValidModes; ++k) {
7392 os << validModeNames[k];
7393 if (k < numValidModes - 1) {
7394 os << ", ";
7395 }
7396 }
7397 os << "}.";
7398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7399 (true, std::invalid_argument, os.str ());
7400 }
7401 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7402 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
7403 std::invalid_argument, "importLIDs.extent(0)="
7404 << importLIDs.extent(0)
7405 << " != numPacketsPerLID.extent(0)="
7406 << numPacketsPerLID.extent(0) << ".");
7407 }
7408
7409 if (combineMode == ZERO) {
7410 return; // nothing to do
7411 }
7412
7413 if (debug) {
7414 using Teuchos::reduceAll;
7415 std::unique_ptr<std::ostringstream> msg (new std::ostringstream ());
7416 int lclBad = 0;
7417 try {
7418 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7419 constantNumPackets, combineMode,
7420 verbose);
7421 } catch (std::exception& e) {
7422 lclBad = 1;
7423 *msg << e.what ();
7424 }
7425 int gblBad = 0;
7426 const Teuchos::Comm<int>& comm = * (this->getComm ());
7427 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
7428 lclBad, Teuchos::outArg (gblBad));
7429 if (gblBad != 0) {
7430 // mfh 22 Oct 2017: 'prefix' might be null, since it is only
7431 // initialized in a debug build. Thus, we get the process
7432 // rank again here. This is an error message, so the small
7433 // run-time cost doesn't matter. See #1887.
7434 std::ostringstream os;
7435 os << "Proc " << comm.getRank () << ": " << msg->str () << endl;
7436 msg = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
7437 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7439 (true, std::logic_error, std::endl << "unpackAndCombineImpl "
7440 "threw an exception on one or more participating processes: "
7441 << endl << msg->str ());
7442 }
7443 }
7444 else {
7445 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7446 constantNumPackets, combineMode,
7447 verbose);
7448 }
7449
7450 if (verbose) {
7451 std::ostringstream os;
7452 os << *prefix << "Done!" << endl
7453 << *prefix << " "
7454 << dualViewStatusToString (importLIDs, "importLIDs")
7455 << endl
7456 << *prefix << " "
7457 << dualViewStatusToString (imports, "imports")
7458 << endl
7459 << *prefix << " "
7460 << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7461 << endl;
7462 std::cerr << os.str ();
7463 }
7464 }
7465
7466 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7467 void
7470 const Kokkos::DualView<const local_ordinal_type*,
7471 buffer_device_type>& importLIDs,
7472 Kokkos::DualView<char*, buffer_device_type> imports,
7473 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7474 const size_t constantNumPackets,
7475 const CombineMode combineMode,
7476 const bool verbose)
7477 {
7478 Details::ProfilingRegion region_unpack_and_combine_impl(
7479 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7480 "Import/Export"
7481 );
7482 using std::endl;
7483 const char tfecfFuncName[] = "unpackAndCombineImpl";
7484 std::unique_ptr<std::string> prefix;
7485 if (verbose) {
7486 prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
7487 std::ostringstream os;
7488 os << *prefix << "isStaticGraph(): "
7489 << (isStaticGraph() ? "true" : "false")
7490 << ", importLIDs.extent(0): "
7491 << importLIDs.extent(0)
7492 << ", imports.extent(0): "
7493 << imports.extent(0)
7494 << ", numPacketsPerLID.extent(0): "
7495 << numPacketsPerLID.extent(0)
7496 << endl;
7497 std::cerr << os.str();
7498 }
7499
7500 if (isStaticGraph ()) {
7501 using Details::unpackCrsMatrixAndCombineNew;
7502 unpackCrsMatrixAndCombineNew(*this, imports, numPacketsPerLID,
7503 importLIDs, constantNumPackets,
7504 combineMode);
7505 }
7506 else {
7507 {
7508 using padding_type = typename crs_graph_type::padding_type;
7509 std::unique_ptr<padding_type> padding;
7510 try {
7511 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7512 importLIDs, imports, numPacketsPerLID, verbose);
7513 }
7514 catch (std::exception& e) {
7515 const auto rowMap = getRowMap();
7516 const auto comm = rowMap.is_null() ? Teuchos::null :
7517 rowMap->getComm();
7518 const int myRank = comm.is_null() ? -1 : comm->getRank();
7519 TEUCHOS_TEST_FOR_EXCEPTION
7520 (true, std::runtime_error, "Proc " << myRank << ": "
7521 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7522 "threw an exception: " << e.what());
7523 }
7524 if (verbose) {
7525 std::ostringstream os;
7526 os << *prefix << "Call applyCrsPadding" << endl;
7527 std::cerr << os.str();
7528 }
7529 applyCrsPadding(*padding, verbose);
7530 }
7531 if (verbose) {
7532 std::ostringstream os;
7533 os << *prefix << "Call unpackAndCombineImplNonStatic" << endl;
7534 std::cerr << os.str();
7535 }
7536 unpackAndCombineImplNonStatic(importLIDs, imports,
7537 numPacketsPerLID,
7538 constantNumPackets,
7539 combineMode);
7540 }
7541
7542 if (verbose) {
7543 std::ostringstream os;
7544 os << *prefix << "Done" << endl;
7545 std::cerr << os.str();
7546 }
7547 }
7548
7549 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7550 void
7551 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7552 unpackAndCombineImplNonStatic(
7553 const Kokkos::DualView<const local_ordinal_type*,
7554 buffer_device_type>& importLIDs,
7555 Kokkos::DualView<char*, buffer_device_type> imports,
7556 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7557 const size_t constantNumPackets,
7558 const CombineMode combineMode)
7559 {
7560 using Kokkos::View;
7561 using Kokkos::subview;
7562 using Kokkos::MemoryUnmanaged;
7563 using Details::Behavior;
7566 using Details::PackTraits;
7567 using Details::ScalarViewTraits;
7568 using std::endl;
7569 using LO = LocalOrdinal;
7570 using GO = GlobalOrdinal;
7571 using ST = impl_scalar_type;
7572 using size_type = typename Teuchos::ArrayView<LO>::size_type;
7573 using HES =
7574 typename View<int*, device_type>::HostMirror::execution_space;
7575 using pair_type = std::pair<typename View<int*, HES>::size_type,
7576 typename View<int*, HES>::size_type>;
7577 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7578 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7579 const char tfecfFuncName[] = "unpackAndCombineImplNonStatic";
7580
7581 const bool debug = Behavior::debug("CrsMatrix");
7582 const bool verbose = Behavior::verbose("CrsMatrix");
7583 std::unique_ptr<std::string> prefix;
7584 if (verbose) {
7585 prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
7586 std::ostringstream os;
7587 os << *prefix << endl; // we've already printed DualViews' statuses
7588 std::cerr << os.str ();
7589 }
7590 const char* const prefix_raw =
7591 verbose ? prefix.get()->c_str() : nullptr;
7592
7593 const size_type numImportLIDs = importLIDs.extent (0);
7594 if (combineMode == ZERO || numImportLIDs == 0) {
7595 return; // nothing to do; no need to combine entries
7596 }
7597
7598 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7599 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
7600 "Import/Export"
7601 );
7602
7603 // We're unpacking on host. This is read-only host access.
7604 if (imports.need_sync_host()) {
7605 imports.sync_host ();
7606 }
7607 auto imports_h = imports.view_host();
7608
7609 // Read-only host access.
7610 if (numPacketsPerLID.need_sync_host()) {
7611 numPacketsPerLID.sync_host ();
7612 }
7613 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7614
7615 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7616 auto importLIDs_h = importLIDs.view_host();
7617
7618 size_t numBytesPerValue;
7619 {
7620 // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7621 // with run-time size? We already assume that all entries in both the
7622 // source and target matrices have the same size. If the calling process
7623 // owns at least one entry in either matrix, we can use that entry to set
7624 // the size. However, it is possible that the calling process owns no
7625 // entries. In that case, we're in trouble. One way to fix this would be
7626 // for each row's data to contain the run-time size. This is only
7627 // necessary if the size is not a compile-time constant.
7628 Scalar val;
7629 numBytesPerValue = PackTraits<ST>::packValueCount (val);
7630 }
7631
7632 // Determine the maximum number of entries in any one row
7633 size_t offset = 0;
7634 size_t maxRowNumEnt = 0;
7635 for (size_type i = 0; i < numImportLIDs; ++i) {
7636 const size_t numBytes = numPacketsPerLID_h[i];
7637 if (numBytes == 0) {
7638 continue; // empty buffer for that row means that the row is empty
7639 }
7640 // We need to unpack a nonzero number of entries for this row.
7641 if (debug) {
7642 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7643 (offset + numBytes > size_t(imports_h.extent (0)),
7644 std::logic_error, ": At local row index importLIDs_h[i="
7645 << i << "]=" << importLIDs_h[i] << ", offset (=" << offset
7646 << ") + numBytes (=" << numBytes << ") > "
7647 "imports_h.extent(0)=" << imports_h.extent (0) << ".");
7648 }
7649 LO numEntLO = 0;
7650
7651 if (debug) {
7652 const size_t theNumBytes =
7654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7655 (theNumBytes > numBytes, std::logic_error, ": theNumBytes="
7656 << theNumBytes << " > numBytes = " << numBytes << ".");
7657 }
7658 const char* const inBuf = imports_h.data () + offset;
7659 const size_t actualNumBytes =
7660 PackTraits<LO>::unpackValue (numEntLO, inBuf);
7661
7662 if (debug) {
7663 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7664 (actualNumBytes > numBytes, std::logic_error, ": At i=" << i
7665 << ", actualNumBytes=" << actualNumBytes
7666 << " > numBytes=" << numBytes << ".");
7667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7668 (numEntLO == 0, std::logic_error, ": At local row index "
7669 "importLIDs_h[i=" << i << "]=" << importLIDs_h[i] << ", "
7670 "the number of entries read from the packed data is "
7671 "numEntLO=" << numEntLO << ", but numBytes=" << numBytes
7672 << " != 0.");
7673 }
7674
7675 maxRowNumEnt = std::max(size_t(numEntLO), maxRowNumEnt);
7676 offset += numBytes;
7677 }
7678
7679 // Temporary space to cache incoming global column indices and
7680 // values. Column indices come in as global indices, in case the
7681 // source object's column Map differs from the target object's
7682 // (this's) column Map.
7683 View<GO*, HES> gblColInds;
7684 View<LO*, HES> lclColInds;
7685 View<ST*, HES> vals;
7686 {
7687 GO gid = 0;
7688 LO lid = 0;
7689 // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7690 // with run-time size? We already assume that all entries in both the
7691 // source and target matrices have the same size. If the calling process
7692 // owns at least one entry in either matrix, we can use that entry to set
7693 // the size. However, it is possible that the calling process owns no
7694 // entries. In that case, we're in trouble. One way to fix this would be
7695 // for each row's data to contain the run-time size. This is only
7696 // necessary if the size is not a compile-time constant.
7697 Scalar val;
7698 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7699 gid, maxRowNumEnt, "gids");
7700 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7701 lid, maxRowNumEnt, "lids");
7702 vals = ScalarViewTraits<ST, HES>::allocateArray(
7703 val, maxRowNumEnt, "vals");
7704 }
7705
7706 offset = 0;
7707 for (size_type i = 0; i < numImportLIDs; ++i) {
7708 const size_t numBytes = numPacketsPerLID_h[i];
7709 if (numBytes == 0) {
7710 continue; // empty buffer for that row means that the row is empty
7711 }
7712 LO numEntLO = 0;
7713 const char* const inBuf = imports_h.data () + offset;
7714 (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
7715
7716 const size_t numEnt = static_cast<size_t>(numEntLO);;
7717 const LO lclRow = importLIDs_h[i];
7718
7719 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7720 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7721
7722 const size_t numBytesOut =
7723 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7724 offset, numBytes, numEnt, numBytesPerValue);
7725 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7726 (numBytes != numBytesOut, std::logic_error, ": At i=" << i
7727 << ", numBytes=" << numBytes << " != numBytesOut="
7728 << numBytesOut << ".");
7729
7730 const ST* const valsRaw = const_cast<const ST*> (valsOut.data ());
7731 const GO* const gidsRaw = const_cast<const GO*> (gidsOut.data ());
7732 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7733 combineMode, prefix_raw, debug, verbose);
7734 // Don't update offset until current LID has succeeded.
7735 offset += numBytes;
7736 } // for each import LID i
7737
7738 if (verbose) {
7739 std::ostringstream os;
7740 os << *prefix << "Done" << endl;
7741 std::cerr << os.str();
7742 }
7743 }
7744
7745 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7746 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7748 getColumnMapMultiVector (const MV& X_domainMap,
7749 const bool force) const
7750 {
7751 using Teuchos::null;
7752 using Teuchos::RCP;
7753 using Teuchos::rcp;
7754
7755 TEUCHOS_TEST_FOR_EXCEPTION(
7756 ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
7757 "MapMultiVector: You may only call this method if the matrix has a "
7758 "column Map. If the matrix does not yet have a column Map, you should "
7759 "first call fillComplete (with domain and range Map if necessary).");
7760
7761 // If the graph is not fill complete, then the Import object (if
7762 // one should exist) hasn't been constructed yet.
7763 TEUCHOS_TEST_FOR_EXCEPTION(
7764 ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
7765 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7766 "this matrix's graph is fill complete.");
7767
7768 const size_t numVecs = X_domainMap.getNumVectors ();
7769 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7770 RCP<const map_type> colMap = this->getColMap ();
7771
7772 RCP<MV> X_colMap; // null by default
7773
7774 // If the Import object is trivial (null), then we don't need a
7775 // separate column Map multivector. Just return null in that
7776 // case. The caller is responsible for knowing not to use the
7777 // returned null pointer.
7778 //
7779 // If the Import is nontrivial, then we do need a separate
7780 // column Map multivector for the Import operation. Check in
7781 // that case if we have to (re)create the column Map
7782 // multivector.
7783 if (! importer.is_null () || force) {
7784 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7785 X_colMap = rcp (new MV (colMap, numVecs));
7786
7787 // Cache the newly created multivector for later reuse.
7788 importMV_ = X_colMap;
7789 }
7790 else { // Yay, we can reuse the cached multivector!
7791 X_colMap = importMV_;
7792 // mfh 09 Jan 2013: We don't have to fill with zeros first,
7793 // because the Import uses INSERT combine mode, which overwrites
7794 // existing entries.
7795 //
7796 //X_colMap->putScalar (ZERO);
7797 }
7798 }
7799 return X_colMap;
7800 }
7801
7802 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7803 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7806 const bool force) const
7807 {
7808 using Teuchos::null;
7809 using Teuchos::RCP;
7810 using Teuchos::rcp;
7811
7812 // If the graph is not fill complete, then the Export object (if
7813 // one should exist) hasn't been constructed yet.
7814 TEUCHOS_TEST_FOR_EXCEPTION(
7815 ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
7816 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7817 "matrix's graph is fill complete.");
7818
7819 const size_t numVecs = Y_rangeMap.getNumVectors ();
7820 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7821 // Every version of the constructor takes either a row Map, or a
7822 // graph (all of whose constructors take a row Map). Thus, the
7823 // matrix always has a row Map.
7824 RCP<const map_type> rowMap = this->getRowMap ();
7825
7826 RCP<MV> Y_rowMap; // null by default
7827
7828 // If the Export object is trivial (null), then we don't need a
7829 // separate row Map multivector. Just return null in that case.
7830 // The caller is responsible for knowing not to use the returned
7831 // null pointer.
7832 //
7833 // If the Export is nontrivial, then we do need a separate row
7834 // Map multivector for the Export operation. Check in that case
7835 // if we have to (re)create the row Map multivector.
7836 if (! exporter.is_null () || force) {
7837 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7838 Y_rowMap = rcp (new MV (rowMap, numVecs));
7839 exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
7840 }
7841 else { // Yay, we can reuse the cached multivector!
7842 Y_rowMap = exportMV_;
7843 }
7844 }
7845 return Y_rowMap;
7846 }
7847
7848 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7849 void
7851 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
7852 {
7853 TEUCHOS_TEST_FOR_EXCEPTION(
7854 myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
7855 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7856 "was created with a constant graph (that is, when it was created using "
7857 "the version of its constructor that takes an RCP<const CrsGraph>). "
7858 "This is because the matrix is not allowed to modify the graph in that "
7859 "case, but removing empty processes requires modifying the graph.");
7860 myGraph_->removeEmptyProcessesInPlace (newMap);
7861 // Even though CrsMatrix's row Map (as returned by getRowMap())
7862 // comes from its CrsGraph, CrsMatrix still implements DistObject,
7863 // so we also have to change the DistObject's Map.
7864 this->map_ = this->getRowMap ();
7865 // In the nonconst graph case, staticGraph_ is just a const
7866 // pointer to myGraph_. This assignment is probably redundant,
7867 // but it doesn't hurt.
7868 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
7869 }
7870
7871 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7872 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7874 add (const Scalar& alpha,
7876 const Scalar& beta,
7877 const Teuchos::RCP<const map_type>& domainMap,
7878 const Teuchos::RCP<const map_type>& rangeMap,
7879 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7880 {
7881 using Teuchos::Array;
7882 using Teuchos::ArrayView;
7883 using Teuchos::ParameterList;
7884 using Teuchos::RCP;
7885 using Teuchos::rcp;
7886 using Teuchos::rcp_implicit_cast;
7887 using Teuchos::sublist;
7888 using std::endl;
7889 using LO = local_ordinal_type;
7890 using GO = global_ordinal_type;
7891 using crs_matrix_type =
7893 const char errPfx[] = "Tpetra::CrsMatrix::add: ";
7894
7895 const bool debug = Details::Behavior::debug("CrsMatrix");
7896 const bool verbose = Details::Behavior::verbose("CrsMatrix");
7897 std::unique_ptr<std::string> prefix;
7898 if (verbose) {
7899 prefix = this->createPrefix("CrsMatrix", "add");
7900 std::ostringstream os;
7901 os << *prefix << "Start" << endl;
7902 std::cerr << os.str ();
7903 }
7904
7905 const crs_matrix_type& B = *this; // a convenient abbreviation
7906 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7907 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7908
7909 // If the user didn't supply a domain or range Map, then try to
7910 // get one from B first (if it has them), then from A (if it has
7911 // them). If we don't have any domain or range Maps, scold the
7912 // user.
7913 RCP<const map_type> A_domainMap = A.getDomainMap ();
7914 RCP<const map_type> A_rangeMap = A.getRangeMap ();
7915 RCP<const map_type> B_domainMap = B.getDomainMap ();
7916 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7917
7918 RCP<const map_type> theDomainMap = domainMap;
7919 RCP<const map_type> theRangeMap = rangeMap;
7920
7921 if (domainMap.is_null ()) {
7922 if (B_domainMap.is_null ()) {
7923 TEUCHOS_TEST_FOR_EXCEPTION(
7924 A_domainMap.is_null (), std::invalid_argument,
7925 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7926 "then you must supply a nonnull domain Map to this method.");
7927 theDomainMap = A_domainMap;
7928 } else {
7929 theDomainMap = B_domainMap;
7930 }
7931 }
7932 if (rangeMap.is_null ()) {
7933 if (B_rangeMap.is_null ()) {
7934 TEUCHOS_TEST_FOR_EXCEPTION(
7935 A_rangeMap.is_null (), std::invalid_argument,
7936 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7937 "then you must supply a nonnull range Map to this method.");
7938 theRangeMap = A_rangeMap;
7939 } else {
7940 theRangeMap = B_rangeMap;
7941 }
7942 }
7943
7944 if (debug) {
7945 // In debug mode, check that A and B have matching domain and
7946 // range Maps, if they have domain and range Maps at all. (If
7947 // they aren't fill complete, then they may not yet have them.)
7948 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7949 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7950 TEUCHOS_TEST_FOR_EXCEPTION
7951 (! B_domainMap->isSameAs(*A_domainMap),
7952 std::invalid_argument,
7953 errPfx << "The input RowMatrix A must have a domain Map "
7954 "which is the same as (isSameAs) this RowMatrix's "
7955 "domain Map.");
7956 TEUCHOS_TEST_FOR_EXCEPTION
7957 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7958 errPfx << "The input RowMatrix A must have a range Map "
7959 "which is the same as (isSameAs) this RowMatrix's range "
7960 "Map.");
7961 TEUCHOS_TEST_FOR_EXCEPTION
7962 (! domainMap.is_null() &&
7963 ! domainMap->isSameAs(*B_domainMap),
7964 std::invalid_argument,
7965 errPfx << "The input domain Map must be the same as "
7966 "(isSameAs) this RowMatrix's domain Map.");
7967 TEUCHOS_TEST_FOR_EXCEPTION
7968 (! rangeMap.is_null() &&
7969 ! rangeMap->isSameAs(*B_rangeMap),
7970 std::invalid_argument,
7971 errPfx << "The input range Map must be the same as "
7972 "(isSameAs) this RowMatrix's range Map.");
7973 }
7974 }
7975 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7976 TEUCHOS_TEST_FOR_EXCEPTION
7977 (! domainMap.is_null() &&
7978 ! domainMap->isSameAs(*B_domainMap),
7979 std::invalid_argument,
7980 errPfx << "The input domain Map must be the same as "
7981 "(isSameAs) this RowMatrix's domain Map.");
7982 TEUCHOS_TEST_FOR_EXCEPTION
7983 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7984 std::invalid_argument,
7985 errPfx << "The input range Map must be the same as "
7986 "(isSameAs) this RowMatrix's range Map.");
7987 }
7988 else {
7989 TEUCHOS_TEST_FOR_EXCEPTION
7990 (domainMap.is_null() || rangeMap.is_null(),
7991 std::invalid_argument, errPfx << "If neither A nor B "
7992 "have a domain and range Map, then you must supply a "
7993 "nonnull domain and range Map to this method.");
7994 }
7995 }
7996
7997 // What parameters do we pass to C's constructor? Do we call
7998 // fillComplete on C after filling it? And if so, what parameters
7999 // do we pass to C's fillComplete call?
8000 bool callFillComplete = true;
8001 RCP<ParameterList> constructorSublist;
8002 RCP<ParameterList> fillCompleteSublist;
8003 if (! params.is_null()) {
8004 callFillComplete =
8005 params->get("Call fillComplete", callFillComplete);
8006 constructorSublist = sublist(params, "Constructor parameters");
8007 fillCompleteSublist = sublist(params, "fillComplete parameters");
8008 }
8009
8010 RCP<const map_type> A_rowMap = A.getRowMap ();
8011 RCP<const map_type> B_rowMap = B.getRowMap ();
8012 RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
8013 RCP<crs_matrix_type> C; // The result matrix.
8014
8015 // If A and B's row Maps are the same, we can compute an upper
8016 // bound on the number of entries in each row of C, before
8017 // actually computing the sum. A reasonable upper bound is the
8018 // sum of the two entry counts in each row. If we choose this as
8019 // the actual per-row upper bound, we can use static profile.
8020 if (A_rowMap->isSameAs (*B_rowMap)) {
8021 const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8022 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
8023
8024 // Get the number of entries in each row of A.
8025 if (alpha != ZERO) {
8026 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8027 const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8028 C_maxNumEntriesPerRow[localRow] += A_numEntries;
8029 }
8030 }
8031 // Get the number of entries in each row of B.
8032 if (beta != ZERO) {
8033 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8034 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8035 C_maxNumEntriesPerRow[localRow] += B_numEntries;
8036 }
8037 }
8038 // Construct the result matrix C.
8039 if (constructorSublist.is_null ()) {
8040 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8041 StaticProfile));
8042 } else {
8043 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8044 StaticProfile, constructorSublist));
8045 }
8046 // Since A and B have the same row Maps, we could add them
8047 // together all at once and merge values before we call
8048 // insertGlobalValues. However, we don't really need to, since
8049 // we've already allocated enough space in each row of C for C
8050 // to do the merge itself.
8051 }
8052 else { // the row Maps of A and B are not the same
8053 // Construct the result matrix C.
8054 // true: !A_rowMap->isSameAs (*B_rowMap)
8055 TEUCHOS_TEST_FOR_EXCEPTION
8056 (true, std::invalid_argument, errPfx << "The row maps must "
8057 "be the same for statically allocated matrices, to ensure "
8058 "that there is sufficient space to do the addition.");
8059 }
8060
8061 TEUCHOS_TEST_FOR_EXCEPTION
8062 (C.is_null (), std::logic_error,
8063 errPfx << "C should not be null at this point. "
8064 "Please report this bug to the Tpetra developers.");
8065
8066 if (verbose) {
8067 std::ostringstream os;
8068 os << *prefix << "Compute C = alpha*A + beta*B" << endl;
8069 std::cerr << os.str ();
8070 }
8071 using gids_type = nonconst_global_inds_host_view_type;
8072 using vals_type = nonconst_values_host_view_type;
8073 gids_type ind;
8074 vals_type val;
8075
8076 if (alpha != ZERO) {
8077 const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8078 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
8079 size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8080 const GO globalRow = A_rowMap->getGlobalElement (localRow);
8081 if (A_numEntries > static_cast<size_t> (ind.size ())) {
8082 Kokkos::resize(ind,A_numEntries);
8083 Kokkos::resize(val,A_numEntries);
8084 }
8085 gids_type indView = Kokkos::subview(ind,std::make_pair((size_t)0, A_numEntries));
8086 vals_type valView = Kokkos::subview(val,std::make_pair((size_t)0, A_numEntries));
8087 A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
8088
8089 if (alpha != ONE) {
8090 for (size_t k = 0; k < A_numEntries; ++k) {
8091 valView[k] *= alpha;
8092 }
8093 }
8094 C->insertGlobalValues (globalRow, A_numEntries,
8095 reinterpret_cast<Scalar *>(valView.data()),
8096 indView.data());
8097 }
8098 }
8099
8100 if (beta != ZERO) {
8101 const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
8102 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
8103 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8104 const GO globalRow = B_rowMap->getGlobalElement (localRow);
8105 if (B_numEntries > static_cast<size_t> (ind.size ())) {
8106 Kokkos::resize(ind,B_numEntries);
8107 Kokkos::resize(val,B_numEntries);
8108 }
8109 gids_type indView = Kokkos::subview(ind,std::make_pair((size_t)0, B_numEntries));
8110 vals_type valView = Kokkos::subview(val,std::make_pair((size_t)0, B_numEntries));
8111 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
8112
8113 if (beta != ONE) {
8114 for (size_t k = 0; k < B_numEntries; ++k) {
8115 valView[k] *= beta;
8116 }
8117 }
8118 C->insertGlobalValues (globalRow, B_numEntries,
8119 reinterpret_cast<Scalar *>(valView.data()),
8120 indView.data());
8121 }
8122 }
8123
8124 if (callFillComplete) {
8125 if (verbose) {
8126 std::ostringstream os;
8127 os << *prefix << "Call fillComplete on C" << endl;
8128 std::cerr << os.str ();
8129 }
8130 if (fillCompleteSublist.is_null ()) {
8131 C->fillComplete (theDomainMap, theRangeMap);
8132 } else {
8133 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
8134 }
8135 }
8136 else if (verbose) {
8137 std::ostringstream os;
8138 os << *prefix << "Do NOT call fillComplete on C" << endl;
8139 std::cerr << os.str ();
8140 }
8141
8142 if (verbose) {
8143 std::ostringstream os;
8144 os << *prefix << "Done" << endl;
8145 std::cerr << os.str ();
8146 }
8147 return rcp_implicit_cast<row_matrix_type> (C);
8148 }
8149
8150
8151
8152 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8153 void
8156 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
8157 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
8158 const Teuchos::RCP<const map_type>& domainMap,
8159 const Teuchos::RCP<const map_type>& rangeMap,
8160 const Teuchos::RCP<Teuchos::ParameterList>& params) const
8161 {
8162 using Details::Behavior;
8167 using Teuchos::ArrayRCP;
8168 using Teuchos::ArrayView;
8169 using Teuchos::Comm;
8170 using Teuchos::ParameterList;
8171 using Teuchos::RCP;
8172 using std::endl;
8173 typedef LocalOrdinal LO;
8174 typedef GlobalOrdinal GO;
8175 typedef node_type NT;
8176 typedef CrsMatrix<Scalar, LO, GO, NT> this_type;
8177 typedef Vector<int, LO, GO, NT> IntVectorType;
8178 using Teuchos::as;
8179
8180 const bool debug = Behavior::debug("CrsMatrix");
8181 const bool verbose = Behavior::verbose("CrsMatrix");
8182 int MyPID = getComm ()->getRank ();
8183
8184 std::unique_ptr<std::string> verbosePrefix;
8185 if (verbose) {
8186 verbosePrefix =
8187 this->createPrefix("CrsMatrix", "transferAndFillComplete");
8188 std::ostringstream os;
8189 os << "Start" << endl;
8190 std::cerr << os.str();
8191 }
8192
8193 //
8194 // Get the caller's parameters
8195 //
8196 bool isMM = false; // optimize for matrix-matrix ops.
8197 bool reverseMode = false; // Are we in reverse mode?
8198 bool restrictComm = false; // Do we need to restrict the communicator?
8199
8200 int mm_optimization_core_count =
8201 Behavior::TAFC_OptimizationCoreCount();
8202 RCP<ParameterList> matrixparams; // parameters for the destination matrix
8203 bool overrideAllreduce = false;
8204 if (! params.is_null ()) {
8205 matrixparams = sublist (params, "CrsMatrix");
8206 reverseMode = params->get ("Reverse Mode", reverseMode);
8207 restrictComm = params->get ("Restrict Communicator", restrictComm);
8208 auto & slist = params->sublist("matrixmatrix: kernel params",false);
8209 isMM = slist.get("isMatrixMatrix_TransferAndFillComplete",false);
8210 mm_optimization_core_count = slist.get("MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
8211
8212 overrideAllreduce = slist.get("MM_TAFC_OverrideAllreduceCheck",false);
8213 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM = false;
8214 if(reverseMode) isMM = false;
8215 }
8216
8217 // Only used in the sparse matrix-matrix multiply (isMM) case.
8218 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
8219 int mismatch = 0;
8220 int reduced_mismatch = 0;
8221 if (isMM && !overrideAllreduce) {
8222
8223 // Test for pathological matrix transfer
8224 const bool source_vals = ! getGraph ()->getImporter ().is_null();
8225 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
8226 rowTransfer.getRemoteLIDs ().size() == 0);
8227 mismatch = (source_vals != target_vals) ? 1 : 0;
8228 iallreduceRequest =
8229 ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
8230 Teuchos::REDUCE_MAX, * (getComm ()));
8231 }
8232
8233#ifdef HAVE_TPETRA_MMM_TIMINGS
8234 using Teuchos::TimeMonitor;
8235 std::string label;
8236 if(!params.is_null())
8237 label = params->get("Timer Label",label);
8238 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
8239 std::string tlstr;
8240 {
8241 std::ostringstream os;
8242 if(isMM) os<<":MMOpt";
8243 else os<<":MMLegacy";
8244 tlstr = os.str();
8245 }
8246
8247 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string("TAFC All") +tlstr ));
8248#endif
8249
8250 // Make sure that the input argument rowTransfer is either an
8251 // Import or an Export. Import and Export are the only two
8252 // subclasses of Transfer that we defined, but users might
8253 // (unwisely, for now at least) decide to implement their own
8254 // subclasses. Exclude this possibility.
8255 const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
8256 const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
8257 TEUCHOS_TEST_FOR_EXCEPTION(
8258 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
8259 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
8260 "argument must be either an Import or an Export, and its template "
8261 "parameters must match the corresponding template parameters of the "
8262 "CrsMatrix.");
8263
8264 // Make sure that the input argument domainTransfer is either an
8265 // Import or an Export. Import and Export are the only two
8266 // subclasses of Transfer that we defined, but users might
8267 // (unwisely, for now at least) decide to implement their own
8268 // subclasses. Exclude this possibility.
8269 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
8270 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
8271
8272 if(! domainTransfer.is_null()) {
8273 TEUCHOS_TEST_FOR_EXCEPTION(
8274 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
8275 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
8276 "argument must be either an Import or an Export, and its template "
8277 "parameters must match the corresponding template parameters of the "
8278 "CrsMatrix.");
8279
8280 TEUCHOS_TEST_FOR_EXCEPTION(
8281 ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
8282 (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
8283 ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
8284 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8285 "arguments must be of the same type (either Import or Export).");
8286
8287 TEUCHOS_TEST_FOR_EXCEPTION(
8288 ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
8289 (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
8290 ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
8291 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8292 "arguments must be of the same type (either Import or Export).");
8293 } // domainTransfer != null
8294
8295
8296 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
8297 // if the source Map is not distributed but the target Map is?
8298 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
8299
8300 // Get the new domain and range Maps. We need some of them for
8301 // error checking, now that we have the reverseMode parameter.
8302 RCP<const map_type> MyRowMap = reverseMode ?
8303 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
8304 RCP<const map_type> MyColMap; // create this below
8305 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
8306 domainMap : getDomainMap ();
8307 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
8308 rangeMap : getRangeMap ();
8309 RCP<const map_type> BaseRowMap = MyRowMap;
8310 RCP<const map_type> BaseDomainMap = MyDomainMap;
8311
8312 // If the user gave us a nonnull destMat, then check whether it's
8313 // "pristine." That means that it has no entries.
8314 //
8315 // FIXME (mfh 15 May 2014) If this is not true on all processes,
8316 // then this exception test may hang. It would be better to
8317 // forward an error flag to the next communication phase.
8318 if (! destMat.is_null ()) {
8319 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
8320 // whether a graph or matrix has no entries on the calling
8321 // process, is that it is neither locally nor globally indexed.
8322 // This may change eventually with the Kokkos refactor version
8323 // of Tpetra, so it would be better just to check the quantity
8324 // of interest directly. Note that with the Kokkos refactor
8325 // version of Tpetra, asking for the total number of entries in
8326 // a graph or matrix that is not fill complete might require
8327 // computation (kernel launch), since it is not thread scalable
8328 // to update a count every time an entry is inserted.
8329 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
8330 ! destMat->getGraph ()->isGloballyIndexed ();
8331 TEUCHOS_TEST_FOR_EXCEPTION(
8332 ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
8333 "transferAndFillComplete: The input argument 'destMat' is only allowed "
8334 "to be nonnull, if its graph is empty (neither locally nor globally "
8335 "indexed).");
8336 // FIXME (mfh 15 May 2014) At some point, we want to change
8337 // graphs and matrices so that their DistObject Map
8338 // (this->getMap()) may differ from their row Map. This will
8339 // make redistribution for 2-D distributions more efficient. I
8340 // hesitate to change this check, because I'm not sure how much
8341 // the code here depends on getMap() and getRowMap() being the
8342 // same.
8343 TEUCHOS_TEST_FOR_EXCEPTION(
8344 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
8345 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
8346 "input argument 'destMat' is not the same as the (row) Map specified "
8347 "by the input argument 'rowTransfer'.");
8348 TEUCHOS_TEST_FOR_EXCEPTION(
8349 ! destMat->checkSizes (*this), std::invalid_argument,
8350 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
8351 "destination matrix, but checkSizes() indicates that it is not a legal "
8352 "legal target for redistribution from the source matrix (*this). This "
8353 "may mean that they do not have the same dimensions.");
8354 }
8355
8356 // If forward mode (the default), then *this's (row) Map must be
8357 // the same as the source Map of the Transfer. If reverse mode,
8358 // then *this's (row) Map must be the same as the target Map of
8359 // the Transfer.
8360 //
8361 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
8362 // and matrices so that their DistObject Map (this->getMap()) may
8363 // differ from their row Map. This will make redistribution for
8364 // 2-D distributions more efficient. I hesitate to change this
8365 // check, because I'm not sure how much the code here depends on
8366 // getMap() and getRowMap() being the same.
8367 TEUCHOS_TEST_FOR_EXCEPTION(
8368 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
8369 std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8370 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
8371 TEUCHOS_TEST_FOR_EXCEPTION(
8372 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
8373 std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8374 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
8375
8376 // checks for domainTransfer
8377 TEUCHOS_TEST_FOR_EXCEPTION(
8378 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
8379 std::invalid_argument,
8380 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
8381 "argument must be the same as the rebalanced domain map 'domainMap'");
8382
8383 TEUCHOS_TEST_FOR_EXCEPTION(
8384 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
8385 std::invalid_argument,
8386 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
8387 "argument must be the same as the rebalanced domain map 'domainMap'");
8388
8389 // The basic algorithm here is:
8390 //
8391 // 1. Call the moral equivalent of "Distor.do" to handle the import.
8392 // 2. Copy all the Imported and Copy/Permuted data into the raw
8393 // CrsMatrix / CrsGraphData pointers, still using GIDs.
8394 // 3. Call an optimized version of MakeColMap that avoids the
8395 // Directory lookups (since the importer knows who owns all the
8396 // GIDs) AND reindexes to LIDs.
8397 // 4. Call expertStaticFillComplete()
8398
8399 // Get information from the Importer
8400 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
8401 ArrayView<const LO> ExportLIDs = reverseMode ?
8402 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
8403 ArrayView<const LO> RemoteLIDs = reverseMode ?
8404 rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
8405 ArrayView<const LO> PermuteToLIDs = reverseMode ?
8406 rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
8407 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
8408 rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
8409 Distributor& Distor = rowTransfer.getDistributor ();
8410
8411 // Owning PIDs
8412 Teuchos::Array<int> SourcePids;
8413 Teuchos::Array<int> TargetPids;
8414
8415 // Temp variables for sub-communicators
8416 RCP<const map_type> ReducedRowMap, ReducedColMap,
8417 ReducedDomainMap, ReducedRangeMap;
8418 RCP<const Comm<int> > ReducedComm;
8419
8420 // If the user gave us a null destMat, then construct the new
8421 // destination matrix. We will replace its column Map later.
8422 if (destMat.is_null ()) {
8423 destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
8424 }
8425
8426 /***************************************************/
8427 /***** 1) First communicator restriction phase ****/
8428 /***************************************************/
8429 if (restrictComm) {
8430 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8431 ReducedComm = ReducedRowMap.is_null () ?
8432 Teuchos::null :
8433 ReducedRowMap->getComm ();
8434 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8435
8436 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8437 ReducedRowMap :
8438 MyDomainMap->replaceCommWithSubset (ReducedComm);
8439 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8440 ReducedRowMap :
8441 MyRangeMap->replaceCommWithSubset (ReducedComm);
8442
8443 // Reset the "my" maps
8444 MyRowMap = ReducedRowMap;
8445 MyDomainMap = ReducedDomainMap;
8446 MyRangeMap = ReducedRangeMap;
8447
8448 // Update my PID, if we've restricted the communicator
8449 if (! ReducedComm.is_null ()) {
8450 MyPID = ReducedComm->getRank ();
8451 }
8452 else {
8453 MyPID = -2; // For debugging
8454 }
8455 }
8456 else {
8457 ReducedComm = MyRowMap->getComm ();
8458 }
8459
8460
8461
8462 /***************************************************/
8463 /***** 2) From Tpetra::DistObject::doTransfer() ****/
8464 /***************************************************/
8465 // Get the owning PIDs
8466 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8467
8468 // Check whether the source matrix's domain Map is the same as the base domain Map
8469 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8470
8471 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8472 // Same domain map as source matrix
8473 //
8474 // NOTE: This won't work for restrictComm (because the Import
8475 // doesn't know the restricted PIDs), though writing an
8476 // optimized version for that case would be easy (Import an
8477 // IntVector of the new PIDs). Might want to add this later.
8478 Import_Util::getPids (*MyImporter, SourcePids, false);
8479 }
8480 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8481 // Same domain map as source matrix (restricted communicator)
8482 // We need one import from the domain to the column map
8483 IntVectorType SourceDomain_pids(getDomainMap (),true);
8484 IntVectorType SourceCol_pids(getColMap());
8485 // SourceDomain_pids contains the restricted pids
8486 SourceDomain_pids.putScalar(MyPID);
8487
8488 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8489 SourcePids.resize (getColMap ()->getNodeNumElements ());
8490 SourceCol_pids.get1dCopy (SourcePids ());
8491 }
8492 else if (MyImporter.is_null ()) {
8493 // Matrix has no off-process entries
8494 SourcePids.resize (getColMap ()->getNodeNumElements ());
8495 SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
8496 }
8497 else if ( ! MyImporter.is_null () &&
8498 ! domainTransfer.is_null () ) {
8499 // general implementation for rectangular matrices with
8500 // domain map different than SourceMatrix domain map.
8501 // User has to provide a DomainTransfer object. We need
8502 // two communications (import/export)
8503
8504 // TargetDomain_pids lives on the rebalanced new domain map
8505 IntVectorType TargetDomain_pids (domainMap);
8506 TargetDomain_pids.putScalar (MyPID);
8507
8508 // SourceDomain_pids lives on the non-rebalanced old domain map
8509 IntVectorType SourceDomain_pids (getDomainMap ());
8510
8511 // SourceCol_pids lives on the non-rebalanced old column map
8512 IntVectorType SourceCol_pids (getColMap ());
8513
8514 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8515 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8516 }
8517 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8518 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8519 }
8520 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8521 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8522 }
8523 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8524 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8525 }
8526 else {
8527 TEUCHOS_TEST_FOR_EXCEPTION(
8528 true, std::logic_error, "Tpetra::CrsMatrix::"
8529 "transferAndFillComplete: Should never get here! "
8530 "Please report this bug to a Tpetra developer.");
8531 }
8532 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8533 SourcePids.resize (getColMap ()->getNodeNumElements ());
8534 SourceCol_pids.get1dCopy (SourcePids ());
8535 }
8536 else if ( ! MyImporter.is_null () &&
8537 BaseDomainMap->isSameAs (*BaseRowMap) &&
8538 getDomainMap ()->isSameAs (*getRowMap ())) {
8539 // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
8540
8541 IntVectorType TargetRow_pids (domainMap);
8542 IntVectorType SourceRow_pids (getRowMap ());
8543 IntVectorType SourceCol_pids (getColMap ());
8544
8545 TargetRow_pids.putScalar (MyPID);
8546 if (! reverseMode && xferAsImport != nullptr) {
8547 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
8548 }
8549 else if (reverseMode && xferAsExport != nullptr) {
8550 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
8551 }
8552 else if (! reverseMode && xferAsExport != nullptr) {
8553 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
8554 }
8555 else if (reverseMode && xferAsImport != nullptr) {
8556 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
8557 }
8558 else {
8559 TEUCHOS_TEST_FOR_EXCEPTION(
8560 true, std::logic_error, "Tpetra::CrsMatrix::"
8561 "transferAndFillComplete: Should never get here! "
8562 "Please report this bug to a Tpetra developer.");
8563 }
8564
8565 SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
8566 SourcePids.resize (getColMap ()->getNodeNumElements ());
8567 SourceCol_pids.get1dCopy (SourcePids ());
8568 }
8569 else {
8570 TEUCHOS_TEST_FOR_EXCEPTION(
8571 true, std::invalid_argument, "Tpetra::CrsMatrix::"
8572 "transferAndFillComplete: This method only allows either domainMap == "
8573 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8574 "getDomainMap () == getRowMap ()).");
8575 }
8576
8577 // Tpetra-specific stuff
8578 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8579 if (constantNumPackets == 0) {
8580 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8581 RemoteLIDs.size ());
8582 }
8583 else {
8584 // There are a constant number of packets per element. We
8585 // already know (from the number of "remote" (incoming)
8586 // elements) how many incoming elements we expect, so we can
8587 // resize the buffer accordingly.
8588 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8589 destMat->reallocImportsIfNeeded (rbufLen, false, nullptr);
8590 }
8591
8592 // Pack & Prepare w/ owning PIDs
8593 if (debug) {
8594 using Teuchos::outArg;
8595 using Teuchos::REDUCE_MAX;
8596 using Teuchos::reduceAll;
8597 using std::cerr;
8598 using std::endl;
8599 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8600 const int myRank = comm->getRank ();
8601
8602 std::ostringstream errStrm;
8603 int lclErr = 0;
8604 int gblErr = 0;
8605
8606 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8607 try {
8608 // packAndPrepare* methods modify numExportPacketsPerLID_.
8609 destMat->numExportPacketsPerLID_.modify_host ();
8610 numExportPacketsPerLID =
8611 getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8612 }
8613 catch (std::exception& e) {
8614 errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw: "
8615 << e.what () << std::endl;
8616 lclErr = 1;
8617 }
8618 catch (...) {
8619 errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw "
8620 "an exception not a subclass of std::exception" << std::endl;
8621 lclErr = 1;
8622 }
8623
8624 if (! comm.is_null ()) {
8625 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8626 }
8627 if (gblErr != 0) {
8628 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8629 TEUCHOS_TEST_FOR_EXCEPTION(
8630 true, std::runtime_error, "getArrayViewFromDualView threw an "
8631 "exception on at least one process.");
8632 }
8633
8634 if (verbose) {
8635 std::ostringstream os;
8636 os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
8637 << std::endl;
8638 std::cerr << os.str ();
8639 }
8640 try {
8642 destMat->exports_,
8643 numExportPacketsPerLID,
8644 ExportLIDs,
8645 SourcePids,
8646 constantNumPackets);
8647 }
8648 catch (std::exception& e) {
8649 errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw: "
8650 << e.what () << std::endl;
8651 lclErr = 1;
8652 }
8653 catch (...) {
8654 errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw "
8655 "an exception not a subclass of std::exception" << std::endl;
8656 lclErr = 1;
8657 }
8658
8659 if (verbose) {
8660 std::ostringstream os;
8661 os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
8662 << std::endl;
8663 std::cerr << os.str ();
8664 }
8665
8666 if (! comm.is_null ()) {
8667 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8668 }
8669 if (gblErr != 0) {
8670 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8671 TEUCHOS_TEST_FOR_EXCEPTION(
8672 true, std::runtime_error, "packCrsMatrixWithOwningPIDs threw an "
8673 "exception on at least one process.");
8674 }
8675 }
8676 else {
8677 // packAndPrepare* methods modify numExportPacketsPerLID_.
8678 destMat->numExportPacketsPerLID_.modify_host ();
8679 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8680 getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8681 if (verbose) {
8682 std::ostringstream os;
8683 os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
8684 << std::endl;
8685 std::cerr << os.str ();
8686 }
8688 destMat->exports_,
8689 numExportPacketsPerLID,
8690 ExportLIDs,
8691 SourcePids,
8692 constantNumPackets);
8693 if (verbose) {
8694 std::ostringstream os;
8695 os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
8696 << std::endl;
8697 std::cerr << os.str ();
8698 }
8699 }
8700
8701 // Do the exchange of remote data.
8702 if (! communication_needed) {
8703 if (verbose) {
8704 std::ostringstream os;
8705 os << *verbosePrefix << "Communication not needed" << std::endl;
8706 std::cerr << os.str ();
8707 }
8708 }
8709 else {
8710 if (reverseMode) {
8711 if (constantNumPackets == 0) { // variable number of packets per LID
8712 if (verbose) {
8713 std::ostringstream os;
8714 os << *verbosePrefix << "Reverse mode, variable # packets / LID"
8715 << std::endl;
8716 std::cerr << os.str ();
8717 }
8718 // Make sure that host has the latest version, since we're
8719 // using the version on host. If host has the latest
8720 // version, syncing to host does nothing.
8721 destMat->numExportPacketsPerLID_.sync_host ();
8722 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8723 getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8724 destMat->numImportPacketsPerLID_.sync_host ();
8725 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8726 getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8727
8728 if (verbose) {
8729 std::ostringstream os;
8730 os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
8731 << std::endl;
8732 std::cerr << os.str ();
8733 }
8734 Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
8735 numImportPacketsPerLID);
8736 if (verbose) {
8737 std::ostringstream os;
8738 os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
8739 << std::endl;
8740 std::cerr << os.str ();
8741 }
8742
8743 size_t totalImportPackets = 0;
8744 for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8745 totalImportPackets += numImportPacketsPerLID[i];
8746 }
8747
8748 // Reallocation MUST go before setting the modified flag,
8749 // because it may clear out the flags.
8750 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8751 verbosePrefix.get ());
8752 destMat->imports_.modify_host ();
8753 Teuchos::ArrayView<char> hostImports =
8754 getArrayViewFromDualView (destMat->imports_);
8755 // This is a legacy host pack/unpack path, so use the host
8756 // version of exports_.
8757 destMat->exports_.sync_host ();
8758 Teuchos::ArrayView<const char> hostExports =
8759 getArrayViewFromDualView (destMat->exports_);
8760 if (verbose) {
8761 std::ostringstream os;
8762 os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits"
8763 << std::endl;
8764 std::cerr << os.str ();
8765 }
8766 Distor.doReversePostsAndWaits (hostExports,
8767 numExportPacketsPerLID,
8768 hostImports,
8769 numImportPacketsPerLID);
8770 if (verbose) {
8771 std::ostringstream os;
8772 os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits"
8773 << std::endl;
8774 std::cerr << os.str ();
8775 }
8776 }
8777 else { // constant number of packets per LID
8778 if (verbose) {
8779 std::ostringstream os;
8780 os << *verbosePrefix << "Reverse mode, constant # packets / LID"
8781 << std::endl;
8782 std::cerr << os.str ();
8783 }
8784 destMat->imports_.modify_host ();
8785 Teuchos::ArrayView<char> hostImports =
8786 getArrayViewFromDualView (destMat->imports_);
8787 // This is a legacy host pack/unpack path, so use the host
8788 // version of exports_.
8789 destMat->exports_.sync_host ();
8790 Teuchos::ArrayView<const char> hostExports =
8791 getArrayViewFromDualView (destMat->exports_);
8792 if (verbose) {
8793 std::ostringstream os;
8794 os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
8795 << std::endl;
8796 std::cerr << os.str ();
8797 }
8798 Distor.doReversePostsAndWaits (hostExports,
8799 constantNumPackets,
8800 hostImports);
8801 if (verbose) {
8802 std::ostringstream os;
8803 os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
8804 << std::endl;
8805 std::cerr << os.str ();
8806 }
8807 }
8808 }
8809 else { // forward mode (the default)
8810 if (constantNumPackets == 0) { // variable number of packets per LID
8811 if (verbose) {
8812 std::ostringstream os;
8813 os << *verbosePrefix << "Forward mode, variable # packets / LID"
8814 << std::endl;
8815 std::cerr << os.str ();
8816 }
8817 // Make sure that host has the latest version, since we're
8818 // using the version on host. If host has the latest
8819 // version, syncing to host does nothing.
8820 destMat->numExportPacketsPerLID_.sync_host ();
8821 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8822 getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8823 destMat->numImportPacketsPerLID_.sync_host ();
8824 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8825 getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8826 if (verbose) {
8827 std::ostringstream os;
8828 os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
8829 << std::endl;
8830 std::cerr << os.str ();
8831 }
8832 Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
8833 numImportPacketsPerLID);
8834 if (verbose) {
8835 std::ostringstream os;
8836 os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
8837 << std::endl;
8838 std::cerr << os.str ();
8839 }
8840
8841 size_t totalImportPackets = 0;
8842 for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8843 totalImportPackets += numImportPacketsPerLID[i];
8844 }
8845
8846 // Reallocation MUST go before setting the modified flag,
8847 // because it may clear out the flags.
8848 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8849 verbosePrefix.get ());
8850 destMat->imports_.modify_host ();
8851 Teuchos::ArrayView<char> hostImports =
8852 getArrayViewFromDualView (destMat->imports_);
8853 // This is a legacy host pack/unpack path, so use the host
8854 // version of exports_.
8855 destMat->exports_.sync_host ();
8856 Teuchos::ArrayView<const char> hostExports =
8857 getArrayViewFromDualView (destMat->exports_);
8858 if (verbose) {
8859 std::ostringstream os;
8860 os << *verbosePrefix << "Calling 4-arg doPostsAndWaits"
8861 << std::endl;
8862 std::cerr << os.str ();
8863 }
8864 Distor.doPostsAndWaits (hostExports,
8865 numExportPacketsPerLID,
8866 hostImports,
8867 numImportPacketsPerLID);
8868 if (verbose) {
8869 std::ostringstream os;
8870 os << *verbosePrefix << "Finished 4-arg doPostsAndWaits"
8871 << std::endl;
8872 std::cerr << os.str ();
8873 }
8874 }
8875 else { // constant number of packets per LID
8876 if (verbose) {
8877 std::ostringstream os;
8878 os << *verbosePrefix << "Forward mode, constant # packets / LID"
8879 << std::endl;
8880 std::cerr << os.str ();
8881 }
8882 destMat->imports_.modify_host ();
8883 Teuchos::ArrayView<char> hostImports =
8884 getArrayViewFromDualView (destMat->imports_);
8885 // This is a legacy host pack/unpack path, so use the host
8886 // version of exports_.
8887 destMat->exports_.sync_host ();
8888 Teuchos::ArrayView<const char> hostExports =
8889 getArrayViewFromDualView (destMat->exports_);
8890 if (verbose) {
8891 std::ostringstream os;
8892 os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
8893 << std::endl;
8894 std::cerr << os.str ();
8895 }
8896 Distor.doPostsAndWaits (hostExports,
8897 constantNumPackets,
8898 hostImports);
8899 if (verbose) {
8900 std::ostringstream os;
8901 os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
8902 << std::endl;
8903 std::cerr << os.str ();
8904 }
8905 }
8906 }
8907 }
8908
8909 /*********************************************************************/
8910 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
8911 /*********************************************************************/
8912
8913 // Backwards compatibility measure. We'll use this again below.
8914 destMat->numImportPacketsPerLID_.sync_host ();
8915 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
8916 getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8917 destMat->imports_.sync_host ();
8918 Teuchos::ArrayView<const char> hostImports =
8919 getArrayViewFromDualView (destMat->imports_);
8920
8921 if (verbose) {
8922 std::ostringstream os;
8923 os << *verbosePrefix << "Calling unpackAndCombineWithOwningPIDsCount"
8924 << std::endl;
8925 std::cerr << os.str ();
8926 }
8927 size_t mynnz =
8929 RemoteLIDs,
8930 hostImports,
8931 numImportPacketsPerLID,
8932 constantNumPackets,
8933 INSERT,
8934 NumSameIDs,
8935 PermuteToLIDs,
8936 PermuteFromLIDs);
8937 if (verbose) {
8938 std::ostringstream os;
8939 os << *verbosePrefix << "unpackAndCombineWithOwningPIDsCount returned "
8940 << mynnz << std::endl;
8941 std::cerr << os.str ();
8942 }
8943 size_t N = BaseRowMap->getNodeNumElements ();
8944
8945 // Allocations
8946 ArrayRCP<size_t> CSR_rowptr(N+1);
8947 ArrayRCP<GO> CSR_colind_GID;
8948 ArrayRCP<LO> CSR_colind_LID;
8949 ArrayRCP<Scalar> CSR_vals;
8950 CSR_colind_GID.resize (mynnz);
8951 CSR_vals.resize (mynnz);
8952
8953 // If LO and GO are the same, we can reuse memory when
8954 // converting the column indices from global to local indices.
8955 if (typeid (LO) == typeid (GO)) {
8956 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8957 }
8958 else {
8959 CSR_colind_LID.resize (mynnz);
8960 }
8961
8962 if (verbose) {
8963 std::ostringstream os;
8964 os << *verbosePrefix << "Calling unpackAndCombineIntoCrsArrays"
8965 << std::endl;
8966 std::cerr << os.str ();
8967 }
8968 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
8969 // unpackAndCombine method on a "CrsArrays" object? This passing
8970 // in a huge list of arrays is icky. Can't we have a bit of an
8971 // abstraction? Implementing a concrete DistObject subclass only
8972 // takes five methods.
8974 RemoteLIDs,
8975 hostImports,
8976 numImportPacketsPerLID,
8977 constantNumPackets,
8978 INSERT,
8979 NumSameIDs,
8980 PermuteToLIDs,
8981 PermuteFromLIDs,
8982 N,
8983 mynnz,
8984 MyPID,
8985 CSR_rowptr (),
8986 CSR_colind_GID (),
8987 Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
8988 SourcePids (),
8989 TargetPids);
8990
8991 /**************************************************************/
8992 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
8993 /**************************************************************/
8994 // Call an optimized version of makeColMap that avoids the
8995 // Directory lookups (since the Import object knows who owns all
8996 // the GIDs).
8997 Teuchos::Array<int> RemotePids;
8998 if (verbose) {
8999 std::ostringstream os;
9000 os << *verbosePrefix << "Calling lowCommunicationMakeColMapAndReindex"
9001 << std::endl;
9002 std::cerr << os.str ();
9003 }
9004 Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
9005 CSR_colind_LID (),
9006 CSR_colind_GID (),
9007 BaseDomainMap,
9008 TargetPids,
9009 RemotePids,
9010 MyColMap);
9011
9012 if (verbose) {
9013 std::ostringstream os;
9014 os << *verbosePrefix << "restrictComm="
9015 << (restrictComm ? "true" : "false") << std::endl;
9016 std::cerr << os.str ();
9017 }
9018
9019 /*******************************************************/
9020 /**** 4) Second communicator restriction phase ****/
9021 /*******************************************************/
9022 if (restrictComm) {
9023 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
9024 ReducedRowMap :
9025 MyColMap->replaceCommWithSubset (ReducedComm);
9026 MyColMap = ReducedColMap; // Reset the "my" maps
9027 }
9028
9029 // Replace the col map
9030 if (verbose) {
9031 std::ostringstream os;
9032 os << *verbosePrefix << "Calling replaceColMap" << std::endl;
9033 std::cerr << os.str ();
9034 }
9035 destMat->replaceColMap (MyColMap);
9036
9037 // Short circuit if the processor is no longer in the communicator
9038 //
9039 // NOTE: Epetra modifies all "removed" processes so they
9040 // have a dummy (serial) Map that doesn't touch the original
9041 // communicator. Duplicating that here might be a good idea.
9042 if (ReducedComm.is_null ()) {
9043 if (verbose) {
9044 std::ostringstream os;
9045 os << *verbosePrefix << "I am no longer in the communicator; "
9046 "returning" << std::endl;
9047 std::cerr << os.str ();
9048 }
9049 return;
9050 }
9051
9052 /***************************************************/
9053 /**** 5) Sort ****/
9054 /***************************************************/
9055 if ((! reverseMode && xferAsImport != nullptr) ||
9056 (reverseMode && xferAsExport != nullptr)) {
9057 if (verbose) {
9058 std::ostringstream os;
9059 os << *verbosePrefix << "Calling sortCrsEntries" << endl;
9060 std::cerr << os.str ();
9061 }
9062 Import_Util::sortCrsEntries (CSR_rowptr (),
9063 CSR_colind_LID (),
9064 CSR_vals ());
9065 }
9066 else if ((! reverseMode && xferAsExport != nullptr) ||
9067 (reverseMode && xferAsImport != nullptr)) {
9068 if (verbose) {
9069 std::ostringstream os;
9070 os << *verbosePrefix << "Calling sortAndMergeCrsEntries"
9071 << endl;
9072 std::cerr << os.str();
9073 }
9074 Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
9075 CSR_colind_LID (),
9076 CSR_vals ());
9077 if (CSR_rowptr[N] != mynnz) {
9078 CSR_colind_LID.resize (CSR_rowptr[N]);
9079 CSR_vals.resize (CSR_rowptr[N]);
9080 }
9081 }
9082 else {
9083 TEUCHOS_TEST_FOR_EXCEPTION(
9084 true, std::logic_error, "Tpetra::CrsMatrix::"
9085 "transferAndFillComplete: Should never get here! "
9086 "Please report this bug to a Tpetra developer.");
9087 }
9088 /***************************************************/
9089 /**** 6) Reset the colmap and the arrays ****/
9090 /***************************************************/
9091
9092 if (verbose) {
9093 std::ostringstream os;
9094 os << *verbosePrefix << "Calling destMat->setAllValues" << endl;
9095 std::cerr << os.str ();
9096 }
9097
9098 // Call constructor for the new matrix (restricted as needed)
9099 //
9100 // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
9101 // refactor version of CrsMatrix, though it reserves the right to
9102 // make a deep copy of the arrays.
9103 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
9104
9105 /***************************************************/
9106 /**** 7) Build Importer & Call ESFC ****/
9107 /***************************************************/
9108 // Pre-build the importer using the existing PIDs
9109 Teuchos::ParameterList esfc_params;
9110
9111 RCP<import_type> MyImport;
9112
9113 // Fulfill the non-blocking allreduce on reduced_mismatch.
9114 if (iallreduceRequest.get () != nullptr) {
9115 if (verbose) {
9116 std::ostringstream os;
9117 os << *verbosePrefix << "Calling iallreduceRequest->wait()"
9118 << endl;
9119 std::cerr << os.str ();
9120 }
9121 iallreduceRequest->wait ();
9122 if (reduced_mismatch != 0) {
9123 isMM = false;
9124 }
9125 }
9126
9127 if( isMM ) {
9128#ifdef HAVE_TPETRA_MMM_TIMINGS
9129 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string("isMM Block")));
9130#endif
9131 // Combine all type1/2/3 lists, [filter them], then call the expert import constructor.
9132
9133 if (verbose) {
9134 std::ostringstream os;
9135 os << *verbosePrefix << "Calling getAllValues" << endl;
9136 std::cerr << os.str ();
9137 }
9138
9139 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
9140 Teuchos::ArrayRCP<int> type3PIDs;
9141 Teuchos::ArrayRCP<const size_t> rowptr;
9142 Teuchos::ArrayRCP<const LO> colind;
9143 Teuchos::ArrayRCP<const Scalar> vals;
9144 {
9145#ifdef HAVE_TPETRA_MMM_TIMINGS
9146 TimeMonitor tm_getAllValues (*TimeMonitor::getNewTimer(prefix + std::string("isMMgetAllValues")));
9147#endif
9148 getAllValues(rowptr,colind,vals);
9149 }
9150
9151 if (verbose) {
9152 std::ostringstream os;
9153 os << *verbosePrefix << "Calling reverseNeighborDiscovery" << std::endl;
9154 std::cerr << os.str ();
9155 }
9156
9157 {
9158#ifdef HAVE_TPETRA_MMM_TIMINGS
9159 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string("isMMrevNeighDis")));
9160#endif
9161 Import_Util::reverseNeighborDiscovery(*this,
9162 rowptr,
9163 colind,
9164 rowTransfer,
9165 MyImporter,
9166 MyDomainMap,
9167 type3PIDs,
9168 type3LIDs,
9169 ReducedComm);
9170 }
9171
9172 if (verbose) {
9173 std::ostringstream os;
9174 os << *verbosePrefix << "Done with reverseNeighborDiscovery" << std::endl;
9175 std::cerr << os.str ();
9176 }
9177
9178 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
9179 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
9180
9181 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs(); // row matrix
9182 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
9183
9184 const int numCols = getGraph()->getColMap()->getNodeNumElements(); // may be dup
9185 // from EpetraExt_MMHelpers.cpp: build_type2_exports
9186 std::vector<bool> IsOwned(numCols,true);
9187 std::vector<int> SentTo(numCols,-1);
9188 if (! MyImporter.is_null ()) {
9189 for (auto && rlid : MyImporter->getRemoteLIDs()) { // the remoteLIDs must be from sourcematrix
9190 IsOwned[rlid]=false;
9191 }
9192 }
9193
9194 std::vector<std::pair<int,GO> > usrtg;
9195 usrtg.reserve(TEPID2.size());
9196
9197 {
9198 const auto& colMap = * (this->getColMap ()); // *this is sourcematrix
9199 for (Array_size_type i = 0; i < TEPID2.size (); ++i) {
9200 const LO row = TELID2[i];
9201 const int pid = TEPID2[i];
9202 for (auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
9203 const int col = colind[j];
9204 if (IsOwned[col] && SentTo[col] != pid) {
9205 SentTo[col] = pid;
9206 GO gid = colMap.getGlobalElement (col);
9207 usrtg.push_back (std::pair<int,GO> (pid, gid));
9208 }
9209 }
9210 }
9211 }
9212
9213// This sort can _not_ be omitted.
9214 std::sort(usrtg.begin(),usrtg.end()); // default comparator does the right thing, now sorted in gid order
9215 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
9216 // 25 Jul 2018: Could just ignore the entries at and after eopg.
9217 usrtg.erase(eopg,usrtg.end());
9218
9219 const Array_size_type type2_us_size = usrtg.size();
9220 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(new int[type2_us_size],0,type2_us_size,true);
9221 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(new LO[type2_us_size],0,type2_us_size,true);
9222
9223 int pos=0;
9224 for(auto && p : usrtg) {
9225 EPID2[pos]= p.first;
9226 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9227 pos++;
9228 }
9229
9230 Teuchos::ArrayView<int> EPID3 = type3PIDs();
9231 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9232 GO InfGID = std::numeric_limits<GO>::max();
9233 int InfPID = INT_MAX;
9234#ifdef TPETRA_MIN3
9235# undef TPETRA_MIN3
9236#endif // TPETRA_MIN3
9237#define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9238 int i1=0, i2=0, i3=0;
9239 int Len1 = EPID1.size();
9240 int Len2 = EPID2.size();
9241 int Len3 = EPID3.size();
9242
9243 int MyLen=Len1+Len2+Len3;
9244 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(new LO[MyLen],0,MyLen,true);
9245 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(new int[MyLen],0,MyLen,true);
9246 int iloc = 0; // will be the size of the userExportLID/PIDs
9247
9248 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9249 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9250 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9251 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9252
9253 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9254 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9255 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9256
9257 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9258 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9259#ifdef TPETRA_MIN3
9260# undef TPETRA_MIN3
9261#endif // TPETRA_MIN3
9262 bool added_entry=false;
9263
9264 if(PID1 == MIN_PID && GID1 == MIN_GID){
9265 userExportLIDs[iloc]=ELID1[i1];
9266 userExportPIDs[iloc]=EPID1[i1];
9267 i1++;
9268 added_entry=true;
9269 iloc++;
9270 }
9271 if(PID2 == MIN_PID && GID2 == MIN_GID){
9272 if(!added_entry) {
9273 userExportLIDs[iloc]=ELID2[i2];
9274 userExportPIDs[iloc]=EPID2[i2];
9275 added_entry=true;
9276 iloc++;
9277 }
9278 i2++;
9279 }
9280 if(PID3 == MIN_PID && GID3 == MIN_GID){
9281 if(!added_entry) {
9282 userExportLIDs[iloc]=ELID3[i3];
9283 userExportPIDs[iloc]=EPID3[i3];
9284 iloc++;
9285 }
9286 i3++;
9287 }
9288 }
9289
9290 if (verbose) {
9291 std::ostringstream os;
9292 os << *verbosePrefix << "Create Import" << std::endl;
9293 std::cerr << os.str ();
9294 }
9295
9296#ifdef HAVE_TPETRA_MMM_TIMINGS
9297 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string("isMMIportCtor")));
9298#endif
9299 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(new Teuchos::ParameterList());
9300 // 25 Jul 2018: Test for equality with the non-isMM path's Import object.
9301 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9302 MyImport = rcp ( new import_type (MyDomainMap,
9303 MyColMap,
9304 RemotePids,
9305 userExportLIDs.view(0,iloc).getConst(),
9306 userExportPIDs.view(0,iloc).getConst(),
9307 plist)
9308 );
9309
9310 if (verbose) {
9311 std::ostringstream os;
9312 os << *verbosePrefix << "Call expertStaticFillComplete" << std::endl;
9313 std::cerr << os.str ();
9314 }
9315
9316 {
9317#ifdef HAVE_TPETRA_MMM_TIMINGS
9318 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string("isMM::destMat->eSFC")));
9319 esfc_params.set("Timer Label",label+std::string("isMM eSFC"));
9320#endif
9321 if(!params.is_null())
9322 esfc_params.set("compute global constants",params->get("compute global constants",true));
9323 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(new Teuchos::ParameterList(esfc_params)));
9324
9325 }
9326
9327 } // if(isMM)
9328 else {
9329#ifdef HAVE_TPETRA_MMM_TIMINGS
9330 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMblock")));
9331#endif
9332 if (verbose) {
9333 std::ostringstream os;
9334 os << *verbosePrefix << "Create Import" << std::endl;
9335 std::cerr << os.str ();
9336 }
9337
9338#ifdef HAVE_TPETRA_MMM_TIMINGS
9339 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMCreateImporter")));
9340#endif
9341 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(new Teuchos::ParameterList);
9342 mypars->set("Timer Label","notMMFrom_tAFC");
9343 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9344 MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9345
9346 if (verbose) {
9347 std::ostringstream os;
9348 os << *verbosePrefix << "Call expertStaticFillComplete" << endl;
9349 std::cerr << os.str ();
9350 }
9351
9352#ifdef HAVE_TPETRA_MMM_TIMINGS
9353 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string("notMMdestMat->expertStaticFillComplete")));
9354 esfc_params.set("Timer Label",prefix+std::string("notMM eSFC"));
9355#else
9356 esfc_params.set("Timer Label",std::string("notMM eSFC"));
9357#endif
9358
9359 if (!params.is_null ()) {
9360 esfc_params.set ("compute global constants",
9361 params->get ("compute global constants", true));
9362 }
9363 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9364 MyImport, Teuchos::null,
9365 rcp (new Teuchos::ParameterList (esfc_params)));
9366 }
9367
9368 if (verbose) {
9369 std::ostringstream os;
9370 os << *verbosePrefix << "Done" << endl;
9371 std::cerr << os.str ();
9372 }
9373 }
9374
9375
// Convenience overload taking a single Import (`importer`).
//
// Forwards every argument unchanged to transferAndFillComplete, passing
// Teuchos::null in the slot that the two-transfer overload fills with a
// domain transfer.
//
// NOTE(review): the declarator lines (listing lines 9378-9379, i.e. the
// qualified function name and the `destMatrix` parameter) are missing from
// this extract. Judging by the `import_type` parameter this is presumably
// CrsMatrix::importAndFillComplete -- confirm against the real header.
9376 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9377 void
9380 const import_type& importer,
9381 const Teuchos::RCP<const map_type>& domainMap,
9382 const Teuchos::RCP<const map_type>& rangeMap,
9383 const Teuchos::RCP<Teuchos::ParameterList>& params) const
9384 {
9385 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
9386 }
9387
// Convenience overload taking both a row Import (`rowImporter`) and a
// domain Import (`domainImporter`).
//
// Forwards to transferAndFillComplete. `domainImporter` arrives by
// reference and is wrapped with Teuchos::rcpFromRef so that it can be
// passed in the RCP-typed domain-transfer slot.
//
// NOTE(review): the declarator lines (listing lines 9390-9391, i.e. the
// qualified function name and the `destMatrix` parameter) are missing from
// this extract. Judging by the `import_type` parameters this is presumably
// CrsMatrix::importAndFillComplete -- confirm against the real header.
9388 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9389 void
9392 const import_type& rowImporter,
9393 const import_type& domainImporter,
9394 const Teuchos::RCP<const map_type>& domainMap,
9395 const Teuchos::RCP<const map_type>& rangeMap,
9396 const Teuchos::RCP<Teuchos::ParameterList>& params) const
9397 {
9398 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
9399 }
9400
// Convenience overload taking a single Export (`exporter`).
//
// Forwards every argument unchanged to transferAndFillComplete, passing
// Teuchos::null in the slot that the two-transfer overload fills with a
// domain transfer.
//
// NOTE(review): the declarator lines (listing lines 9403-9404, i.e. the
// qualified function name and the `destMatrix` parameter) are missing from
// this extract. Judging by the `export_type` parameter this is presumably
// CrsMatrix::exportAndFillComplete -- confirm against the real header.
9401 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9402 void
9405 const export_type& exporter,
9406 const Teuchos::RCP<const map_type>& domainMap,
9407 const Teuchos::RCP<const map_type>& rangeMap,
9408 const Teuchos::RCP<Teuchos::ParameterList>& params) const
9409 {
9410 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
9411 }
9412
// Convenience overload taking both a row Export (`rowExporter`) and a
// domain Export (`domainExporter`).
//
// Forwards to transferAndFillComplete. `domainExporter` arrives by
// reference and is wrapped with Teuchos::rcpFromRef so that it can be
// passed in the RCP-typed domain-transfer slot.
//
// NOTE(review): the declarator lines (listing lines 9415-9416, i.e. the
// qualified function name and the `destMatrix` parameter) are missing from
// this extract. Judging by the `export_type` parameters this is presumably
// CrsMatrix::exportAndFillComplete -- confirm against the real header.
9413 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9414 void
9417 const export_type& rowExporter,
9418 const export_type& domainExporter,
9419 const Teuchos::RCP<const map_type>& domainMap,
9420 const Teuchos::RCP<const map_type>& rangeMap,
9421 const Teuchos::RCP<Teuchos::ParameterList>& params) const
9422 {
9423 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
9424 }
9425
9426
9427} // namespace Tpetra
9428
9429//
9430// Explicit instantiation macro
9431//
9432// Must be expanded from within the Tpetra namespace!
9433//
9434
// TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE)
//
// Expands to two explicit instantiations:
//   1. the class CrsMatrix<SCALAR, LO, GO, NODE> itself, and
//   2. its member template convert<SCALAR>(), i.e. the same-Scalar
//      conversion, which returns Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// The names are unqualified, so the expansion has to appear where CrsMatrix
// is visible (the surrounding comment requires the Tpetra namespace).
9435#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9436 \
9437 template class CrsMatrix< SCALAR , LO , GO , NODE >; \
9438 template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
9439 CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
9440
// TPETRA_CRSMATRIX_CONVERT_INSTANT(SO, SI, LO, GO, NODE)
//
// Explicitly instantiates the member template convert<SO>() of
// CrsMatrix<SI, LO, GO, NODE>. SI is the Scalar type of the matrix being
// converted and SO is the Scalar type of the returned matrix
// (Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>).
9441#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9442 \
9443 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9444 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9445
// TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
//
// Emits a declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>, in
// its single-Import form: (sourceMatrix, importer, domainMap, rangeMap,
// params). The Import and Map template arguments are spelled through the
// matrix's own local_ordinal_type / global_ordinal_type / node_type
// typedefs rather than through LO / GO / NODE directly.
//
// NOTE(review): `template<>` followed by a declaration is the syntax of an
// explicit specialization declaration; an explicit instantiation would be
// written `template` without `<>`. Confirm that this is intended.
9446#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9447 template<> \
9448 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9449 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9450 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9451 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9452 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9453 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9454 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9455 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9456 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9457 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9458 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9459 const Teuchos::RCP<Teuchos::ParameterList>& params);
9460
// TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
//
// Emits a declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>, in
// its two-Import form: (sourceMatrix, rowImporter, domainImporter,
// domainMap, rangeMap, params).
//
// NOTE(review): `template<>` followed by a declaration is the syntax of an
// explicit specialization declaration; an explicit instantiation would be
// written `template` without `<>`. Confirm that this is intended.
9461#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9462 template<> \
9463 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9464 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9465 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9466 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9467 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9468 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9469 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9470 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9471 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9472 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9473 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9474 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9475 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9476 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9477 const Teuchos::RCP<Teuchos::ParameterList>& params);
9478
9479
// TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
//
// Emits a declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>, in
// its single-Export form: (sourceMatrix, exporter, domainMap, rangeMap,
// params).
//
// NOTE(review): `template<>` followed by a declaration is the syntax of an
// explicit specialization declaration; an explicit instantiation would be
// written `template` without `<>`. Confirm that this is intended.
9480#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9481 template<> \
9482 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9483 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9484 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9485 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9486 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9487 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9488 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9489 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9490 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9491 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9492 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9493 const Teuchos::RCP<Teuchos::ParameterList>& params);
9494
// TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
//
// Emits a declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>, in
// its two-Export form: (sourceMatrix, rowExporter, domainExporter,
// domainMap, rangeMap, params).
//
// NOTE(review): `template<>` followed by a declaration is the syntax of an
// explicit specialization declaration; an explicit instantiation would be
// written `template` without `<>`. Confirm that this is intended.
9495#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9496 template<> \
9497 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9498 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9499 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9500 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9501 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9502 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9503 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9504 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9505 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9506 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9507 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9508 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9509 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9510 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9511 const Teuchos::RCP<Teuchos::ParameterList>& params);
9512
9513
// TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO, NODE)
//
// Umbrella macro: expands, in this order, to
//   TPETRA_CRSMATRIX_MATRIX_INSTANT,
//   TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT,
//   TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT,
//   TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO,
//   TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO,
// each with the same (SCALAR, LO, GO, NODE) arguments.
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion.
9514#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9515 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9516 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9517 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9518 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9519 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9520
9521#endif // TPETRA_CRSMATRIX_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular,...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Declaration of Tpetra::Details::iallreduce.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
KOKKOS_FUNCTION int unpackRow(const Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, const Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. KDDKDD UVM Removal: Device view takes place of k_lclInds...
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row,...
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
size_t getNodeNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
void computeGlobalConstants()
Compute matrix properties that require collectives.
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
bool isFillActive() const
Whether the matrix is not fill complete.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
bool fillComplete_
Whether the matrix is fill complete.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
std::string description() const override
A one-line description of this object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current range Map and Export with the given objects.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
local_matrix_device_type::values_type getLocalValuesView() const
Get the Kokkos local values.
Scalar scalar_type
The type of each entry in the matrix.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix, written into the given Vector.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
void clearGlobalConstants()
Clear matrix properties that require collectives.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data and maps of *this with those of the given matrix.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
mag_type frobNorm_
Cached Frobenius norm of the (global) matrix.
bool isFillComplete() const override
Whether the matrix is fill complete.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
ProfileType getProfileType() const
Returns the profile type (ProfileType) describing how the matrix's data structures were allocated.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
size_t getNodeNumEntries() const override
The local number of entries in this matrix.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row,...
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local column indices.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
bool isLocallyComplete() const
Is this Export or Import locally complete?
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
One or more distributed dense vectors.
void reduce()
Sum values of a locally replicated multivector across all processes.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
size_t getLocalLength() const
Local number of rows on the calling process.
size_t getNumVectors() const
Number of columns in the multivector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y....
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X....
A read-only, row-oriented interface to a sparse matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps,...
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map, in which each global index is owned by only one process.
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ADD
Sum new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.
Functor for the ABSMAX CombineMode of Import and Export operations.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.