40#ifndef TPETRA_DETAILS_CRSUTILS_HPP
41#define TPETRA_DETAILS_CRSUTILS_HPP
45#include "TpetraCore_config.h"
46#include "Kokkos_Core.hpp"
48#include "Tpetra_Details_CrsPadding.hpp"
49#include "Tpetra_Details_WrappedDualView.hpp"
52#include <unordered_map>
65template<
class ViewType>
67make_uninitialized_view(
68 const std::string& name,
71 const std::string*
const prefix)
74 std::ostringstream os;
75 os << *prefix <<
"Allocate Kokkos::View " << name
76 <<
": " << size << std::endl;
77 std::cerr << os.str();
79 using Kokkos::view_alloc;
80 using Kokkos::WithoutInitializing;
81 return ViewType(view_alloc(name, WithoutInitializing), size);
84template<
class ViewType>
87 const std::string& name,
90 const std::string*
const prefix)
93 std::ostringstream os;
94 os << *prefix <<
"Allocate & initialize Kokkos::View "
95 << name <<
": " << size << std::endl;
96 std::cerr << os.str();
98 return ViewType(name, size);
101template<
class OutViewType,
class InViewType>
103assign_to_view(OutViewType& out,
104 const InViewType& in,
105 const char viewName[],
107 const std::string*
const prefix)
110 std::ostringstream os;
111 os << *prefix <<
"Assign to Kokkos::View " << viewName
112 <<
": Old size: " << out.extent(0)
113 <<
", New size: " << in.extent(0) << std::endl;
114 std::cerr << os.str();
119template<
class MemorySpace,
class ViewType>
120auto create_mirror_view(
121 const MemorySpace& memSpace,
122 const ViewType& view,
124 const std::string*
const prefix) ->
125 decltype(Kokkos::create_mirror_view(memSpace, view))
128 std::ostringstream os;
129 os << *prefix <<
"Create mirror view: "
130 <<
"view.extent(0): " << view.extent(0) << std::endl;
131 std::cerr << os.str();
133 return Kokkos::create_mirror_view(memSpace, view);
136enum class PadCrsAction {
149template<
class RowPtr,
class Indices,
class Values,
class Padding>
152 const PadCrsAction action,
153 const RowPtr& row_ptr_beg,
154 const RowPtr& row_ptr_end,
155 Indices& indices_wdv,
157 const Padding& padding,
161 using Kokkos::view_alloc;
162 using Kokkos::WithoutInitializing;
164 std::unique_ptr<std::string> prefix;
166 const size_t maxNumToPrint = verbose ?
169 std::ostringstream os;
170 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
171 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
172 os <<
"Start" << endl;
173 std::cerr << os.str();
175 Kokkos::HostSpace hostSpace;
178 std::ostringstream os;
179 os << *prefix <<
"On input: ";
181 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
187 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
191 os <<
", indices.extent(0): " << indices_wdv.extent(0)
192 <<
", values.extent(0): " << values_wdv.extent(0)
196 std::cerr << os.str();
199 if (row_ptr_beg.size() == 0) {
201 std::ostringstream os;
202 os << *prefix <<
"Done; local matrix has no rows" << endl;
203 std::cerr << os.str();
208 const size_t lclNumRows(row_ptr_beg.size() - 1);
209 RowPtr newAllocPerRow =
210 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
211 verbose, prefix.get());
213 std::ostringstream os;
214 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
215 std::cerr << os.str();
220 auto row_ptr_end_h = create_mirror_view(
221 hostSpace, row_ptr_end, verbose, prefix.get());
223 auto row_ptr_beg_h = create_mirror_view(
224 hostSpace, row_ptr_beg, verbose, prefix.get());
227 auto newAllocPerRow_h = create_mirror_view(
228 hostSpace, newAllocPerRow, verbose, prefix.get());
229 using host_range_type = Kokkos::RangePolicy<
230 Kokkos::DefaultHostExecutionSpace,
size_t>;
231 Kokkos::parallel_reduce
232 (
"Tpetra::CrsGraph: Compute new allocation size per row",
233 host_range_type(0, lclNumRows),
234 [&] (
const size_t lclRowInd,
size_t& lclIncrease) {
235 const size_t start = row_ptr_beg_h[lclRowInd];
236 const size_t end = row_ptr_beg_h[lclRowInd+1];
237 TEUCHOS_ASSERT( end >= start );
238 const size_t oldAllocSize = end - start;
239 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
240 TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
247 auto result = padding.get_result(lclRowInd);
248 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
249 if (newNumEnt > oldAllocSize) {
250 lclIncrease += (newNumEnt - oldAllocSize);
251 newAllocPerRow_h[lclRowInd] = newNumEnt;
254 newAllocPerRow_h[lclRowInd] = oldAllocSize;
259 std::ostringstream os;
260 os << *prefix <<
"increase: " << increase <<
", ";
264 std::cerr << os.str();
273 using inds_value_type =
274 typename Indices::DeviceViewType::non_const_value_type;
275 using vals_value_type =
typename Values::DeviceViewType::non_const_value_type;
277 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
278 const size_t newIndsSize = size_t(indices_old.size()) + increase;
279 auto indices_new = make_uninitialized_view<typename Indices::DeviceViewType>(
280 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
283 typename Values::DeviceViewType values_new;
284 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
285 if (action == PadCrsAction::INDICES_AND_VALUES) {
286 const size_t newValsSize = newIndsSize;
289 values_new = make_initialized_view<typename Values::DeviceViewType>(
290 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
294 std::ostringstream os;
295 os << *prefix <<
"Repack" << endl;
296 std::cerr << os.str();
298 using execution_space =
typename Indices::DeviceViewType::execution_space;
299 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
300 Kokkos::parallel_scan(
301 "Tpetra::CrsGraph or CrsMatrix repack",
302 range_type(
size_t(0),
size_t(lclNumRows+1)),
303 KOKKOS_LAMBDA (
const size_t lclRow,
size_t& newRowBeg,
304 const bool finalPass)
309 const size_t row_beg = row_ptr_beg[lclRow];
310 const size_t row_end =
311 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
312 const size_t numEnt = row_end - row_beg;
313 const size_t newRowAllocSize =
314 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
316 if (lclRow < lclNumRows) {
317 const Kokkos::pair<size_t, size_t> oldRange(
318 row_beg, row_beg + numEnt);
319 const Kokkos::pair<size_t, size_t> newRange(
320 newRowBeg, newRowBeg + numEnt);
321 auto oldColInds = Kokkos::subview(indices_old, oldRange);
322 auto newColInds = Kokkos::subview(indices_new, newRange);
325 memcpy(newColInds.data(), oldColInds.data(),
326 numEnt *
sizeof(inds_value_type));
327 if (action == PadCrsAction::INDICES_AND_VALUES) {
329 Kokkos::subview(values_old, oldRange);
330 auto newVals = Kokkos::subview(values_new, newRange);
331 memcpy(newVals.data(), oldVals.data(),
332 numEnt *
sizeof(vals_value_type));
336 row_ptr_beg[lclRow] = newRowBeg;
337 if (lclRow < lclNumRows) {
338 row_ptr_end[lclRow] = newRowBeg + numEnt;
341 newRowBeg += newRowAllocSize;
346 std::ostringstream os;
350 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
358 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
364 std::cout << os.str();
367 indices_wdv = Indices(indices_new);
368 values_wdv = Values(values_new);
371 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
372 auto values_h = values_wdv.getHostView(Access::ReadOnly);
373 std::ostringstream os;
384 std::ostringstream os;
385 os << *prefix <<
"Done" << endl;
386 std::cerr << os.str();
391template <
class Po
inters,
class InOutIndices,
class InIndices,
class IndexMap>
394 typename Pointers::value_type
const row,
395 Pointers
const& row_ptrs,
396 InOutIndices& cur_indices,
397 size_t& num_assigned,
398 InIndices
const& new_indices,
400 std::function<
void(
size_t const,
size_t const,
size_t const)> cb)
402 if (new_indices.size() == 0) {
406 if (cur_indices.size() == 0) {
408 return Teuchos::OrdinalTraits<size_t>::invalid();
411 using offset_type =
typename std::decay<
decltype (row_ptrs[0])>::type;
412 using ordinal_type =
typename std::decay<
decltype (cur_indices[0])>::type;
414 const offset_type start = row_ptrs[row];
415 offset_type end = start +
static_cast<offset_type
> (num_assigned);
416 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t (0) :
417 row_ptrs[row + 1] - end;
418 const size_t num_new_indices =
static_cast<size_t> (new_indices.size ());
419 size_t num_inserted = 0;
421 size_t numIndicesLookup = num_assigned + num_new_indices;
424 const size_t useLookUpTableThreshold = 400;
426 if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
429 for (
size_t k = 0; k < num_new_indices; ++k) {
430 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
431 offset_type row_offset = start;
432 for (; row_offset < end; ++row_offset) {
433 if (idx == cur_indices[row_offset]) {
438 if (row_offset == end) {
439 if (num_inserted >= num_avail) {
440 return Teuchos::OrdinalTraits<size_t>::invalid();
443 cur_indices[end++] = idx;
447 cb(k, start, row_offset - start);
453 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
456 for (
size_t k = 0; k < num_assigned; k++) {
457 idxLookup[cur_indices[start+k]] = start+k;
461 for (
size_t k = 0; k < num_new_indices; k++) {
462 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
463 offset_type row_offset;
465 auto it = idxLookup.find(idx);
466 if (it == idxLookup.end()) {
467 if (num_inserted >= num_avail) {
468 return Teuchos::OrdinalTraits<size_t>::invalid();
472 cur_indices[end++] = idx;
473 idxLookup[idx] = row_offset;
478 row_offset = it->second;
481 cb(k, start, row_offset - start);
485 num_assigned += num_inserted;
490template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
493 typename Pointers::value_type
const row,
494 Pointers
const& row_ptrs,
495 const size_t curNumEntries,
496 Indices1
const& cur_indices,
497 Indices2
const& new_indices,
501 if (new_indices.size() == 0)
505 typename std::remove_const<typename Indices1::value_type>::type;
506 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
508 const size_t start =
static_cast<size_t> (row_ptrs[row]);
509 const size_t end = start + curNumEntries;
510 size_t num_found = 0;
511 for (
size_t k = 0; k < new_indices.size(); k++)
513 auto row_offset = start;
514 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
515 if (idx == invalid_ordinal)
517 for (; row_offset < end; row_offset++)
519 if (idx == cur_indices[row_offset])
521 std::forward<Callback>(cb)(k, start, row_offset - start);
549template<
class RowPtr,
class Indices,
class Padding>
552 const RowPtr& rowPtrBeg,
553 const RowPtr& rowPtrEnd,
554 Indices& indices_wdv,
555 const Padding& padding,
559 using impl::pad_crs_arrays;
562 pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
563 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
564 indices_wdv, values_null, padding, my_rank, verbose);
567template<
class RowPtr,
class Indices,
class Values,
class Padding>
570 const RowPtr& rowPtrBeg,
571 const RowPtr& rowPtrEnd,
572 Indices& indices_wdv,
574 const Padding& padding,
578 using impl::pad_crs_arrays;
579 pad_crs_arrays<RowPtr, Indices, Values, Padding>(
580 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
581 indices_wdv, values_wdv, padding, my_rank, verbose);
629template <
class Po
inters,
class InOutIndices,
class InIndices>
632 typename Pointers::value_type
const row,
633 Pointers
const& rowPtrs,
634 InOutIndices& curIndices,
636 InIndices
const& newIndices,
637 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
638 std::function<
void(
const size_t,
const size_t,
const size_t)>())
640 static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
641 typename std::remove_const<typename InIndices::value_type>::type>::value,
642 "Expected views to have same value type");
645 using ordinal =
typename InOutIndices::value_type;
646 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
647 numAssigned, newIndices, [](ordinal
const idx) {
return idx; }, cb);
651template <
class Po
inters,
class InOutIndices,
class InIndices>
654 typename Pointers::value_type
const row,
655 Pointers
const& rowPtrs,
656 InOutIndices& curIndices,
658 InIndices
const& newIndices,
659 std::function<
typename InOutIndices::value_type(
const typename InIndices::value_type)> map,
660 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
661 std::function<
void(
const size_t,
const size_t,
const size_t)>())
663 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
664 numAssigned, newIndices, map, cb);
698template <
class Po
inters,
class Indices1,
class Indices2,
class Callback>
701 typename Pointers::value_type
const row,
702 Pointers
const& rowPtrs,
703 const size_t curNumEntries,
704 Indices1
const& curIndices,
705 Indices2
const& newIndices,
708 static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
709 typename std::remove_const<typename Indices2::value_type>::type>::value,
710 "Expected views to have same value type");
712 using ordinal =
typename Indices2::value_type;
713 auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
714 [=](ordinal ind){
return ind; }, cb);
718template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
721 typename Pointers::value_type
const row,
722 Pointers
const& rowPtrs,
723 const size_t curNumEntries,
724 Indices1
const& curIndices,
725 Indices2
const& newIndices,
729 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void pad_crs_arrays(const PadCrsAction action, const RowPtr &row_ptr_beg, const RowPtr &row_ptr_end, Indices &indices_wdv, Values &values_wdv, const Padding &padding, const int my_rank, const bool verbose)
Implementation of padCrsArrays.
size_t find_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, const size_t curNumEntries, Indices1 const &cur_indices, Indices2 const &new_indices, IndexMap &&map, Callback &&cb)
Implementation of findCrsIndices.
size_t insert_crs_indices(typename Pointers::value_type const row, Pointers const &row_ptrs, InOutIndices &cur_indices, size_t &num_assigned, InIndices const &new_indices, IndexMap &&map, std::function< void(size_t const, size_t const, size_t const)> cb)
Implementation of insertCrsIndices.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.