Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_CrsArrays.hpp
Go to the documentation of this file.
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
47
48#ifndef __IFPACK2_CRSARRAYS_DECL_HPP__
49#define __IFPACK2_CRSARRAYS_DECL_HPP__
50
51#include <Tpetra_RowMatrix.hpp>
52#include <Tpetra_CrsMatrix.hpp>
53#include <Kokkos_DefaultNode.hpp>
54#include <KokkosSparse_CrsMatrix.hpp>
55#include <Ifpack2_LocalFilter.hpp>
56#include <Ifpack2_ReorderFilter.hpp>
57
58namespace Ifpack2
59{
60namespace Details
61{
62
63//Utility for getting the local values, rowptrs and colinds (in Kokkos::Views) for any RowMatrix
64//Used by Fic, Filu and Fildl but may also be useful in other classes
65template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
66struct CrsArrayReader
67{
68 typedef typename Node::device_type device_type;
69 typedef typename device_type::execution_space execution_space;
70 typedef Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TRowMatrix;
71 typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TCrsMatrix;
73 typedef Ifpack2::ReorderFilter<TRowMatrix> ReordFilter;
74 typedef KokkosSparse::CrsMatrix<Scalar, LocalOrdinal, execution_space> KCrsMatrix;
75 typedef Kokkos::View<LocalOrdinal*, execution_space> OrdinalArray;
76 typedef Kokkos::View<Scalar*, execution_space> ScalarArray;
77 typedef Kokkos::View<LocalOrdinal*, Kokkos::HostSpace> OrdinalArrayHost;
78 typedef Kokkos::View<Scalar*, Kokkos::HostSpace> ScalarArrayHost;
80 typedef Kokkos::Serial functor_space;
81 typedef Kokkos::RangePolicy<functor_space, int> RangePol;
82
85 {
86 CountEntriesFunctor(const TRowMatrix* A_, OrdinalArrayHost& rowptrs_) : A(A_), rowptrs(rowptrs_) {}
87 KOKKOS_INLINE_FUNCTION void operator()(const int row) const
88 {
89 rowptrs(row) = A->getNumEntriesInLocalRow(row);
90 }
91 const TRowMatrix* A;
92 OrdinalArrayHost& rowptrs;
93 };
94
97 {
98 GetIndicesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, OrdinalArrayHost& colinds_) :
99 A(A_), rowptrs(rowptrs_), colinds(colinds_) {}
100 KOKKOS_INLINE_FUNCTION void operator()(const int row) const
101 {
102 LocalOrdinal offset = rowptrs(row);
103 size_t entries = rowptrs(row + 1) - offset;
104 Teuchos::Array<Scalar> valsArray(entries);
105 Teuchos::Array<LocalOrdinal> indicesArray(entries);
106 auto valsView = valsArray();
107 auto indicesView = indicesArray();
108 A->getLocalRowCopy(row, indicesView, valsView, entries);
109 std::sort(indicesView.getRawPtr(), indicesView().getRawPtr() + entries);
110 for(LocalOrdinal i = 0; i < entries; i++)
111 {
112 colinds(offset + i) = indicesView[i];
113 }
114 }
115 const TRowMatrix* A;
116 const OrdinalArrayHost& rowptrs;
117 OrdinalArrayHost& colinds;
118 };
119
122 {
123 GetValuesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, ScalarArrayHost& vals_) :
124 A(A_), rowptrs(rowptrs_), vals(vals_) {}
125 KOKKOS_INLINE_FUNCTION void operator()(const int row) const
126 {
127 LocalOrdinal offset = rowptrs(row);
128 size_t entries = rowptrs(row + 1) - offset;
129 Teuchos::Array<Scalar> valsArray(entries);
130 Teuchos::Array<LocalOrdinal> indicesArray(entries);
131 auto valsView = valsArray();
132 auto indicesView = indicesArray();
133 A->getLocalRowCopy(row, indicesView, valsView, entries);
134 Tpetra::sort2(indicesView.getRawPtr(), indicesView.getRawPtr() + entries, valsView.getRawPtr());
135 for(LocalOrdinal i = 0; i < entries; i++)
136 {
137 vals(offset + i) = valsView[i];
138 }
139 }
140 const TRowMatrix* A;
141 const OrdinalArrayHost& rowptrs;
142 ScalarArrayHost& vals;
143 };
144
146 // \param[in] A The matrix
147 // \param[out] vals The values array on device (allocated inside function)
148 // \param[in] rowptrs The rowptrs host view provided by getStructure()
149 static void getValues(const TRowMatrix* A, ScalarArray& vals, OrdinalArrayHost& rowptrs)
150 {
151 auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
152 if(Acrs)
153 {
154 getValuesCrs(Acrs, vals);
155 return;
156 }
157 //Allocate host values and device values
158 LocalOrdinal nrows = A->getNodeNumRows();
159 ScalarArrayHost valsHost("Values (host)", rowptrs[nrows]);
160 vals = ScalarArray("Values", rowptrs[nrows]);
161 GetValuesFunctor funct(A, rowptrs, valsHost);
162 Kokkos::parallel_for(RangePol(0, nrows), funct);
163 Kokkos::deep_copy(vals, valsHost);
164 }
165
167 // \param[in] A The matrix
168 // \param[out] rowptrsHost The rowptrs array, in host space (allocated inside function)
169 // \param[out] rowptrs The rowptrs host array, in device space (allocated inside function). Will have exactly the same values as rowptrsHost.
170 // \param[out] colinds The colinds array, in device space (allocated inside function)
171 static void getStructure(const TRowMatrix* A, OrdinalArrayHost& rowptrsHost, OrdinalArray& rowptrs, OrdinalArray& colinds)
172 {
173 auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
174 if(Acrs)
175 {
176 getStructureCrs(Acrs, rowptrs, colinds);
177 return;
178 }
179 //Need to allocate new array, then copy in one row at a time
180 //note: actual rowptrs in the CrsMatrix implementation is size_t, but
181 //FastILU needs them as LocalOrdinal so this function provides an OrdinalArray
182 LocalOrdinal nrows = A->getNodeNumRows();
183 rowptrsHost = OrdinalArrayHost("RowPtrs (host)", nrows + 1);
184 rowptrs = OrdinalArray("RowPtrs", nrows + 1);
185 CountEntriesFunctor countFunct(A, rowptrsHost);
186 Kokkos::parallel_for(RangePol(0, nrows), countFunct);
187 //convert rowptrsHost to prefix sum
188 {
189 LocalOrdinal accum = 0;
190 for(LocalOrdinal i = 0; i < nrows; i++)
191 {
192 LocalOrdinal temp = rowptrsHost[i];
193 rowptrsHost[i] = accum;
194 accum += temp;
195 }
196 rowptrsHost[nrows] = accum;
197 }
198 //Get colinds (host)
199 OrdinalArrayHost colindsHost = OrdinalArrayHost("ColInds (host)", rowptrsHost[nrows]);
200 colinds = OrdinalArray("ColInds", rowptrsHost[nrows]);
201 GetIndicesFunctor indicesFunct(A, rowptrsHost, colindsHost);
202 Kokkos::parallel_for(RangePol(0, nrows), indicesFunct);
203 //copy rowptrs and colinds to device
204 Kokkos::deep_copy(rowptrs, rowptrsHost);
205 Kokkos::deep_copy(colinds, colindsHost);
206 }
207
208 private:
209
211 static void getValuesCrs(const TCrsMatrix* A, ScalarArray& vals)
212 {
213 vals = A->getLocalMatrix().values;
214 }
215
217 static void getStructureCrs(const TCrsMatrix* A, OrdinalArray& rowptrs, OrdinalArray& colinds)
218 {
219 //rowptrs really have data type size_t, but need them as LocalOrdinal, so must convert manually
220 auto rowmap = A->getLocalMatrix().graph.row_map;
221 //allocate rowptrs, it's a deep copy (colinds is a shallow copy so not necessary for it)
222 rowptrs = OrdinalArray("RowPtrs", A->getNodeNumRows() + 1);
223 for(size_t i = 0; i < rowmap.extent(0); i++)
224 {
225 rowptrs[i] = rowmap[i];
226 }
227 colinds = A->getLocalMatrix().graph.entries;
228 }
229};
230
231} //Details
232} //Ifpack2
233
234#endif
235
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:163
Wraps a Tpetra::RowMatrix in a filter that reorders local rows and columns.
Definition: Ifpack2_ReorderFilter_decl.hpp:70
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:73
Functor for counting matrix entries per row in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:85
Functor for getting column indices in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:97
Functor for getting matrix values in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:122