Tpetra parallel linear algebra Version of the Day
Tpetra_BlockCrsMatrix_Helpers_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
41#define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
42
44
45#include "Tpetra_BlockCrsMatrix.hpp"
46#include "Tpetra_CrsMatrix.hpp"
47#include "Tpetra_HashTable.hpp"
48#include "Tpetra_Import.hpp"
49#include "Tpetra_Map.hpp"
50#include "Tpetra_MultiVector.hpp"
51#include "Teuchos_ParameterList.hpp"
52#include "Teuchos_ScalarTraits.hpp"
53#include <ctime>
54#include <fstream>
55
56namespace Tpetra {
57
58 template<class Scalar, class LO, class GO, class Node>
59 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName) {
60 Teuchos::ParameterList pl;
61 std::ofstream out;
62 out.open(fileName.c_str());
63 blockCrsMatrixWriter(A, out, pl);
64 }
65
66 template<class Scalar, class LO, class GO, class Node>
67 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName, Teuchos::ParameterList const &params) {
68 std::ofstream out;
69 out.open(fileName.c_str());
70 blockCrsMatrixWriter(A, out, params);
71 }
72
73 template<class Scalar, class LO, class GO, class Node>
75 Teuchos::ParameterList pl;
76 blockCrsMatrixWriter(A, os, pl);
77 }
78
79 template<class Scalar, class LO, class GO, class Node>
80 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
81
82 using Teuchos::RCP;
83 using Teuchos::rcp;
84
85 typedef Teuchos::OrdinalTraits<GO> TOT;
86 typedef BlockCrsMatrix<Scalar, LO, GO, Node> block_crs_matrix_type;
87 typedef Tpetra::Import<LO, GO, Node> import_type;
88 typedef Tpetra::Map<LO, GO, Node> map_type;
90 typedef Tpetra::CrsGraph<LO, GO, Node> crs_graph_type;
91
92 RCP<const map_type> const rowMap = A.getRowMap(); //"mesh" map
93 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
94 const int myRank = comm->getRank();
95 const size_t numProcs = comm->getSize();
96
97 // If true, force use of the import strip-mining infrastructure. This is useful for debugging on one process.
98 bool alwaysUseParallelAlgorithm = false;
99 if (params.isParameter("always use parallel algorithm"))
100 alwaysUseParallelAlgorithm = params.get<bool>("always use parallel algorithm");
101 bool printMatrixMarketHeader = true;
102 if (params.isParameter("print MatrixMarket header"))
103 printMatrixMarketHeader = params.get<bool>("print MatrixMarket header");
104
105 if (printMatrixMarketHeader && myRank==0) {
106 std::time_t now = std::time(NULL);
107
108 const std::string dataTypeStr =
109 Teuchos::ScalarTraits<Scalar>::isComplex ? "complex" : "real";
110
111 // Explanation of first line of file:
112 // - "%%MatrixMarket" is the tag for Matrix Market format.
113 // - "matrix" is what we're printing.
114 // - "coordinate" means sparse (triplet format), rather than dense.
115 // - "real" / "complex" is the type (in an output format sense,
116 // not in a C++ sense) of each value of the matrix. (The
117 // other two possibilities are "integer" (not really necessary
118 // here) and "pattern" (no values, just graph).)
119 os << "%%MatrixMarket matrix coordinate " << dataTypeStr << " general" << std::endl;
120 os << "% time stamp: " << ctime(&now);
121 os << "% written from " << numProcs << " processes" << std::endl;
122 os << "% point representation of Tpetra::BlockCrsMatrix" << std::endl;
123 size_t numRows = A.getGlobalNumRows();
124 size_t numCols = A.getGlobalNumCols();
125 os << "% " << numRows << " block rows, " << numCols << " block columns" << std::endl;
126 const LO blockSize = A.getBlockSize();
127 os << "% block size " << blockSize << std::endl;
128 os << numRows*blockSize << " " << numCols*blockSize << " " << A.getGlobalNumEntries()*blockSize*blockSize << std::endl;
129 }
130
131 if (numProcs==1 && !alwaysUseParallelAlgorithm) {
132 writeMatrixStrip(A,os,params);
133 } else {
134 size_t numRows = rowMap->getNodeNumElements();
135
136 //Create source map
137 RCP<const map_type> allMeshGidsMap = rcp(new map_type(TOT::invalid(), numRows, A.getIndexBase(), comm));
138 //Create and populate vector of mesh GIDs corresponding to this pid's rows.
139 //This vector will be imported one pid's worth of information at a time to pid 0.
140 mv_type allMeshGids(allMeshGidsMap,1);
141 Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
142
143 for (size_t i=0; i<numRows; i++)
144 allMeshGidsData[i] = rowMap->getGlobalElement(i);
145 allMeshGidsData = Teuchos::null;
146
147 // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A.
148 size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
149 size_t remainder = allMeshGids.getGlobalLength() % numProcs;
150 size_t curStart = 0;
151 size_t curStripSize = 0;
152 Teuchos::Array<GO> importMeshGidList;
153 for (size_t i=0; i<numProcs; i++) {
154 if (myRank==0) { // Only PE 0 does this part
155 curStripSize = stripSize;
156 if (i<remainder) curStripSize++; // handle leftovers
157 importMeshGidList.resize(curStripSize); // Set size of vector to max needed
158 for (size_t j=0; j<curStripSize; j++) importMeshGidList[j] = j + curStart + A.getIndexBase();
159 curStart += curStripSize;
160 }
161 // The following import map should be non-trivial only on PE 0.
162 TEUCHOS_TEST_FOR_EXCEPTION(myRank>0 && curStripSize!=0,
163 std::runtime_error, "Tpetra::blockCrsMatrixWriter: (pid "
164 << myRank << ") map size should be zero, but is " << curStripSize);
165 RCP<map_type> importMeshGidMap = rcp(new map_type(TOT::invalid(), importMeshGidList(), A.getIndexBase(), comm));
166 import_type gidImporter(allMeshGidsMap, importMeshGidMap);
167 mv_type importMeshGids(importMeshGidMap, 1);
168 importMeshGids.doImport(allMeshGids, gidImporter, INSERT);
169
170 // importMeshGids now has a list of GIDs for the current strip of matrix rows.
171 // Use these values to build another importer that will get rows of the matrix.
172
173 // The following import map will be non-trivial only on PE 0.
174 Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
175 Teuchos::Array<GO> importMeshGidsGO;
176 importMeshGidsGO.reserve(importMeshGidsData.size());
177 for (typename Teuchos::ArrayRCP<const GO>::size_type j=0; j<importMeshGidsData.size(); ++j)
178 importMeshGidsGO.push_back(importMeshGidsData[j]);
179 RCP<const map_type> importMap = rcp(new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm) );
180
181 import_type importer(rowMap,importMap );
182 size_t numEntriesPerRow = A.getCrsGraph().getGlobalMaxNumRowEntries();
183 RCP<crs_graph_type> graph = createCrsGraph(importMap,numEntriesPerRow);
184 RCP<const map_type> domainMap = A.getCrsGraph().getDomainMap();
185 graph->doImport(A.getCrsGraph(), importer, INSERT);
186 graph->fillComplete(domainMap, importMap);
187
188 block_crs_matrix_type importA(*graph, A.getBlockSize());
189 importA.doImport(A, importer, INSERT);
190
191 // Finally we are ready to write this strip of the matrix
192 writeMatrixStrip(importA, os, params);
193 }
194 }
195 }
196
197 template<class Scalar, class LO, class GO, class Node>
198 void writeMatrixStrip(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
199 using Teuchos::RCP;
200 using map_type = Tpetra::Map<LO, GO, Node>;
201 using bcrs_type = BlockCrsMatrix<Scalar,LO,GO,Node>;
202 using bcrs_local_inds_host_view_type = typename bcrs_type::local_inds_host_view_type;
203 using bcrs_values_host_view_type = typename bcrs_type::values_host_view_type;
204 using impl_scalar_type = typename bcrs_type::impl_scalar_type;
205
206 size_t numRows = A.getGlobalNumRows();
207 RCP<const map_type> rowMap = A.getRowMap();
208 RCP<const map_type> colMap = A.getColMap();
209 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
210 const int myRank = comm->getRank();
211
212 const size_t meshRowOffset = rowMap->getIndexBase();
213 const size_t meshColOffset = colMap->getIndexBase();
214 TEUCHOS_TEST_FOR_EXCEPTION(meshRowOffset != meshColOffset,
215 std::runtime_error, "Tpetra::writeMatrixStrip: "
216 "mesh row index base != mesh column index base");
217
218 if (myRank !=0) {
219
220 TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumRows() != 0,
221 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
222 << myRank << " should have 0 rows but has " << A.getNodeNumRows());
223 TEUCHOS_TEST_FOR_EXCEPTION(A.getNodeNumCols() != 0,
224 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
225 << myRank << " should have 0 columns but has " << A.getNodeNumCols());
226
227 } else {
228
229 TEUCHOS_TEST_FOR_EXCEPTION(numRows != A.getNodeNumRows(),
230 std::runtime_error, "Tpetra::writeMatrixStrip: "
231 "number of rows on pid 0 does not match global number of rows");
232
233
234 int err = 0;
235 const LO blockSize = A.getBlockSize();
236 const size_t numLocalRows = A.getNodeNumRows();
237 bool precisionChanged=false;
238 int oldPrecision = 0; // avoid "unused variable" warning
239 if (params.isParameter("precision")) {
240 oldPrecision = os.precision(params.get<int>("precision"));
241 precisionChanged=true;
242 }
243 int pointOffset = 1;
244 if (params.isParameter("zero-based indexing")) {
245 if (params.get<bool>("zero-based indexing") == true)
246 pointOffset = 0;
247 }
248
249 size_t localRowInd;
250 for (localRowInd = 0; localRowInd < numLocalRows; ++localRowInd) {
251
252 // Get a view of the current row.
253 bcrs_local_inds_host_view_type localColInds;
254 bcrs_values_host_view_type vals;
255 LO numEntries;
256 A.getLocalRowView (localRowInd, localColInds, vals); numEntries = localColInds.extent(0);
257 GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
258
259 for (LO k = 0; k < numEntries; ++k) {
260 GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
261 const impl_scalar_type* curBlock = vals.data() + blockSize * blockSize * k;
262 // Blocks are stored in row-major format.
263 for (LO j = 0; j < blockSize; ++j) {
264 GO globalPointRowID = globalMeshRowID * blockSize + j + pointOffset;
265 for (LO i = 0; i < blockSize; ++i) {
266 GO globalPointColID = globalMeshColID * blockSize + i + pointOffset;
267 const impl_scalar_type curVal = curBlock[i + j * blockSize];
268
269 os << globalPointRowID << " " << globalPointColID << " ";
270 if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
271 // Matrix Market format wants complex values to be
272 // written as space-delimited pairs. See Bug 6469.
273 os << Teuchos::ScalarTraits<impl_scalar_type>::real (curVal) << " "
274 << Teuchos::ScalarTraits<impl_scalar_type>::imag (curVal);
275 }
276 else {
277 os << curVal;
278 }
279 os << std::endl;
280 }
281 }
282 }
283 }
284 if (precisionChanged)
285 os.precision(oldPrecision);
286 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
287 std::runtime_error, "Tpetra::writeMatrixStrip: "
288 "error getting view of local row " << localRowInd);
289
290 }
291
292 }
293
294 template<class Scalar, class LO, class GO, class Node>
295 Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
297 {
298
299 /*
300 ASSUMPTIONS:
301
302 1) In point matrix, all entries associated with a little block are present (even if they are zero).
303 2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
304 3) Point column map and block column map are ordered consistently.
305 */
306
307 using Teuchos::Array;
308 using Teuchos::ArrayView;
309 using Teuchos::RCP;
310
311 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
312 typedef Tpetra::Map<LO,GO,Node> map_type;
313 typedef Tpetra::CrsGraph<LO,GO,Node> crs_graph_type;
314 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
315
316 const map_type &pointRowMap = *(pointMatrix.getRowMap());
317 RCP<const map_type> meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap);
318
319 const map_type &pointColMap = *(pointMatrix.getColMap());
320 RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);
321
322 const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
323 RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);
324
325 const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
326 RCP<const map_type> meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap);
327
328 // Use graph ctor that provides column map and upper bound on nonzeros per row.
329 // We can use static profile because the point graph should have at least as many entries per
330 // row as the mesh graph.
331 RCP<crs_graph_type> meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap,
332 pointMatrix.getGlobalMaxNumRowEntries(), Tpetra::StaticProfile));
333 // Fill the graph by walking through the matrix. For each mesh row, we query the collection of point
334 // rows associated with it. The point column ids are converted to mesh column ids and put into an array.
335 // As each point row collection is finished, the mesh column ids are sorted, made unique, and inserted
336 // into the mesh graph.
337 typename crs_matrix_type::local_inds_host_view_type pointColInds;
338 typename crs_matrix_type::values_host_view_type pointVals;
339 Array<GO> meshColGids;
340 meshColGids.reserve(pointMatrix.getGlobalMaxNumRowEntries());
341 //again, I assume that point GIDs associated with a mesh GID are consecutive.
342 //if they are not, this will break!!
343 for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
344 for (int j=0; j<blockSize; ++j) {
345 LO rowLid = i*blockSize+j;
346 pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals); //TODO optimization: Since I don't care about values,
347 //TODO I should use the graph instead.
348 for (size_t k=0; k<pointColInds.size(); ++k) {
349 GO meshColInd = pointColMap.getGlobalElement(pointColInds[k]) / blockSize;
350 meshColGids.push_back(meshColInd);
351 }
352 }
353 //List of mesh GIDs probably contains duplicates because we looped over all point rows in the block.
354 //Sort and make unique.
355 std::sort(meshColGids.begin(), meshColGids.end());
356 meshColGids.erase( std::unique(meshColGids.begin(), meshColGids.end()), meshColGids.end() );
357 meshCrsGraph->insertGlobalIndices(meshRowMap->getGlobalElement(i), meshColGids());
358 meshColGids.clear();
359 }
360 meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
361
362 //create and populate the block matrix
363 RCP<block_crs_matrix_type> blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockSize));
364
365 //preallocate the maximum number of (dense) block entries needed by any row
366 int maxBlockEntries = blockMatrix->getNodeMaxNumRowEntries();
367 Array<Array<Scalar>> blocks(maxBlockEntries);
368 for (int i=0; i<maxBlockEntries; ++i)
369 blocks[i].reserve(blockSize*blockSize);
370 std::map<int,int> bcol2bentry; //maps block column index to dense block entries
371 std::map<int,int>::iterator iter;
372 //Fill the block matrix. We must do this in local index space.
373 //TODO: Optimization: We assume the blocks are fully populated in the point matrix. This means
374 //TODO: on the first point row in the block row, we know that we're hitting new block col indices.
375 //TODO: on other rows, we know the block col indices have all been seen before
376 //int offset;
377 //if (pointMatrix.getIndexBase()) offset = 0;
378 //else offset = 1;
379 for (size_t i=0; i<pointMatrix.getNodeNumRows()/blockSize; i++) {
380 int blkCnt=0; //how many unique block entries encountered so far in current block row
381 for (int j=0; j<blockSize; ++j) {
382 LO rowLid = i*blockSize+j;
383 pointMatrix.getLocalRowView(rowLid,pointColInds,pointVals);
384 for (size_t k=0; k<pointColInds.size(); ++k) {
385 //convert point column to block col
386 LO meshColInd = pointColInds[k] / blockSize;
387 iter = bcol2bentry.find(meshColInd);
388 if (iter == bcol2bentry.end()) {
389 //new block column
390 bcol2bentry[meshColInd] = blkCnt;
391 blocks[blkCnt].push_back(pointVals[k]);
392 blkCnt++;
393 } else {
394 //block column found previously
395 int littleBlock = iter->second;
396 blocks[littleBlock].push_back(pointVals[k]);
397 }
398 }
399 }
400 // TODO This inserts the blocks one block entry at a time. It is probably more efficient to
401 // TODO store all the blocks in a block row contiguously so they can be inserted with a single call.
402 for (iter=bcol2bentry.begin(); iter != bcol2bentry.end(); ++iter) {
403 LO localBlockCol = iter->first;
404 Scalar *vals = (blocks[iter->second]).getRawPtr();
405 blockMatrix->replaceLocalValues(i, &localBlockCol, vals, 1);
406 }
407
408 //Done with block row. Zero everything out.
409 for (int j=0; j<maxBlockEntries; ++j)
410 blocks[j].clear();
411 blkCnt = 0;
412 bcol2bentry.clear();
413 }
414
415 return blockMatrix;
416
417 }
418
419 template<class LO, class GO, class Node>
420 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
421 createMeshMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& pointMap)
422 {
423 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
424 typedef Tpetra::Map<LO,GO,Node> map_type;
425
426 //calculate mesh GIDs
427 Teuchos::ArrayView<const GO> pointGids = pointMap.getNodeElementList();
428 Teuchos::Array<GO> meshGids;
429 GO indexBase = pointMap.getIndexBase();
430
431 // Use hash table to track whether we've encountered this GID previously. This will happen
432 // when striding through the point DOFs in a block. It should not happen otherwise.
433 // I don't use sort/make unique because I don't want to change the ordering.
434 meshGids.reserve(pointGids.size());
435 Tpetra::Details::HashTable<GO,int> hashTable(pointGids.size());
436 for (int i=0; i<pointGids.size(); ++i) {
437 GO meshGid = (pointGids[i]-indexBase) / blockSize + indexBase;
438 if (hashTable.get(meshGid) == -1) {
439 hashTable.add(meshGid,1); //(key,value)
440 meshGids.push_back(meshGid);
441 }
442 }
443
444 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) );
445 return meshMap;
446
447 }
448
449
450 template<class LO, class GO, class Node>
451 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
452 createPointMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& blockMap)
453 {
454 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
455 typedef Tpetra::Map<LO,GO,Node> map_type;
456
457 //calculate mesh GIDs
458 Teuchos::ArrayView<const GO> blockGids = blockMap.getNodeElementList();
459 Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
460 GO indexBase = blockMap.getIndexBase();
461
462 for(LO i=0, ct=0; i<(LO)blockGids.size(); i++) {
463 GO base = (blockGids[i] - indexBase)* blockSize + indexBase;
464 for(LO j=0; j<blockSize; j++, ct++)
465 pointGids[i*blockSize+j] = base+j;
466 }
467
468 Teuchos::RCP<const map_type> pointMap = Teuchos::rcp( new map_type(TOT::invalid(), pointGids(), 0, blockMap.getComm()) );
469 return pointMap;
470
471 }
472
473
474 template<class Scalar, class LO, class GO, class Node>
475 Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node> >
477
478 using Teuchos::Array;
479 using Teuchos::ArrayView;
480 using Teuchos::RCP;
481
482 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
483 typedef Tpetra::Map<LO,GO,Node> map_type;
484 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
485
486 using crs_graph_type = typename block_crs_matrix_type::crs_graph_type;
487 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
488 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
489 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
490 using entries_type = typename local_graph_device_type::entries_type::non_const_type;
491 using values_type = typename local_matrix_device_type::values_type::non_const_type;
492
493 using row_map_type_const = typename local_graph_device_type::row_map_type;
494 using entries_type_const = typename local_graph_device_type::entries_type;
495
496 using little_block_type = typename block_crs_matrix_type::const_little_block_type;
497 using offset_type = typename row_map_type::non_const_value_type;
498 using column_type = typename entries_type::non_const_value_type;
499
500 using execution_space = typename Node::execution_space;
501 using range_type = Kokkos::RangePolicy<execution_space, LO>;
502
503
504 LO blocksize = blockMatrix.getBlockSize();
505 const offset_type bs2 = blocksize * blocksize;
506 size_t block_nnz = blockMatrix.getNodeNumEntries();
507 size_t point_nnz = block_nnz * bs2;
508
509 // We can get these from the blockMatrix directly
510 RCP<const map_type> pointDomainMap = blockMatrix.getDomainMap();
511 RCP<const map_type> pointRangeMap = blockMatrix.getRangeMap();
512
513 // We need to generate the row/col Map ourselves.
514 RCP<const map_type> blockRowMap = blockMatrix.getRowMap();
515 RCP<const map_type> pointRowMap = createPointMap<LO,GO,Node>(blocksize, *blockRowMap);
516
517 RCP<const map_type> blockColMap = blockMatrix.getColMap();
518 RCP<const map_type> pointColMap = createPointMap<LO,GO,Node>(blocksize, *blockColMap);
519
520 // Get the last few things
521
522 const crs_graph_type & blockGraph = blockMatrix.getCrsGraph();
523 LO point_rows = (LO) pointRowMap->getNodeNumElements();
524 LO block_rows = (LO) blockRowMap->getNodeNumElements();
525 auto blockValues = blockMatrix.getValuesDevice();
526 auto blockLocalGraph = blockGraph.getLocalGraphDevice();
527 row_map_type_const blockRowptr = blockLocalGraph.row_map;
528 entries_type_const blockColind = blockLocalGraph.entries;
529
530 // Generate the point matrix rowptr / colind / values
531 row_map_type rowptr("row_map", point_rows+1);
532 entries_type colind("entries", point_nnz);
533 values_type values("values", point_nnz);
534
535 // Pre-fill the rowptr
536 Kokkos::parallel_for("fillRowPtr",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
537 offset_type base = blockRowptr[i];
538 offset_type diff = blockRowptr[i+1] - base;
539 for(LO j=0; j<blocksize; j++) {
540 rowptr[i*blocksize +j] = base*bs2 + j*diff*blocksize;
541 }
542
543 // Fill the last guy, if we're on the final entry
544 if(i==block_rows-1) {
545 rowptr[block_rows*blocksize] = blockRowptr[i+1] * bs2;
546 }
547 });
548
549
550 Kokkos::parallel_for("copyEntriesAndValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
551 const offset_type blkBeg = blockRowptr[i];
552 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
553
554 // For each block in the row...
555 for (offset_type block=0; block < numBlocks; block++) {
556 column_type point_col_base = blockColind[blkBeg + block] * blocksize;
557 little_block_type my_block(blockValues.data () + (blkBeg+block) * bs2, blocksize, blocksize);
558
559 // For each entry in the block...
560 for(LO little_row=0; little_row<blocksize; little_row++) {
561 offset_type point_row_offset = rowptr[i*blocksize + little_row];
562 for(LO little_col=0; little_col<blocksize; little_col++) {
563 colind[point_row_offset + block*blocksize + little_col] = point_col_base + little_col;
564 values[point_row_offset + block*blocksize + little_col] = my_block(little_row,little_col);
565 }
566 }
567
568 }
569 });
570
571 // Generate the matrix
572 RCP<crs_matrix_type> pointCrsMatrix = rcp(new crs_matrix_type(pointRowMap, pointColMap, 0));
573 pointCrsMatrix->setAllValues(rowptr,colind,values);
574
575 // FUTURE OPTIMIZATION: Directly compute import/export, rather than letting ESFC do it
576 pointCrsMatrix->expertStaticFillComplete(pointDomainMap,pointRangeMap);
577
578 return pointCrsMatrix;
579 }
580
581
582} // namespace Tpetra
583
584//
585// Explicit instantiation macro for blockCrsMatrixWriter (various
586// overloads), writeMatrixStrip, and convertToBlockCrsMatrix.
587//
588// Must be expanded from within the Tpetra namespace!
589//
590#define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S,LO,GO,NODE) \
591 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName); \
592 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName, Teuchos::ParameterList const &params); \
593 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
594 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
595 template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
596 template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
597 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix);
598
599//
600// Explicit instantiation macro for createMeshMap / createPointMap
601//
602#define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \
603 template Teuchos::RCP<const Map<LO,GO,NODE> > createMeshMap (const LO& blockSize, const Map<LO,GO,NODE>& pointMap); \
604 template Teuchos::RCP<const Map<LO,GO,NODE> > createPointMap (const LO& blockSize, const Map<LO,GO,NODE>& blockMap);
605
606#endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
virtual size_t getNodeNumEntries() const override
The local number of stored (structurally nonzero) entries.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
size_t getNodeNumRows() const override
get the local number of block rows
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
global_size_t getGlobalNumRows() const override
get the global number of block rows
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual size_t getNodeNumCols() const override
The number of columns needed to apply the forward operator on this node.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
One or more distributed dense vectors.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const &params)
Helper function called by blockCrsMatrixWriter.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
@ INSERT
Insert new values that don't currently exist.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap)
Helper function to generate a mesh map from a point map. Important! It's assumed that point GIDs asso...