MueLu Version of the Day
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48
49#include <stdlib.h>
50
51#include <Teuchos_LAPACK.hpp>
52
53#include <Xpetra_CrsMatrixWrap.hpp>
54#include <Xpetra_ImportFactory.hpp>
55#include <Xpetra_Matrix.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_VectorFactory.hpp>
58
61
62#include "MueLu_MasterList.hpp"
63#include "MueLu_Monitor.hpp"
64
65namespace MueLu {
66
67 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69 RCP<ParameterList> validParamList = rcp(new ParameterList());
70
71#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72 SET_VALID_ENTRY("semicoarsen: piecewise constant");
73 SET_VALID_ENTRY("semicoarsen: coarsen rate");
74#undef SET_VALID_ENTRY
75 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
76 validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
77 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
78
79 validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
80 validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
81 validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
82
83 return validParamList;
84 }
85
86 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
88 Input(fineLevel, "A");
89 Input(fineLevel, "Nullspace");
90
91 Input(fineLevel, "LineDetection_VertLineIds");
92 Input(fineLevel, "LineDetection_Layers");
93 Input(fineLevel, "CoarseNumZLayers");
94
95 // check whether fine level coordinate information is available.
96 // If yes, request the fine level coordinates and generate coarse coordinates
97 // during the Build call
98 if (fineLevel.GetLevelID() == 0) {
99 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
100 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
101 bTransferCoordinates_ = true;
102 }
103 } else if (bTransferCoordinates_ == true){
104 // on coarser levels we check the default factory providing "Coordinates"
105 // or the factory declared to provide "Coordinates"
106 // first, check which factory is providing coordinate information
107 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
108 if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
109 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
110 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
111 }
112 }
113 }
114
115 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117 return BuildP(fineLevel, coarseLevel);
118 }
119
120 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
122 FactoryMonitor m(*this, "Build", coarseLevel);
123
124 // obtain general variables
125 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
126 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
127
128 // get user-provided coarsening rate parameter (constant over all levels)
129 const ParameterList& pL = GetParameterList();
130 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
131
132 // collect general input data
133 LO BlkSize = A->GetFixedBlockSize();
134 RCP<const Map> rowMap = A->getRowMap();
135 LO Ndofs = rowMap->getNodeNumElements();
136 LO Nnodes = Ndofs/BlkSize;
137
138 // collect line detection information generated by the LineDetectionFactory instance
139 LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
140 Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
141 Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
142 LO* VertLineId = TVertLineId.getRawPtr();
143 LO* LayerId = TLayerId.getRawPtr();
144
145 // generate transfer operator with semicoarsening
146 RCP<const Map> theCoarseMap;
147 RCP<Matrix> P;
148 RCP<MultiVector> coarseNullspace;
149 GO Ncoarse = MakeSemiCoarsenP(Nnodes,FineNumZLayers,CoarsenRate,LayerId,VertLineId,
150 BlkSize, A, P, theCoarseMap, fineNullspace,coarseNullspace);
151
152 // set StridingInformation of P
153 if (A->IsView("stridedMaps") == true)
154 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
155 else
156 P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
157
158 if (pL.get<bool>("semicoarsen: piecewise constant"))
159 RevertToPieceWiseConstant(P, BlkSize);
160
161 // Store number of coarse z-layers on the coarse level container
162 // This information is used by the LineDetectionAlgorithm
163 // TODO get rid of the NoFactory
164
165 LO CoarseNumZLayers = FineNumZLayers*Ncoarse;
166 CoarseNumZLayers /= Ndofs;
167 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
168
169 // store semicoarsening transfer on coarse level
170 Set(coarseLevel, "P", P);
171
172 Set(coarseLevel, "Nullspace", coarseNullspace);
173
174 // transfer coordinates
175 if(bTransferCoordinates_) {
176 typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO> xdMV;
177 RCP<xdMV> fineCoords = Teuchos::null;
178 if (fineLevel.GetLevelID() == 0 &&
179 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
180 fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
181 } else {
182 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
183 if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
184 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
185 fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
186 }
187 }
188
189 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
190
191 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
192 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
193 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
194 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
195
196 // determine the maximum and minimum z coordinate value on the current processor.
197 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max = -Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
198 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min = Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
199 for ( auto it = z.begin(); it != z.end(); ++it) {
200 if(*it > zval_max) zval_max = *it;
201 if(*it < zval_min) zval_min = *it;
202 }
203
204 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
205
206 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
207 if(myCoarseZLayers == 1) {
208 myZLayerCoords[0] = zval_min;
209 } else {
210 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max-zval_min)/(myCoarseZLayers-1);
211 for(LO k = 0; k<myCoarseZLayers; ++k) {
212 myZLayerCoords[k] = k*dz;
213 }
214 }
215
216 // Note, that the coarse level node coordinates have to be in vertical ordering according
217 // to the numbering of the vertical lines
218
219 // number of vertical lines on current node:
220 LO numVertLines = Nnodes / FineNumZLayers;
221 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
222
223 RCP<const Map> coarseCoordMap =
224 MapFactory::Build (fineCoords->getMap()->lib(),
225 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
226 Teuchos::as<size_t>(numLocalCoarseNodes),
227 fineCoords->getMap()->getIndexBase(),
228 fineCoords->getMap()->getComm());
229 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
230 coarseCoords->putScalar(-1.0);
231 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
232 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
233 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
234
235 // loop over all vert line indices (stop as soon as possible)
236 LO cntCoarseNodes = 0;
237 for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
238 //vertical line id in *vt
239 LO curVertLineId = TVertLineId[vt];
240
241 if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
242 cy[curVertLineId * myCoarseZLayers] == -1.0) {
243 // loop over all local myCoarseZLayers
244 for (LO n=0; n<myCoarseZLayers; ++n) {
245 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
246 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
247 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
248 }
249 cntCoarseNodes += myCoarseZLayers;
250 }
251
252 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
253 if(cntCoarseNodes == numLocalCoarseNodes) break;
254 }
255
256 // set coarse level coordinates
257 Set(coarseLevel, "Coordinates", coarseCoords);
258 } /* end bool bTransferCoordinates */
259
260 }
261
262 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
264 /*
265 * Given the number of points in the z direction (PtsPerLine) and a
266 * coarsening rate (CoarsenRate), determine which z-points will serve
267 * as Cpts and return the total number of Cpts.
268 *
269 * Input
270 * PtsPerLine: Number of fine level points in the z direction
271 *
272 * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
273 *
274 * Thin: Must be either 0 or 1. Thin decides what to do when
275 * (PtsPerLine+1)/CoarsenRate is not an integer.
276 *
277 * Thin == 0 ==> ceil() the above fraction
278 * Thin == 1 ==> floor() the above fraction
279 *
280 * Output
281 * LayerCpts Array where LayerCpts[i] indicates that the
282 * LayerCpts[i]th fine level layer is a Cpt Layer.
283 * Note: fine level layers are assumed to be numbered starting
284 * a one.
285 */
286 typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
287 LO NCpts, i;
288 LO NCLayers = -1;
289 LO FirstStride;
290
291 temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (CoarsenRate)) - 1.0;
292 if (Thin == 1) NCpts = (LO) ceil(temp);
293 else NCpts = (LO) floor(temp);
294
295 TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
296
297 if (NCpts < 1) NCpts = 1;
298
299 FirstStride= (LO) ceil( ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) PtsPerLine+1)/( (typename Teuchos::ScalarTraits<Scalar>::coordinateType) (NCpts+1)));
300 RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) NCpts);
301
302 NCLayers = (LO) floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
303
304 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
305
306 di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType) FirstStride;
307 for (i = 1; i <= NCpts; i++) {
308 (*LayerCpts)[i] = (LO) floor(di);
309 di += RestStride;
310 }
311
312 return(NCLayers);
313 }
314
315#define MaxHorNeighborNodes 75
316
317 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
319 MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
320 LO const VertLineId[], LO const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P,
321 RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
322 RCP<MultiVector>& coarseNullspace ) const {
323
324 /*
325 * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
326 * describing the z-layer and vertical line (LayerId and VertLineId)
327 * of each matrix block row, a coarsening rate, and dofs/node information,
328 * construct a prolongator that coarsening to semicoarsening in the z-direction
329 * using something like an operator-dependent grid transfer. In particular,
330 * matrix stencils are collapsed to vertical lines. Thus, each vertical line
331 * gives rise to a block tridiagonal matrix. BlkRows corresponding to
332 * Cpts are replaced by identity matrices. This tridiagonal is solved
333 * to determine each interpolation basis functions. Each Blk Rhs corresponds
334 * to all zeros except at the corresponding C-pt which has an identity
335 *
336 * On termination, return the number of local prolongator columns owned by
337 * this processor.
338 *
339 * Note: This code was adapted from a matlab code where offsets/arrays
340 * start from 1. In most parts of the code, this 1 offset is kept
341 * (in some cases wasting the first element of the array). The
342 * input and output matrices of this function has been changed to
343 * have offsets/rows/columns which start from 0. LayerId[] and
344 * VertLineId[] currently start from 1.
345 *
346 * Input
347 * =====
348 * Ntotal Number of fine level Blk Rows owned by this processor
349 *
350 * nz Number of vertical layers. Note: partitioning must be done
351 * so that each processor owns an entire vertical line. This
352 * means that nz is the global number of layers, which should
353 * be equal to the local number of layers.
354 * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
355 * would correspond to CoarsenRate = 3.
356 * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
357 * layer number associated with the dofs within BlkRow.
358 * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
359 * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
360 * BlkRows associated with nodes along the same vertical line
361 * in the mesh should have the same LineId.
362 * DofsPerNode Number of degrees-of-freedom per mesh node.
363 *
364 * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
365 * OrigAcols,
366 * OrigAvals
367 *
368 * Output
369 * =====
370 * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
371 * ParamPcols,
372 * ParamsPvals
373 */
374 int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
375 int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
376 int BlkRow, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
377 int i, j, iii, col, count, index, loc, PtRow, PtCol;
378 SC *BandSol=NULL, *BandMat=NULL, TheSum;
379 int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
380 int *Pcols;
381 size_t *Pptr;
382 SC *Pvals;
383 int MaxStencilSize, MaxNnzPerRow;
384 LO *LayDiff=NULL;
385 int CurRow, LastGuy = -1, NewPtr;
386 int Ndofs;
387 int Nghost;
388 LO *Layerdofs = NULL, *Col2Dof = NULL;
389
390 Teuchos::LAPACK<LO,SC> lapack;
391
392 char notrans[3];
393 notrans[0] = 'N';
394 notrans[1] = 'N';
395
396
397 MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
398 Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
399
400 Nghost = Amat->getColMap()->getNodeNumElements() - Amat->getDomainMap()->getNodeNumElements();
401 if (Nghost < 0) Nghost = 0;
402 Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
403 Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
404
405 RCP<Xpetra::Vector<LO,LO,GO,NO> > localdtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getDomainMap());
406 RCP<Xpetra::Vector<LO,LO,GO,NO> > dtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getColMap());
407 ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
408
409 for (i = 0; i < Ntotal*DofsPerNode; i++)
410 valptr[i]= LayerId[i/DofsPerNode];
411 valptr=ArrayRCP<LO>();
412
413 RCP< const Import> importer;
414 importer = Amat->getCrsGraph()->getImporter();
415 if (importer == Teuchos::null) {
416 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
417 }
418 dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
419
420 valptr= dtemp->getDataNonConst(0);
421 for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
422 valptr= localdtemp->getDataNonConst(0);
423 for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
424 valptr=ArrayRCP<LO>();
425 dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
426
427 valptr= dtemp->getDataNonConst(0);
428 for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
429 valptr=ArrayRCP<LO>();
430
431 if (Ntotal != 0) {
432 NLayers = LayerId[0];
433 NVertLines= VertLineId[0];
434 }
435 else { NLayers = -1; NVertLines = -1; }
436
437 for (i = 1; i < Ntotal; i++) {
438 if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
439 if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
440 }
441 NLayers++;
442 NVertLines++;
443
444 /*
445 * Make an inverse map so that we can quickly find the dof
446 * associated with a particular vertical line and layer.
447 */
448
449 Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
450 for (i=0; i < Ntotal; i++) {
451 InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
452 }
453
454 /*
455 * Determine coarse layers where injection will be applied.
456 */
457
458 Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
459 NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
460
461 /*
462 * Compute the largest possible interpolation stencil width based
463 * on the location of the Clayers. This stencil width is actually
464 * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
465 * one needs to multiply this by DofsPerNode.
466 */
467
468 if (NCLayers < 2) MaxStencilSize = nz;
469 else MaxStencilSize = CptLayers[2];
470
471 for (i = 3; i <= NCLayers; i++) {
472 if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
473 MaxStencilSize = CptLayers[i]- CptLayers[i-2];
474 }
475 if (NCLayers > 1) {
476 if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
477 MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
478 }
479
480 /*
481 * Allocate storage associated with solving a banded sub-matrix needed to
482 * determine the interpolation stencil. Note: we compute interpolation stencils
483 * for all dofs within a node at the same time, and so the banded solution
484 * must be large enough to hold all DofsPerNode simultaneously.
485 */
486
487 Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
488 Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
489 /*
490 * Lapack variables. See comments for dgbsv().
491 */
492 KL = 2*DofsPerNode-1;
493 KU = 2*DofsPerNode-1;
494 KLU = KL+KU;
495 LDAB = 2*KL+KU+1;
496 NRHS = DofsPerNode;
497 Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
498 Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
499
500 /*
501 * Allocate storage for the final interpolation matrix. Note: each prolongator
502 * row might have entries corresponding to at most two nodes.
503 * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
504 * nnz per prolongator row is DofsPerNode*2.
505 */
506
507 Ndofs = DofsPerNode*Ntotal;
508 MaxNnz = 2*DofsPerNode*Ndofs;
509
510 RCP<const Map> rowMap = Amat->getRowMap();
511 Xpetra::global_size_t GNdofs= rowMap->getGlobalNumElements();
512
513 std::vector<size_t> stridingInfo_;
514 stridingInfo_.push_back(DofsPerNode);
515
516 Xpetra::global_size_t itemp = GNdofs/nz;
517 coarseMap = StridedMapFactory::Build(rowMap->lib(),
518 NCLayers*itemp,
519 NCLayers*NVertLines*DofsPerNode,
520 0, /* index base */
521 stridingInfo_,
522 rowMap->getComm(),
523 -1, /* strided block id */
524 0 /* domain gid offset */);
525
526
527 //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
528
529
530 P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0));
531 coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
532
533
534 Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
535 Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
536 Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
537
538 Pptr[0] = 0; Pptr[1] = 0;
539
540 TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
541
542 /*
543 * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
544 * This will be useful while filling up P, and then later we will squeeze out
545 * the unused nonzeros locations.
546 */
547
548 for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
549 count = 1;
550 for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
551 Pptr[i] = count;
552 count += (2*DofsPerNode);
553 }
554
555 /*
556 * Build P column by column. The 1st block column corresponds to the 1st coarse
557 * layer and the first line. The 2nd block column corresponds to the 2nd coarse
558 * layer and the first line. The NCLayers+1 block column corresponds to the
559 * 1st coarse layer and the 2nd line, etc.
560 */
561
562
563 col = 0;
564 for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
565 for (iii=1; iii <= NCLayers; iii+= 1) {
566 col = col+1;
567 MyLayer = CptLayers[iii];
568
569 /*
570 * StartLayer gives the layer number of the lowest layer that
571 * is nonzero in the interpolation stencil that is currently
572 * being computed. Normally, if we are not near a boundary this
573 * is simply CptsLayers[iii-1]+1.
574 *
575 * NStencilNodes indicates the number of nonzero nodes in the
576 * interpolation stencil that is currently being computed. Normally,
577 * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
578 */
579
580 if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
581 else StartLayer = 1;
582
583 if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
584 else NStencilNodes= NLayers - StartLayer + 1;
585
586
587 N = NStencilNodes*DofsPerNode;
588
589 /*
590 * dgbsv() does not require that the first KL rows be initialized,
591 * so we could avoid zeroing out some entries?
592 */
593
594 for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++)
595 BandSol[ i] = 0.0;
596 for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
597
598 /*
599 * Fill BandMat and BandSol (which is initially the rhs) for each
600 * node in the interpolation stencil that is being computed.
601 */
602
603 for (node_k=1; node_k <= NStencilNodes ; node_k++) {
604
605 /* Map a Line and Layer number to a BlkRow in the fine level matrix
606 * and record the mapping from the sub-system to the BlkRow of the
607 * fine level matrix.
608 */
609 BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
610 Sub2FullMap[node_k] = BlkRow;
611
612 /* Two cases:
613 * 1) the current layer is not a Cpoint layer. In this case we
614 * want to basically stick the matrix couplings to other
615 * nonzero stencil rows into the band matrix. One way to do
616 * this is to include couplings associated with only MyLine
617 * and ignore all the other couplings. However, what we do
618 * instead is to sum all the coupling at each layer participating
619 * in this interpolation stencil and stick this sum into BandMat.
620 * 2) the current layer is a Cpoint layer and so we
621 * stick an identity block in BandMat and rhs.
622 */
623 if (StartLayer+node_k-1 != MyLayer) {
624 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
625
626 j = (BlkRow-1)*DofsPerNode+dof_i;
627 ArrayView<const LO> AAcols;
628 ArrayView<const SC> AAvals;
629 Amat->getLocalRowView(j, AAcols, AAvals);
630 const int *Acols = AAcols.getRawPtr();
631 const SC *Avals = AAvals.getRawPtr();
632 RowLeng = AAvals.size();
633
634 TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
635
636 for (i = 0; i < RowLeng; i++) {
637 LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
638
639 /* This is the main spot where there might be off- */
640 /* processor communication. That is, when we */
641 /* average the stencil in the horizontal direction,*/
642 /* we need to know the layer id of some */
643 /* neighbors that might reside off-processor. */
644 }
645 PtRow = (node_k-1)*DofsPerNode+dof_i+1;
646 for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
647 PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
648 /* Stick in entry corresponding to Mat(PtRow,PtCol) */
649 /* see dgbsv() comments for matrix format. */
650 TheSum = 0.0;
651 for (i = 0; i < RowLeng; i++) {
652 if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
653 TheSum += Avals[i];
654 }
655 index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
656 BandMat[index] = TheSum;
657 if (node_k != NStencilNodes) {
658 /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
659 /* see dgbsv() comments for matrix format. */
660 TheSum = 0.0;
661 for (i = 0; i < RowLeng; i++) {
662 if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
663 TheSum += Avals[i];
664 }
665 j = PtCol+DofsPerNode;
666 index=LDAB*(j-1)+KLU+PtRow-j;
667 BandMat[index] = TheSum;
668 }
669 if (node_k != 1) {
670 /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
671 /* see dgbsv() comments for matrix format. */
672 TheSum = 0.0;
673 for (i = 0; i < RowLeng; i++) {
674 if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
675 TheSum += Avals[i];
676 }
677 j = PtCol-DofsPerNode;
678 index=LDAB*(j-1)+KLU+PtRow-j;
679 BandMat[index] = TheSum;
680 }
681 }
682 }
683 }
684 else {
685
686 /* inject the null space */
687 // int FineStride = Ntotal*DofsPerNode;
688 // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
689 for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
690 Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
691 Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
692 for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
693 OneCNull[( col-1)*DofsPerNode+dof_i] = OneFNull[ (BlkRow-1)*DofsPerNode+dof_i];
694 }
695 }
696
697 for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
698 /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
699 /* see dgbsv() comments for matrix format. */
700 PtRow = (node_k-1)*DofsPerNode+dof_i+1;
701 index = LDAB*(PtRow-1)+KLU;
702 BandMat[index] = 1.0;
703 BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
704 }
705 }
706 }
707
708 /* Solve banded system and then stick result in Pmatrix arrays */
709
710 lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
711
712 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
713
714 lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
715 BandSol, N, &INFO );
716
717 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
718
719 for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
720 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
721 for (i =1; i <= NStencilNodes ; i++) {
722 index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
723 loc = Pptr[index];
724 Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
725 Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
726 (i-1)*DofsPerNode + dof_i];
727 Pptr[index]= Pptr[index] + 1;
728 }
729 }
730 }
731 }
732 }
733
734 /*
735 * Squeeze the -1's out of the columns. At the same time convert Pcols
736 * so that now the first column is numbered '0' as opposed to '1'.
737 * Also, the arrays Pcols and Pvals should now use the zeroth element
738 * as opposed to just starting with the first element. Pptr will be
739 * fixed in the for loop below so that Pptr[0] = 0, etc.
740 */
741 CurRow = 1;
742 NewPtr = 1;
743 for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
744 if (ii == Pptr[CurRow]) {
745 Pptr[CurRow] = LastGuy;
746 CurRow++;
747 while (ii > Pptr[CurRow]) {
748 Pptr[CurRow] = LastGuy;
749 CurRow++;
750 }
751 }
752 if (Pcols[ii] != -1) {
753 Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
754 Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
755 LastGuy = NewPtr;
756 NewPtr++;
757 }
758 }
759 for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
760
761 /* Now move the pointers so that they now point to the beginning of each
762 * row as opposed to the end of each row
763 */
764 for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
765 Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
766 }
767 Pptr[0] = 0;
768
769 ArrayRCP<size_t> rcpRowPtr;
770 ArrayRCP<LO> rcpColumns;
771 ArrayRCP<SC> rcpValues;
772
773 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
774 LO nnz = Pptr[Ndofs];
775 PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
776
777 ArrayView<size_t> rowPtr = rcpRowPtr();
778 ArrayView<LO> columns = rcpColumns();
779 ArrayView<SC> values = rcpValues();
780
781 // copy data over
782
783 for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
784 for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
785 for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
786 PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
787 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
788
789
790 return NCLayers*NVertLines*DofsPerNode;
791 }
792 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
794 // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
795 // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
796 // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
797 // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
798 // non-zero entries
799
800 ArrayView<const LocalOrdinal> inds;
801 ArrayView<const Scalar> vals1;
802 ArrayView< Scalar> vals;
803 Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
804 Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
805
806 for (size_t i = 0; i < P->getRowMap()->getNodeNumElements(); i++) {
807 P->getLocalRowView(i, inds, vals1);
808
809 size_t nnz = inds.size();
810 if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
811
812 LO largestIndex = -1;
813 Scalar largestValue = ZERO;
814 /* find largest value in row and change that one to a 1 while the others are set to 0 */
815
816 LO rowDof = i%BlkSize;
817 for (size_t j =0; j < nnz; j++) {
818 if (Teuchos::ScalarTraits<SC>::magnitude(vals[ j ]) >= Teuchos::ScalarTraits<SC>::magnitude(largestValue)) {
819 if ( inds[j]%BlkSize == rowDof ) {
820 largestValue = vals[j];
821 largestIndex = (int) j;
822 }
823 }
824 vals[j] = ZERO;
825 }
826 if (largestIndex != -1) vals[largestIndex] = ONE;
827 else
828 TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
829
830 if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
831 }
832 }
833
834} //namespace MueLu
835
836#define MUELU_SEMICOARSENPFACTORY_SHORT
837#endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
#define MaxHorNeighborNodes
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
Namespace for MueLu classes and methods.