MueLu Version of the Day
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <stdlib.h>
52
53#include <Kokkos_Core.hpp>
54
55#include <KokkosBatched_LU_Decl.hpp>
56#include <KokkosBatched_LU_Serial_Impl.hpp>
57#include <KokkosBatched_Trsm_Decl.hpp>
58#include <KokkosBatched_Trsm_Serial_Impl.hpp>
59#include <KokkosBatched_Util.hpp>
60#include <KokkosSparse_CrsMatrix.hpp>
61
62#include <Xpetra_CrsMatrixWrap.hpp>
63#include <Xpetra_ImportFactory.hpp>
64#include <Xpetra_Matrix.hpp>
65#include <Xpetra_MultiVectorFactory.hpp>
66#include <Xpetra_VectorFactory.hpp>
67
69#include "MueLu_MasterList.hpp"
70#include "MueLu_Monitor.hpp"
72
73namespace MueLu {
74
75template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
76 class DeviceType>
77RCP<const ParameterList>
78SemiCoarsenPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
79 Kokkos::Compat::KokkosDeviceWrapperNode<
80 DeviceType>>::GetValidParameterList() const {
81 RCP<ParameterList> validParamList = rcp(new ParameterList());
82
83 std::string name = "semicoarsen: coarsen rate";
84 validParamList->setEntry(name, MasterList::getEntry(name));
85 validParamList->set<RCP<const FactoryBase>>(
86 "A", Teuchos::null, "Generating factory of the matrix A");
87 validParamList->set<RCP<const FactoryBase>>(
88 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
89 validParamList->set<RCP<const FactoryBase>>(
90 "Coordinates", Teuchos::null, "Generating factory for coordinates");
91
92 validParamList->set<RCP<const FactoryBase>>(
93 "LineDetection_VertLineIds", Teuchos::null,
94 "Generating factory for LineDetection vertical line ids");
95 validParamList->set<RCP<const FactoryBase>>(
96 "LineDetection_Layers", Teuchos::null,
97 "Generating factory for LineDetection layer ids");
98 validParamList->set<RCP<const FactoryBase>>(
99 "CoarseNumZLayers", Teuchos::null,
100 "Generating factory for number of coarse z-layers");
101
102 return validParamList;
103}
104
105template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
106 class DeviceType>
107void SemiCoarsenPFactory_kokkos<
109 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
110 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
111 Input(fineLevel, "A");
112 Input(fineLevel, "Nullspace");
113
114 Input(fineLevel, "LineDetection_VertLineIds");
115 Input(fineLevel, "LineDetection_Layers");
116 Input(fineLevel, "CoarseNumZLayers");
117
118 // check whether fine level coordinate information is available.
119 // If yes, request the fine level coordinates and generate coarse coordinates
120 // during the Build call
121 if (fineLevel.GetLevelID() == 0) {
122 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
123 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
124 bTransferCoordinates_ = true;
125 }
126 } else if (bTransferCoordinates_ == true) {
127 // on coarser levels we check the default factory providing "Coordinates"
128 // or the factory declared to provide "Coordinates"
129 // first, check which factory is providing coordinate information
130 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
131 if (myCoordsFact == Teuchos::null) {
132 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
133 }
134 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
135 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
136 }
137 }
138}
139
140template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
141 class DeviceType>
142void SemiCoarsenPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
143 Kokkos::Compat::KokkosDeviceWrapperNode<
144 DeviceType>>::Build(Level &fineLevel,
145 Level &coarseLevel)
146 const {
147 return BuildP(fineLevel, coarseLevel);
148}
149
150template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
151 class DeviceType>
152void SemiCoarsenPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
153 Kokkos::Compat::KokkosDeviceWrapperNode<
154 DeviceType>>::BuildP(Level &fineLevel,
155 Level &coarseLevel)
156 const {
157 FactoryMonitor m(*this, "Build", coarseLevel);
158
159 // obtain general variables
160 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
161 RCP<MultiVector> fineNullspace =
162 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
163
164 // get user-provided coarsening rate parameter (constant over all levels)
165 const ParameterList &pL = GetParameterList();
166 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
167 TEUCHOS_TEST_FOR_EXCEPTION(
168 CoarsenRate < 2, Exceptions::RuntimeError,
169 "semicoarsen: coarsen rate must be greater than 1");
170
171 // collect general input data
172 LO BlkSize = A->GetFixedBlockSize();
173 RCP<const Map> rowMap = A->getRowMap();
174 LO Ndofs = rowMap->getNodeNumElements();
175 LO Nnodes = Ndofs / BlkSize;
176
177 // collect line detection information generated by the LineDetectionFactory
178 // instance
179 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
180 Teuchos::ArrayRCP<LO> TVertLineId =
181 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
182 Teuchos::ArrayRCP<LO> TLayerId =
183 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
184
185 // compute number of coarse layers
186 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
187 "Cannot coarsen further");
188 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
189 LO CoarseNumZLayers =
190 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
191 if (CoarseNumZLayers < 1)
192 CoarseNumZLayers = 1;
193
194 // generate transfer operator with semicoarsening
195 RCP<Matrix> P;
196 RCP<MultiVector> coarseNullspace;
197 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
198 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
199 coarseNullspace);
200
201 // Store number of coarse z-layers on the coarse level container
202 // This information is used by the LineDetectionAlgorithm
203 // TODO get rid of the NoFactory
204 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
206
207 // store semicoarsening transfer on coarse level
208 Set(coarseLevel, "P", P);
209 Set(coarseLevel, "Nullspace", coarseNullspace);
210
211 // transfer coordinates
212 if (bTransferCoordinates_) {
213 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
214 typedef Xpetra::MultiVector<
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
216 xdMV;
217 RCP<xdMV> fineCoords = Teuchos::null;
218 if (fineLevel.GetLevelID() == 0 &&
219 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
220 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
221 } else {
222 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
223 if (myCoordsFact == Teuchos::null) {
224 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
225 }
226 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
227 fineCoords =
228 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
229 }
230 }
231
232 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
233 Exceptions::RuntimeError,
234 "No Coordinates found provided by the user.");
235
236 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
237 Exceptions::RuntimeError,
238 "Three coordinates arrays must be supplied if "
239 "line detection orientation not given.");
240 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
241 fineCoords->getDataNonConst(0);
242 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
243 fineCoords->getDataNonConst(1);
244 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
245 fineCoords->getDataNonConst(2);
246
247 // determine the maximum and minimum z coordinate value on the current
248 // processor.
249 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
250 -Teuchos::ScalarTraits<
251 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
252 Teuchos::ScalarTraits<
253 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
255 Teuchos::ScalarTraits<
256 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
257 Teuchos::ScalarTraits<
258 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
259 for (auto it = z.begin(); it != z.end(); ++it) {
260 if (*it > zval_max)
261 zval_max = *it;
262 if (*it < zval_min)
263 zval_min = *it;
264 }
265
266 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
267
268 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
269 myZLayerCoords = Teuchos::arcp<
270 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
271 myCoarseZLayers);
272 if (myCoarseZLayers == 1) {
273 myZLayerCoords[0] = zval_min;
274 } else {
275 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
276 (zval_max - zval_min) / (myCoarseZLayers - 1);
277 for (LO k = 0; k < myCoarseZLayers; ++k) {
278 myZLayerCoords[k] = k * dz;
279 }
280 }
281
282 // Note, that the coarse level node coordinates have to be in vertical
283 // ordering according to the numbering of the vertical lines
284
285 // number of vertical lines on current node:
286 LO numVertLines = Nnodes / FineNumZLayers;
287 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
288
289 RCP<const Map> coarseCoordMap = MapFactory::Build(
290 fineCoords->getMap()->lib(),
291 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
292 Teuchos::as<size_t>(numLocalCoarseNodes),
293 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
294 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
295 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
296 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
297 coarseCoords->putScalar(-1.0);
298 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
299 coarseCoords->getDataNonConst(0);
300 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
301 coarseCoords->getDataNonConst(1);
302 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
303 coarseCoords->getDataNonConst(2);
304
305 // loop over all vert line indices (stop as soon as possible)
306 LO cntCoarseNodes = 0;
307 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
308 // vertical line id in *vt
309 LO curVertLineId = TVertLineId[vt];
310
311 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
312 cy[curVertLineId * myCoarseZLayers] == -1.0) {
313 // loop over all local myCoarseZLayers
314 for (LO n = 0; n < myCoarseZLayers; ++n) {
315 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
316 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
317 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
318 }
319 cntCoarseNodes += myCoarseZLayers;
320 }
321
322 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
323 Exceptions::RuntimeError,
324 "number of coarse nodes is inconsistent.");
325 if (cntCoarseNodes == numLocalCoarseNodes)
326 break;
327 }
328
329 // set coarse level coordinates
330 Set(coarseLevel, "Coordinates", coarseCoords);
331 } /* end bool bTransferCoordinates */
332}
333
334template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
335 class DeviceType>
336void SemiCoarsenPFactory_kokkos<
338 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
339 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
340 const LO DofsPerNode, const LO NFLayers,
341 const LO NCLayers, const ArrayRCP<LO> LayerId,
342 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
343 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
344 RCP<MultiVector> &coarseNullspace) const {
345 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
346 using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
347 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
348 using LOView1D = Kokkos::View<LO *, DeviceType>;
349 using LOView2D = Kokkos::View<LO **, DeviceType>;
350
351 // Construct a map from fine level column to layer ids (including ghost nodes)
352 // Note: this is needed to sum all couplings within a layer
353 const auto FCol2LayerVector =
354 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
355 const auto localTemp =
356 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
357 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
358 if (importer == Teuchos::null)
359 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
360 {
361 // Fill local temp with layer ids and fill ghost nodes
362 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
363 for (int row = 0; row < NFRows; row++)
364 localTempHost(row, 0) = LayerId[row / DofsPerNode];
365 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
366 Kokkos::deep_copy(localTempView, localTempHost);
367 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
368 }
369 const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
370 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
371
372 // Construct a map from fine level column to local dof per node id (including
373 // ghost nodes) Note: this is needed to sum all couplings within a layer
374 const auto FCol2DofVector =
375 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
376 {
377 // Fill local temp with local dof per node ids and fill ghost nodes
378 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
379 for (int row = 0; row < NFRows; row++)
380 localTempHost(row, 0) = row % DofsPerNode;
381 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
382 Kokkos::deep_copy(localTempView, localTempHost);
383 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
384 }
385 const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
386 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
387
388 // Compute NVertLines
389 // TODO: Read this from line detection factory
390 int NVertLines = -1;
391 if (NFNodes != 0)
392 NVertLines = VertLineId[0];
393 for (int node = 1; node < NFNodes; ++node)
394 if (VertLineId[node] > NVertLines)
395 NVertLines = VertLineId[node];
396 NVertLines++;
397
398 // Construct a map from Line, Layer ids to fine level node
399 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
400 typename LOView2D::HostMirror LineLayer2NodeHost =
401 Kokkos::create_mirror_view(LineLayer2Node);
402 for (int node = 0; node < NFNodes; ++node)
403 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
404 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
405
406 // Construct a map from coarse layer id to fine layer id
407 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
408 typename LOView1D::HostMirror CLayer2FLayerHost =
409 Kokkos::create_mirror_view(CLayer2FLayer);
410 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
411 const LO FirstStride =
412 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
413 const coordT RestStride =
414 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
415 const LO NCpts =
416 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
417 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
418 "sizes do not match.");
419 coordT stride = (coordT)FirstStride;
420 for (int clayer = 0; clayer < NCLayers; ++clayer) {
421 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
422 stride += RestStride;
423 }
424 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
425
426 // Compute start layer and stencil sizes for layer interpolation at each
427 // coarse layer
428 int MaxStencilSize = 1;
429 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
430 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
431 typename LOView1D::HostMirror CLayer2StartLayerHost =
432 Kokkos::create_mirror_view(CLayer2StartLayer);
433 typename LOView1D::HostMirror CLayer2StencilSizeHost =
434 Kokkos::create_mirror_view(CLayer2StencilSize);
435 for (int clayer = 0; clayer < NCLayers; ++clayer) {
436 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
437 const int stencilSize = (clayer < NCLayers - 1)
438 ? CLayer2FLayerHost(clayer + 1) - startLayer
439 : NFLayers - startLayer;
440
441 if (MaxStencilSize < stencilSize)
442 MaxStencilSize = stencilSize;
443 CLayer2StartLayerHost(clayer) = startLayer;
444 CLayer2StencilSizeHost(clayer) = stencilSize;
445 }
446 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
447 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
448
449 // Allocate storage for the coarse layer interpolation matrices on all
450 // vertical lines Note: Contributions to each matrix are collapsed to vertical
451 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
452 // Here we store the full matrix to be compatible with kokkos kernels batch LU
453 // and tringular solve.
454 int Nmax = MaxStencilSize * DofsPerNode;
456 "BandMat", NVertLines, Nmax, Nmax);
458 "BandSol", NVertLines, Nmax, DofsPerNode);
459
460 // Precompute number of nonzeros in prolongation matrix and allocate P views
461 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
462 // interpolation stencil (StencilSize*DofsPerNode)
463 int NnzP = 0;
464 for (int clayer = 0; clayer < NCLayers; ++clayer)
465 NnzP += CLayer2StencilSizeHost(clayer);
466 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
467 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
468 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
469
470 // Precompute Pptr
471 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
472 // corresponding row in P
473 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
475 Kokkos::create_mirror_view(Pptr);
476 Kokkos::deep_copy(PptrHost, 0);
477 for (int line = 0; line < NVertLines; ++line) {
478 for (int clayer = 0; clayer < NCLayers; ++clayer) {
479 const int stencilSize = CLayer2StencilSizeHost(clayer);
480 const int startLayer = CLayer2StartLayerHost(clayer);
481 for (int snode = 0; snode < stencilSize; ++snode) {
482 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
483 const int layer = startLayer + snode;
484 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
485 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
486 PptrHost(AmatRow + 1) += DofsPerNode;
487 }
488 }
489 }
490 }
491 for (int i = 2; i < NFRows + 1; ++i)
492 PptrHost(i) += PptrHost(i - 1);
493 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
494 Exceptions::RuntimeError,
495 "Number of nonzeros in P does not match");
496 Kokkos::deep_copy(Pptr, PptrHost);
497
498 // Precompute Pptr offsets
499 // Note: These are used to determine the nonzero index in Pvals and Pcols
501 "layerBuckets", NFLayers);
502 Kokkos::deep_copy(layerBuckets, 0);
503 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
504 MaxStencilSize);
505 typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
506 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
507 for (int clayer = 0; clayer < NCLayers; ++clayer) {
508 const int stencilSize = CLayer2StencilSizeHost(clayer);
509 const int startLayer = CLayer2StartLayerHost(clayer);
510 for (int snode = 0; snode < stencilSize; ++snode) {
511 const int layer = startLayer + snode;
512 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
513 layerBuckets(layer)++;
514 }
515 }
516 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
517
518 { // Fill P - fill and solve each block tridiagonal system and fill P views
519 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
520
521 const auto localAmat = Amat->getLocalMatrixDevice();
522 const auto zero = impl_ATS::zero();
523 const auto one = impl_ATS::one();
524
525 using range_policy = Kokkos::RangePolicy<execution_space>;
526 Kokkos::parallel_for(
527 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
528 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
529 for (int clayer = 0; clayer < NCLayers; ++clayer) {
530
531 // Initialize BandSol
532 auto bandSol =
533 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
534 for (int row = 0; row < Nmax; ++row)
535 for (int dof = 0; dof < DofsPerNode; ++dof)
536 bandSol(row, dof) = zero;
537
538 // Initialize BandMat (set unused row diagonal to 1.0)
539 const int stencilSize = CLayer2StencilSize(clayer);
540 const int N = stencilSize * DofsPerNode;
541 auto bandMat =
542 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
543 for (int row = 0; row < Nmax; ++row)
544 for (int col = 0; col < Nmax; ++col)
545 bandMat(row, col) =
546 (row == col && row >= N) ? one : zero;
547
548 // Loop over layers in stencil and fill banded matrix and rhs
549 const int flayer = CLayer2FLayer(clayer);
550 const int startLayer = CLayer2StartLayer(clayer);
551 for (int snode = 0; snode < stencilSize; ++snode) {
552
553 const int layer = startLayer + snode;
554 if (layer == flayer) { // If layer in stencil is a coarse layer
555 for (int dof = 0; dof < DofsPerNode; ++dof) {
556 const int row = snode * DofsPerNode + dof;
557 bandMat(row, row) = one;
558 bandSol(row, dof) = one;
559 }
560 } else { // Not a coarse layer
561 const int AmatBlkRow = LineLayer2Node(line, layer);
562 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
563
564 // Get Amat row info
565 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
566 const auto localAmatRow = localAmat.rowConst(AmatRow);
567 const int AmatRowLeng = localAmatRow.length;
568
569 const int row = snode * DofsPerNode + dofi;
570 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
571 const int col = snode * DofsPerNode + dofj;
572
573 // Sum values along row which correspond to stencil
574 // layer/dof and fill bandMat
575 auto val = zero;
576 for (int i = 0; i < AmatRowLeng; ++i) {
577 const int colidx = localAmatRow.colidx(i);
578 if (FCol2Layer(colidx) == layer &&
579 FCol2Dof(colidx) == dofj)
580 val += localAmatRow.value(i);
581 }
582 bandMat(row, col) = val;
583
584 if (snode > 0) {
585 // Sum values along row which correspond to stencil
586 // layer/dof below and fill bandMat
587 val = zero;
588 for (int i = 0; i < AmatRowLeng; ++i) {
589 const int colidx = localAmatRow.colidx(i);
590 if (FCol2Layer(colidx) == layer - 1 &&
591 FCol2Dof(colidx) == dofj)
592 val += localAmatRow.value(i);
593 }
594 bandMat(row, col - DofsPerNode) = val;
595 }
596
597 if (snode < stencilSize - 1) {
598 // Sum values along row which correspond to stencil
599 // layer/dof above and fill bandMat
600 val = zero;
601 for (int i = 0; i < AmatRowLeng; ++i) {
602 const int colidx = localAmatRow.colidx(i);
603 if (FCol2Layer(colidx) == layer + 1 &&
604 FCol2Dof(colidx) == dofj)
605 val += localAmatRow.value(i);
606 }
607 bandMat(row, col + DofsPerNode) = val;
608 }
609 }
610 }
611 }
612 }
613
614 // Batch LU and triangular solves
615 namespace KB = KokkosBatched;
616 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
617 lu_type::invoke(bandMat);
618 using trsv_l_type =
619 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
620 KB::Trans::NoTranspose, KB::Diag::Unit,
621 KB::Algo::Trsm::Unblocked>;
622 trsv_l_type::invoke(one, bandMat, bandSol);
623 using trsv_u_type = typename KB::SerialTrsm<
624 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
625 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
626 trsv_u_type::invoke(one, bandMat, bandSol);
627
628 // Fill prolongation views with solution
629 for (int snode = 0; snode < stencilSize; ++snode) {
630 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
631 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
632 const int layer = startLayer + snode;
633 const int AmatBlkRow = LineLayer2Node(line, layer);
634 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
635
636 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
637 const int pptr =
638 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
639
640 const int col =
641 line * NCLayers + clayer; // coarse node (block row) index
642 Pcols(pptr) = col * DofsPerNode + dofj;
643 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
644 }
645 }
646 }
647 }
648 });
649 } // Fill P
650
651 // Build P
652 RCP<const Map> rowMap = Amat->getRowMap();
653 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
654 Xpetra::global_size_t itemp = GNdofs / NFLayers;
655 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
656 RCP<const Map> coarseMap = StridedMapFactory::Build(
657 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
658 stridingInfo_, rowMap->getComm(), -1, 0);
659 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
660 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
661 PCrs->setAllValues(Pptr, Pcols, Pvals);
662 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
663
664 // set StridingInformation of P
665 if (Amat->IsView("stridedMaps") == true)
666 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
667 else
668 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
669
670 // Construct coarse nullspace and inject fine nullspace
671 coarseNullspace =
672 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
673 const int numVectors = fineNullspace->getNumVectors();
674 const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
675 const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
676 using range_policy = Kokkos::RangePolicy<execution_space>;
677 Kokkos::parallel_for(
678 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
679 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
680 for (int clayer = 0; clayer < NCLayers; ++clayer) {
681 const int layer = CLayer2FLayer(clayer);
682 const int AmatBlkRow =
683 LineLayer2Node(line, layer); // fine node (block row) index
684 const int col =
685 line * NCLayers + clayer; // coarse node (block row) index
686 for (int k = 0; k < numVectors; ++k) {
687 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
688 const int fRow = AmatBlkRow * DofsPerNode + dofi;
689 const int cRow = col * DofsPerNode + dofi;
690 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
691 }
692 }
693 }
694 });
695}
696
697} // namespace MueLu
698
699#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
700#endif // HAVE_MUELU_KOKKOS_REFACTOR
701#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
Namespace for MueLu classes and methods.