MueLu Version of the Day
MueLu_RegionRFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REGIONRFACTORY_DEF_HPP
47#define MUELU_REGIONRFACTORY_DEF_HPP
48
49#ifdef HAVE_MUELU_KOKKOS_REFACTOR
50
51#include <Xpetra_Matrix.hpp>
52#include <Xpetra_CrsGraphFactory.hpp>
53
54// #include "MueLu_PFactory.hpp"
55// #include "MueLu_FactoryManagerBase.hpp"
56#include "MueLu_Monitor.hpp"
57#include "MueLu_MasterList.hpp"
58
60
61namespace MueLu {
62
63 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
64 RCP<const ParameterList> RegionRFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
65 GetValidParameterList() const {
66 RCP<ParameterList> validParamList = rcp(new ParameterList());
67
68 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
69 "Generating factory of the matrix A");
70 validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
71 "Number of spatial dimensions in the problem.");
72 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
73 "Number of local nodes per spatial dimension on the fine grid.");
74 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
75 "Fine level nullspace used to construct the coarse level nullspace.");
76 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
77 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
78 validParamList->set<bool> ("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
79
80 return validParamList;
81 } // GetValidParameterList()
82
83 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
84 void RegionRFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
85 DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
86
87 Input(fineLevel, "A");
88 Input(fineLevel, "numDimensions");
89 Input(fineLevel, "lNodesPerDim");
90 Input(fineLevel, "Nullspace");
91 Input(fineLevel, "Coordinates");
92
93 } // DeclareInput()
94
95 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
96 void RegionRFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
97 Build(Level& fineLevel, Level& coarseLevel) const {
98
99 // Set debug outputs based on environment variable
100 RCP<Teuchos::FancyOStream> out;
101 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
102 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
103 out->setShowAllFrontMatter(false).setShowProcRank(true);
104 } else {
105 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
106 }
107
108 *out << "Starting RegionRFactory::Build." << std::endl;
109
110 // First get the inputs from the fineLevel
111 const int numDimensions = Get<int>(fineLevel, "numDimensions");
112 Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
113 {
114 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
115 for(int dim = 0; dim < numDimensions; ++dim) {
116 lFineNodesPerDim[dim] = lNodesPerDim[dim];
117 }
118 }
119 *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
120 << std::endl;
121
122 // Let us check that the inputs verify our assumptions
123 if(numDimensions < 1 || numDimensions > 3) {
124 throw std::runtime_error("numDimensions must be 1, 2 or 3!");
125 }
126 for(int dim = 0; dim < numDimensions; ++dim) {
127 if(lFineNodesPerDim[dim] % 3 != 1) {
128 throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
129 }
130 }
131 Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
132
133 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
134
135 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
136 fineCoordinates = Get< RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
137 if(static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
138 throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
139 }
140
141 // Let us create R and pass it down to the
142 // appropriate specialization and see what we
143 // get back!
144 RCP<Matrix> R;
145
146 if(numDimensions == 1) {
147 throw std::runtime_error("RegionRFactory no implemented for 1D case yet.");
148 } else if(numDimensions == 2) {
149 throw std::runtime_error("RegionRFactory no implemented for 2D case yet.");
150 } else if(numDimensions == 3) {
151 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
152 R, coarseCoordinates, lCoarseNodesPerDim);
153 }
154
155 const Teuchos::ParameterList& pL = GetParameterList();
156
157 // Reuse pattern if available (multiple solve)
158 RCP<ParameterList> Tparams;
159 if(pL.isSublist("matrixmatrix: kernel params"))
160 Tparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
161 else
162 Tparams= rcp(new ParameterList);
163
164 // R->describe(*out, Teuchos::VERB_EXTREME);
165 *out << "Compute P=R^t" << std::endl;
166 // By default, we don't need global constants for transpose
167 Tparams->set("compute global constants: temporaries",Tparams->get("compute global constants: temporaries", false));
168 Tparams->set("compute global constants", Tparams->get("compute global constants",false));
169 std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
170 RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
171
172 *out << "Compute coarse nullspace" << std::endl;
173 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
174 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
175 fineNullspace->getNumVectors());
176 R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
177 Teuchos::ScalarTraits<SC>::zero());
178
179 *out << "Set data on coarse level" << std::endl;
180 Set(coarseLevel, "numDimensions", numDimensions);
181 Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
182 Set(coarseLevel, "Nullspace", coarseNullspace);
183 Set(coarseLevel, "Coordinates", coarseCoordinates);
184 if(pL.get<bool>("keep coarse coords")) {
185 coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
186 }
187
188 R->SetFixedBlockSize(A->GetFixedBlockSize());
189 P->SetFixedBlockSize(A->GetFixedBlockSize());
190
191 Set(coarseLevel, "R", R);
192 Set(coarseLevel, "P", P);
193
194 } // Build()
195
196 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
197 void RegionRFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
198 Build3D(const int numDimensions,
199 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
200 const RCP<Matrix>& A,
201 const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
202 RCP<Matrix>& R,
203 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
204 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
205 using local_matrix_type = typename CrsMatrix::local_matrix_type;
206 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
207 using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
208 using entries_type = typename local_matrix_type::index_type::non_const_type;
209 using values_type = typename local_matrix_type::values_type::non_const_type;
210
211 // Set debug outputs based on environment variable
212 RCP<Teuchos::FancyOStream> out;
213 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
214 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
215 out->setShowAllFrontMatter(false).setShowProcRank(true);
216 } else {
217 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
218 }
219
220
221 // Now compute number of coarse grid points
222 for(int dim = 0; dim < numDimensions; ++dim) {
223 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
224 }
225 *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
226
227 // Grab the block size here and multiply all existing offsets by it
228 const LO blkSize = A->GetFixedBlockSize();
229 *out << "blkSize " << blkSize << std::endl;
230
231 // Based on lCoarseNodesPerDim and lFineNodesPerDim
232 // we can compute numRows, numCols and NNZ for R
233 const LO numRows = blkSize*lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2];
234 const LO numCols = blkSize*lFineNodesPerDim[0]*lFineNodesPerDim[1]*lFineNodesPerDim[2];
235
236 // Create the coarse coordinates multivector
237 // so we can fill it on the fly while computing
238 // the restriction operator
239 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
240 Teuchos::OrdinalTraits<GO>::invalid(),
241 numRows,
242 A->getRowMap()->getIndexBase(),
243 A->getRowMap()->getComm());
244
245 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(rowMap,
246 numDimensions);
247 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
248 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
249 for(int dim = 0; dim < numDimensions; ++dim) {
250 fineCoordData[dim] = fineCoordinates->getData(dim);
251 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
252 }
253
254 // Let us set some parameter that will be useful
255 // while constructing R
256
257 // Length of interpolation stencils based on geometry
258 const LO cornerStencilLength = 27;
259 const LO edgeStencilLength = 45;
260 const LO faceStencilLength = 75;
261 const LO interiorStencilLength = 125;
262
263 // Number of corner, edge, face and interior nodes
264 const LO numCorners = 8;
265 const LO numEdges = 4*(lCoarseNodesPerDim[0] - 2)
266 + 4*(lCoarseNodesPerDim[1] - 2)
267 + 4*(lCoarseNodesPerDim[2] - 2);
268 const LO numFaces = 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
269 + 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[2] - 2)
270 + 2*(lCoarseNodesPerDim[1] - 2)*(lCoarseNodesPerDim[2] - 2);
271 const LO numInteriors = (lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
272 *(lCoarseNodesPerDim[2] - 2);
273
274 const LO nnz = (numCorners*cornerStencilLength + numEdges*edgeStencilLength
275 + numFaces*faceStencilLength + numInteriors*interiorStencilLength)*blkSize;
276
277 // Having the number of rows and columns we can genrate
278 // the appropriate maps for our operator.
279
280 *out << "R statistics:" << std::endl
281 << " -numRows= " << numRows << std::endl
282 << " -numCols= " << numCols << std::endl
283 << " -nnz= " << nnz << std::endl;
284
285 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
286 typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
287
288 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
289 typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
290
291 values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
292 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
293
294 // Compute the basic interpolation
295 // coefficients for 1D rate of 3
296 // coarsening.
297 Array<SC> coeffs({1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0});
298 row_map_h(0) = 0;
299
300 // Define some offsets that
301 // will be needed often later on
302 const LO edgeLineOffset = 2*cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength;
303 const LO faceLineOffset = 2*edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength;
304 const LO interiorLineOffset = 2*faceStencilLength
305 + (lCoarseNodesPerDim[0] - 2)*interiorStencilLength;
306
307 const LO facePlaneOffset = 2*edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset;
308 const LO interiorPlaneOffset = 2*faceLineOffset + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset;
309
310 // Let us take care of the corners
311 // first since we always have
312 // corners to deal with!
313 {
314 // Corner 1
315 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
316 for(LO l = 0; l < blkSize; ++l) {
317 for(LO k = 0; k < 3; ++k) {
318 for(LO j = 0; j < 3; ++j) {
319 for(LO i = 0; i < 3; ++i) {
320 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
321 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
322 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i + 2];
323 }
324 }
325 }
326 }
327 for(LO l = 0; l < blkSize; ++l) {
328 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
329 }
330 for(int dim = 0; dim <numDimensions; ++dim) {
331 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
332 }
333
334 // Corner 5
335 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
336 rowIdx = coordRowIdx*blkSize;
337 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
338 columnOffset = coordColumnOffset*blkSize;
339 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
340 for(LO l = 0; l < blkSize; ++l) {
341 for(LO k = 0; k < 3; ++k) {
342 for(LO j = 0; j < 3; ++j) {
343 for(LO i = 0; i < 3; ++i) {
344 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
345 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
346 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
347 }
348 }
349 }
350 }
351 for(LO l = 0; l < blkSize; ++l) {
352 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
353 }
354 for(int dim = 0; dim <numDimensions; ++dim) {
355 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
356 }
357
358 // Corner 2
359 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
360 rowIdx = coordRowIdx*blkSize;
361 coordColumnOffset = (lFineNodesPerDim[0] - 1);
362 columnOffset = coordColumnOffset*blkSize;
363 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
364 for(LO l = 0; l < blkSize; ++l) {
365 for(LO k = 0; k < 3; ++k) {
366 for(LO j = 0; j < 3; ++j) {
367 for(LO i = 0; i < 3; ++i) {
368 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
369 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
370 + j*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
371 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
372 }
373 }
374 }
375 }
376 for(LO l = 0; l < blkSize; ++l) {
377 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
378 }
379 for(int dim = 0; dim <numDimensions; ++dim) {
380 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
381 }
382
383 // Corner 6
384 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
385 rowIdx = coordRowIdx*blkSize;
386 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
387 columnOffset = coordColumnOffset*blkSize;
388 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
389 for(LO l = 0; l < blkSize; ++l) {
390 for(LO k = 0; k < 3; ++k) {
391 for(LO j = 0; j < 3; ++j) {
392 for(LO i = 0; i < 3; ++i) {
393 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
394 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
395 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
396 }
397 }
398 }
399 }
400 for(LO l = 0; l < blkSize; ++l) {
401 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
402 }
403 for(int dim = 0; dim <numDimensions; ++dim) {
404 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
405 }
406
407 // Corner 3
408 coordRowIdx = (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0];
409 rowIdx = coordRowIdx*blkSize;
410 coordColumnOffset = (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0];
411 columnOffset = coordColumnOffset*blkSize;
412 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset)*blkSize;
413 for(LO l = 0; l < blkSize; ++l) {
414 for(LO k = 0; k < 3; ++k) {
415 for(LO j = 0; j < 3; ++j) {
416 for(LO i = 0; i < 3; ++i) {
417 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
418 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
419 + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
420 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
421 }
422 }
423 }
424 }
425 for(LO l = 0; l < blkSize; ++l) {
426 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
427 }
428 for(int dim = 0; dim <numDimensions; ++dim) {
429 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
430 }
431
432 // Corner 7
433 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
434 rowIdx = coordRowIdx*blkSize;
435 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
436 columnOffset = coordColumnOffset*blkSize;
437 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
438 for(LO l = 0; l < blkSize; ++l) {
439 for(LO k = 0; k < 3; ++k) {
440 for(LO j = 0; j < 3; ++j) {
441 for(LO i = 0; i < 3; ++i) {
442 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
443 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
444 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
445 }
446 }
447 }
448 }
449 for(LO l = 0; l < blkSize; ++l) {
450 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
451 }
452 for(int dim = 0; dim <numDimensions; ++dim) {
453 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
454 }
455
456 // Corner 4
457 coordRowIdx = (lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
458 rowIdx = coordRowIdx*blkSize;
459 coordColumnOffset = (lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
460 columnOffset = coordColumnOffset*blkSize;
461 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset +
462 cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
463 for(LO l = 0; l < blkSize; ++l) {
464 for(LO k = 0; k < 3; ++k) {
465 for(LO j = 0; j < 3; ++j) {
466 for(LO i = 0; i < 3; ++i) {
467 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
468 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
469 + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
470 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
471 }
472 }
473 }
474 }
475 for(LO l = 0; l < blkSize; ++l) {
476 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
477 }
478 for(int dim = 0; dim <numDimensions; ++dim) {
479 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
480 }
481
482 // Corner 8
483 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
484 rowIdx = coordRowIdx*blkSize;
485 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
486 columnOffset = coordColumnOffset*blkSize;
487 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
488 for(LO l = 0; l < blkSize; ++l) {
489 for(LO k = 0; k < 3; ++k) {
490 for(LO j = 0; j < 3; ++j) {
491 for(LO i = 0; i < 3; ++i) {
492 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
493 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
494 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
495 }
496 }
497 }
498 }
499 for(LO l = 0; l < blkSize; ++l) {
500 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
501 }
502 for(int dim = 0; dim <numDimensions; ++dim) {
503 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
504 }
505 } // Corners are done!
506
507 // Edges along 0 direction
508 if(lCoarseNodesPerDim[0] - 2 > 0) {
509
510 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
511 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
512
513 // Edge 0
514 coordRowIdx = (edgeIdx + 1);
515 rowIdx = coordRowIdx*blkSize;
516 coordColumnOffset = (edgeIdx + 1)*3;
517 columnOffset = coordColumnOffset*blkSize;
518 entryOffset = (cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
519 for(LO l = 0; l < blkSize; ++l) {
520 for(LO k = 0; k < 3; ++k) {
521 for(LO j = 0; j < 3; ++j) {
522 for(LO i = 0; i < 5; ++i) {
523 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
524 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
525 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
526 }
527 }
528 }
529 }
530 for(LO l = 0; l < blkSize; ++l) {
531 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
532 }
533 for(int dim = 0; dim <numDimensions; ++dim) {
534 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
535 }
536
537 // Edge 1
538 coordRowIdx = ((lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
539 rowIdx = coordRowIdx*blkSize;
540 coordColumnOffset = ((lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
541 columnOffset = coordColumnOffset*blkSize;
542 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
543 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
544 for(LO l = 0; l < blkSize; ++l) {
545 for(LO k = 0; k < 3; ++k) {
546 for(LO j = 0; j < 3; ++j) {
547 for(LO i = 0; i < 5; ++i) {
548 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
549 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
550 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
551 }
552 }
553 }
554 }
555 for(LO l = 0; l < blkSize; ++l) {
556 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
557 }
558 for(int dim = 0; dim <numDimensions; ++dim) {
559 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
560 }
561
562 // Edge 2
563 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
564 + edgeIdx + 1);
565 rowIdx = coordRowIdx*blkSize;
566 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
567 + (edgeIdx + 1)*3);
568 columnOffset = coordColumnOffset*blkSize;
569 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
570 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
571 for(LO l = 0; l < blkSize; ++l) {
572 for(LO k = 0; k < 3; ++k) {
573 for(LO j = 0; j < 3; ++j) {
574 for(LO i = 0; i < 5; ++i) {
575 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
576 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
577 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
578 }
579 }
580 }
581 }
582 for(LO l = 0; l < blkSize; ++l) {
583 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
584 }
585 for(int dim = 0; dim <numDimensions; ++dim) {
586 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
587 }
588
589 // Edge 3
590 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
591 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
592 rowIdx = coordRowIdx*blkSize;
593 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
594 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
595 columnOffset = coordColumnOffset*blkSize;
596 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
597 + edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
598 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
599 for(LO l = 0; l < blkSize; ++l) {
600 for(LO k = 0; k < 3; ++k) {
601 for(LO j = 0; j < 3; ++j) {
602 for(LO i = 0; i < 5; ++i) {
603 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
604 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
605 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
606 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
607 }
608 }
609 }
610 }
611 for(LO l = 0; l < blkSize; ++l) {
612 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
613 }
614 for(int dim = 0; dim <numDimensions; ++dim) {
615 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
616 }
617 }
618 }
619
620 // Edges along 1 direction
621 if(lCoarseNodesPerDim[1] - 2 > 0) {
622
623 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
624 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
625
626 // Edge 0
627 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[0];
628 rowIdx = coordRowIdx*blkSize;
629 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[0];
630 columnOffset = coordColumnOffset*blkSize;
631 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
632 for(LO l = 0; l < blkSize; ++l) {
633 for(LO k = 0; k < 3; ++k) {
634 for(LO j = 0; j < 5; ++j) {
635 for(LO i = 0; i < 3; ++i) {
636 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
637 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
638 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
639 }
640 }
641 }
642 }
643 for(LO l = 0; l < blkSize; ++l) {
644 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
645 }
646 for(int dim = 0; dim <numDimensions; ++dim) {
647 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
648 }
649
650 // Edge 1
651 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
652 rowIdx = coordRowIdx*blkSize;
653 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
654 columnOffset = coordColumnOffset*blkSize;
655 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset
656 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
657 for(LO l = 0; l < blkSize; ++l) {
658 for(LO k = 0; k < 3; ++k) {
659 for(LO j = 0; j < 5; ++j) {
660 for(LO i = 0; i < 3; ++i) {
661 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
662 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
663 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
664 }
665 }
666 }
667 }
668 for(LO l = 0; l < blkSize; ++l) {
669 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
670 }
671 for(int dim = 0; dim <numDimensions; ++dim) {
672 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
673 }
674
675 // Edge 2
676 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
677 + (edgeIdx + 1)*lCoarseNodesPerDim[0]);
678 rowIdx = coordRowIdx*blkSize;
679 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
680 + (edgeIdx + 1)*3*lFineNodesPerDim[0]);
681 columnOffset = coordColumnOffset*blkSize;
682 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
683 + edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
684 for(LO l = 0; l < blkSize; ++l) {
685 for(LO k = 0; k < 3; ++k) {
686 for(LO j = 0; j < 5; ++j) {
687 for(LO i = 0; i < 3; ++i) {
688 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
689 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
690 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
691 }
692 }
693 }
694 }
695 for(LO l = 0; l < blkSize; ++l) {
696 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
697 }
698 for(int dim = 0; dim <numDimensions; ++dim) {
699 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
700 }
701
702 // Edge 3
703 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
704 + (edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
705 rowIdx = coordRowIdx*blkSize;
706 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
707 + (edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
708 columnOffset = coordColumnOffset*blkSize;
709 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
710 + edgeLineOffset + edgeIdx*faceLineOffset
711 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
712 for(LO l = 0; l < blkSize; ++l) {
713 for(LO k = 0; k < 3; ++k) {
714 for(LO j = 0; j < 5; ++j) {
715 for(LO i = 0; i < 3; ++i) {
716 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
717 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
718 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
719 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
720 }
721 }
722 }
723 }
724 for(LO l = 0; l < blkSize; ++l) {
725 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
726 }
727 for(int dim = 0; dim <numDimensions; ++dim) {
728 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
729 }
730 }
731 }
732
733 // Edges along 2 direction
734 if(lCoarseNodesPerDim[2] - 2 > 0) {
735
736 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
737 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
738
739 // Edge 0
740 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
741 rowIdx = coordRowIdx*blkSize;
742 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0];
743 columnOffset = coordColumnOffset*blkSize;
744 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset)*blkSize;
745 for(LO l = 0; l < blkSize; ++l) {
746 for(LO k = 0; k < 5; ++k) {
747 for(LO j = 0; j < 3; ++j) {
748 for(LO i = 0; i < 3; ++i) {
749 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
750 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
751 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
752 }
753 }
754 }
755 }
756 for(LO l = 0; l < blkSize; ++l) {
757 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
758 }
759 for(int dim = 0; dim <numDimensions; ++dim) {
760 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
761 }
762
763 // Edge 1
764 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
765 + lCoarseNodesPerDim[0] - 1);
766 rowIdx = coordRowIdx*blkSize;
767 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
768 + lFineNodesPerDim[0] - 1);
769 columnOffset = coordColumnOffset*blkSize;
770 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength
771 + edgeIdx*interiorPlaneOffset)*blkSize;
772 for(LO l = 0; l < blkSize; ++l) {
773 for(LO k = 0; k < 5; ++k) {
774 for(LO j = 0; j < 3; ++j) {
775 for(LO i = 0; i < 3; ++i) {
776 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
777 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
778 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
779 }
780 }
781 }
782 }
783 for(LO l = 0; l < blkSize; ++l) {
784 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
785 }
786 for(int dim = 0; dim <numDimensions; ++dim) {
787 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
788 }
789
790 // Edge 2
791 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
792 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0]);
793 rowIdx = coordRowIdx*blkSize;
794 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
795 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0]);
796 columnOffset = coordColumnOffset*blkSize;
797 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset + faceLineOffset
798 + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset)*blkSize;
799 for(LO l = 0; l < blkSize; ++l) {
800 for(LO k = 0; k < 5; ++k) {
801 for(LO j = 0; j < 3; ++j) {
802 for(LO i = 0; i < 3; ++i) {
803 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
804 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
805 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
806 }
807 }
808 }
809 }
810 for(LO l = 0; l < blkSize; ++l) {
811 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
812 }
813 for(int dim = 0; dim <numDimensions; ++dim) {
814 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
815 }
816
817 // Edge 3
818 coordRowIdx = ((edgeIdx + 2)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
819 rowIdx = coordRowIdx*blkSize;
820 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
821 + lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
822 columnOffset = coordColumnOffset*blkSize;
823 entryOffset = (facePlaneOffset + (edgeIdx + 1)*interiorPlaneOffset - edgeStencilLength)*blkSize;
824 for(LO l = 0; l < blkSize; ++l) {
825 for(LO k = 0; k < 5; ++k) {
826 for(LO j = 0; j < 3; ++j) {
827 for(LO i = 0; i < 3; ++i) {
828 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
829 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
830 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
831 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
832 }
833 }
834 }
835 }
836 for(LO l = 0; l < blkSize; ++l) {
837 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
838 }
839 for(int dim = 0; dim <numDimensions; ++dim) {
840 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
841 }
842 }
843 }
844
845 // Faces in 0-1 plane
846 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
847
848 Array<LO> gridIdx(3);
849 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
850 for(LO faceIdx=0; faceIdx < (lCoarseNodesPerDim[1]-2)*(lCoarseNodesPerDim[0]-2); ++faceIdx) {
851
852 // Face 0
853 coordRowIdx = ((gridIdx[1] + 1)*lCoarseNodesPerDim[0] + gridIdx[0] + 1);
854 rowIdx = coordRowIdx*blkSize;
855 coordColumnOffset = 3*((gridIdx[1] + 1)*lFineNodesPerDim[0] + gridIdx[0] + 1);
856 columnOffset = coordColumnOffset*blkSize;
857 entryOffset = (edgeLineOffset + edgeStencilLength
858 + gridIdx[1]*faceLineOffset + gridIdx[0]*faceStencilLength)*blkSize;
859 for(LO l = 0; l < blkSize; ++l) {
860 for(LO k = 0; k < 3; ++k) {
861 for(LO j = 0; j < 5; ++j) {
862 for(LO i = 0; i < 5; ++i) {
863 entries_h(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
864 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
865 values_h(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
866 }
867 }
868 }
869 }
870 for(LO l = 0; l < blkSize; ++l) {
871 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
872 }
873 for(int dim = 0; dim <numDimensions; ++dim) {
874 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
875 }
876
877 // Face 1
878 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
879 rowIdx = coordRowIdx*blkSize;
880 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
881 columnOffset = coordColumnOffset*blkSize;
882 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
883 for(LO l = 0; l < blkSize; ++l) {
884 for(LO k = 0; k < 3; ++k) {
885 for(LO j = 0; j < 5; ++j) {
886 for(LO i = 0; i < 5; ++i) {
887 entries_h(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
888 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
889 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
890 values_h(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
891 }
892 }
893 }
894 }
895 for(LO l = 0; l < blkSize; ++l) {
896 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
897 }
898 for(int dim = 0; dim <numDimensions; ++dim) {
899 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
900 }
901
902 // Last step in the loop
903 // update the grid indices
904 // for next grid point
905 ++gridIdx[0];
906 if(gridIdx[0] == lCoarseNodesPerDim[0] - 2) {
907 gridIdx[0] = 0;
908 ++gridIdx[1];
909 }
910 }
911 }
912
913 // Faces in 0-2 plane
914 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
915
916 Array<LO> gridIdx(3);
917 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
918 for(LO faceIdx=0; faceIdx < (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[0]-2); ++faceIdx) {
919
920 // Face 0
921 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] + (gridIdx[0] + 1));
922 rowIdx = coordRowIdx*blkSize;
923 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
924 + gridIdx[0] + 1)*3;
925 columnOffset = coordColumnOffset*blkSize;
926 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + edgeStencilLength
927 + gridIdx[0]*faceStencilLength)*blkSize;
928 for(LO l = 0; l < blkSize; ++l) {
929 for(LO k = 0; k < 5; ++k) {
930 for(LO j = 0; j < 3; ++j) {
931 for(LO i = 0; i < 5; ++i) {
932 entries_h(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
933 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
934 values_h(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
935 }
936 }
937 }
938 }
939 for(LO l = 0; l < blkSize; ++l) {
940 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
941 }
942 for(int dim = 0; dim <numDimensions; ++dim) {
943 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
944 }
945
946 // Face 1
947 coordRowIdx += (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0];
948 rowIdx = coordRowIdx*blkSize;
949 coordColumnOffset += (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0];
950 columnOffset = coordColumnOffset*blkSize;
951 entryOffset += (faceLineOffset + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset)*blkSize;
952 for(LO l = 0; l < blkSize; ++l) {
953 for(LO k = 0; k < 5; ++k) {
954 for(LO j = 0; j < 3; ++j) {
955 for(LO i = 0; i < 5; ++i) {
956 entries_h(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
957 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
958 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
959 values_h(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
960 }
961 }
962 }
963 }
964 for(LO l = 0; l < blkSize; ++l) {
965 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
966 }
967 for(int dim = 0; dim <numDimensions; ++dim) {
968 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
969 }
970
971 // Last step in the loop
972 // update the grid indices
973 // for next grid point
974 ++gridIdx[0];
975 if(gridIdx[0] == lCoarseNodesPerDim[0] - 2) {
976 gridIdx[0] = 0;
977 ++gridIdx[2];
978 }
979 }
980 }
981
982 // Faces in 1-2 plane
983 if((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
984
985 Array<LO> gridIdx(3);
986 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
987 for(LO faceIdx=0; faceIdx < (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[1]-2); ++faceIdx) {
988
989 // Face 0
990 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
991 + (gridIdx[1] + 1)*lCoarseNodesPerDim[0]);
992 rowIdx = coordRowIdx*blkSize;
993 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
994 + (gridIdx[1] + 1)*lFineNodesPerDim[0])*3;
995 columnOffset = coordColumnOffset*blkSize;
996 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + faceLineOffset
997 + gridIdx[1]*interiorLineOffset)*blkSize;
998 for(LO l = 0; l < blkSize; ++l) {
999 for(LO k = 0; k < 5; ++k) {
1000 for(LO j = 0; j < 5; ++j) {
1001 for(LO i = 0; i < 3; ++i) {
1002 entries_h(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1003 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
1004 values_h(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
1005 }
1006 }
1007 }
1008 }
1009 for(LO l = 0; l < blkSize; ++l) {
1010 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1011 }
1012 for(int dim = 0; dim <numDimensions; ++dim) {
1013 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
1014 }
1015
1016 // Face 1
1017 coordRowIdx += (lCoarseNodesPerDim[0] - 1);
1018 rowIdx = coordRowIdx*blkSize;
1019 coordColumnOffset += (lFineNodesPerDim[0] - 1);
1020 columnOffset = coordColumnOffset*blkSize;
1021 entryOffset += (faceStencilLength + (lCoarseNodesPerDim[0] - 2)*interiorStencilLength)*blkSize;
1022 for(LO l = 0; l < blkSize; ++l) {
1023 for(LO k = 0; k < 5; ++k) {
1024 for(LO j = 0; j < 5; ++j) {
1025 for(LO i = 0; i < 3; ++i) {
1026 entries_h(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1027 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1028 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
1029 values_h(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
1030 }
1031 }
1032 }
1033 }
1034 for(LO l = 0; l < blkSize; ++l) {
1035 row_map_h(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1036 }
1037 for(int dim = 0; dim <numDimensions; ++dim) {
1038 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
1039 }
1040
1041 // Last step in the loop
1042 // update the grid indices
1043 // for next grid point
1044 ++gridIdx[1];
1045 if(gridIdx[1] == lCoarseNodesPerDim[1] - 2) {
1046 gridIdx[1] = 0;
1047 ++gridIdx[2];
1048 }
1049 }
1050 }
1051
1052 if(numInteriors > 0) {
1053 // Allocate and compute arrays
1054 // containing column offsets
1055 // and values associated with
1056 // interior points
1057 LO countRowEntries = 0;
1058 Array<LO> coordColumnOffsets(125);
1059 for(LO k = -2; k < 3; ++k) {
1060 for(LO j = -2; j < 3; ++j) {
1061 for(LO i = -2; i < 3; ++i) {
1062 coordColumnOffsets[countRowEntries] = k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1063 + j*lFineNodesPerDim[0] + i;
1064 ++countRowEntries;
1065 }
1066 }
1067 }
1068
1069 LO countValues = 0;
1070 Array<SC> interiorValues(125);
1071 for(LO k = 0; k < 5; ++k) {
1072 for(LO j = 0; j < 5; ++j) {
1073 for(LO i = 0; i < 5; ++i) {
1074 interiorValues[countValues] = coeffs[k]*coeffs[j]*coeffs[i];
1075 ++countValues;
1076 }
1077 }
1078 }
1079
1080 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1081 Array<LO> gridIdx(3);
1082 for(LO interiorIdx = 0; interiorIdx < numInteriors; ++interiorIdx) {
1083 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]
1084 + (gridIdx[1] + 1)*lCoarseNodesPerDim[0]
1085 + gridIdx[0] + 1);
1086 rowIdx = coordRowIdx*blkSize;
1087 coordColumnOffset = ((gridIdx[2] + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1088 + (gridIdx[1] + 1)*3*lFineNodesPerDim[0] + (gridIdx[0] + 1)*3);
1089 columnOffset = coordColumnOffset*blkSize;
1090 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength
1091 + gridIdx[2]*interiorPlaneOffset + gridIdx[1]*interiorLineOffset
1092 + gridIdx[0]*interiorStencilLength)*blkSize;
1093 for(LO l = 0; l < blkSize; ++l) {
1094 row_map_h(rowIdx + 1 + l) = entryOffset + interiorStencilLength*(l+1);
1095 }
1096 // Fill the column indices
1097 // and values in the approproate
1098 // views.
1099 for(LO l = 0; l < blkSize; ++l) {
1100 for(LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1101 entries_h(entryOffset + entryIdx + interiorStencilLength*l) = columnOffset + coordColumnOffsets[entryIdx]*blkSize + l;
1102 values_h(entryOffset + entryIdx + interiorStencilLength*l) = interiorValues[entryIdx];
1103 }
1104 }
1105 for(int dim = 0; dim <numDimensions; ++dim) {
1106 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
1107 }
1108
1109 // Last step in the loop
1110 // update the grid indices
1111 // for next grid point
1112 ++gridIdx[0];
1113 if(gridIdx[0] == lCoarseNodesPerDim[0] - 2) {
1114 gridIdx[0] = 0;
1115 ++gridIdx[1];
1116 if(gridIdx[1] == lCoarseNodesPerDim[1] - 2) {
1117 gridIdx[1] = 0;
1118 ++gridIdx[2];
1119 }
1120 }
1121 }
1122 }
1123
1124 Kokkos::deep_copy(row_map, row_map_h);
1125 Kokkos::deep_copy(entries, entries_h);
1126 Kokkos::deep_copy(values, values_h);
1127
1128 local_graph_type localGraph(entries, row_map);
1129 local_matrix_type localR("R", numCols, values, localGraph);
1130
1131 R = MatrixFactory::Build(localR, // the local data
1132 rowMap, // rowMap
1133 A->getRowMap(), // colMap
1134 A->getRowMap(), // domainMap == colMap
1135 rowMap, // rangeMap == rowMap
1136 Teuchos::null); // params for optimized construction
1137
1138 } // Build3D()
1139
1140} //namespace MueLu
1141
1142#define MUELU_REGIONRFACTORY_SHORT
1143#endif //ifdef HAVE_MUELU_KOKKOS_REFACTOR
1144#endif // MUELU_REGIONRFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
static const NoFactory * get()
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.