Intrepid2
Intrepid2_TensorBasis.hpp
Go to the documentation of this file.
1// @HEADER
2// ************************************************************************
3//
4// Intrepid2 Package
5// Copyright (2007) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Kyungjoo Kim (kyukim@sandia.gov),
38// Mauro Perego (mperego@sandia.gov), or
39// Nate Roberts (nvrober@sandia.gov)
40//
41// ************************************************************************
42// @HEADER
43
49#ifndef Intrepid2_TensorBasis_h
50#define Intrepid2_TensorBasis_h
51
52#include <Kokkos_View.hpp>
53#include <Kokkos_DynRankView.hpp>
54
55#include <Teuchos_RCP.hpp>
56
57#include <Intrepid2_config.h>
58
59#include <map>
60#include <set>
61#include <vector>
62
63#include "Intrepid2_Basis.hpp"
67#include "Intrepid2_Utils.hpp" // defines FAD_VECTOR_SIZE, VECTOR_SIZE
68
69namespace Intrepid2
70{
71 template<ordinal_type spaceDim>
72 KOKKOS_INLINE_FUNCTION
73 void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder);
74
75 template<>
76 KOKKOS_INLINE_FUNCTION
77 void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
78 {
79 entries[0] = operatorOrder;
80 }
81
82 template<>
83 KOKKOS_INLINE_FUNCTION
84 void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
85 {
86 entries[0] = operatorOrder - dkEnum;
87 entries[1] = dkEnum;
88 }
89
90 template<>
91 KOKKOS_INLINE_FUNCTION
92 void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
93 {
94 // formula is zMult + (yMult+zMult)*(yMult+zMult+1)/2; where xMult+yMult+zMult = operatorOrder
95 // it seems complicated to work out a formula that will invert this. For the present we just take a brute force approach,
96 // using getDkEnumeration() to check each possibility
97 for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
98 {
99 for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
100 {
101 const ordinal_type xMult = operatorOrder-(zMult+yMult);
102 if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
103 {
104 entries[0] = xMult;
105 entries[1] = yMult;
106 entries[2] = zMult;
107 }
108 }
109 }
110 }
111
112 template<ordinal_type spaceDim>
113 ordinal_type getDkEnumeration(Kokkos::Array<int,spaceDim> &entries);
114
115 template<>
116 inline ordinal_type getDkEnumeration<1>(Kokkos::Array<int,1> &entries)
117 {
118 return getDkEnumeration<1>(entries[0]);
119 }
120
121 template<>
122 inline ordinal_type getDkEnumeration<2>(Kokkos::Array<int,2> &entries)
123 {
124 return getDkEnumeration<2>(entries[0],entries[1]);
125 }
126
127 template<>
128 inline ordinal_type getDkEnumeration<3>(Kokkos::Array<int,3> &entries)
129 {
130 return getDkEnumeration<3>(entries[0],entries[1],entries[2]);
131 }
132
133 template<ordinal_type spaceDim1, ordinal_type spaceDim2>
134 inline ordinal_type getDkTensorIndex(const ordinal_type dkEnum1, const ordinal_type operatorOrder1,
135 const ordinal_type dkEnum2, const ordinal_type operatorOrder2)
136 {
137 Kokkos::Array<int,spaceDim1> entries1;
138 getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);
139
140 Kokkos::Array<int,spaceDim2> entries2;
141 getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);
142
143 const int spaceDim = spaceDim1 + spaceDim2;
144 Kokkos::Array<int,spaceDim> entries;
145
146 for (ordinal_type d=0; d<spaceDim1; d++)
147 {
148 entries[d] = entries1[d];
149 }
150
151 for (ordinal_type d=0; d<spaceDim2; d++)
152 {
153 entries[d+spaceDim1] = entries2[d];
154 }
155
156 return getDkEnumeration<spaceDim>(entries);
157 }
158
159template<typename BasisBase>
160class Basis_TensorBasis;
161
166{
167 // if we want to make this usable on device, we could switch to Kokkos::Array instead of std::vector. But this is not our immediate use case.
168 std::vector< std::vector<EOperator> > ops; // outer index: vector entry ordinal; inner index: basis component ordinal. (scalar-valued operators have a single entry in outer vector)
169 std::vector<double> weights; // weights for each vector entry
170 ordinal_type numBasisComponents_;
171
172 OperatorTensorDecomposition(const std::vector<EOperator> &opsBasis1, const std::vector<EOperator> &opsBasis2, const std::vector<double> vectorComponentWeights)
173 :
174 weights(vectorComponentWeights),
175 numBasisComponents_(2)
176 {
177 const ordinal_type size = opsBasis1.size();
178 const ordinal_type opsBasis2Size = opsBasis2.size();
179 const ordinal_type weightsSize = weights.size();
180 INTREPID2_TEST_FOR_EXCEPTION(size != opsBasis2Size, std::invalid_argument, "opsBasis1.size() != opsBasis2.size()");
181 INTREPID2_TEST_FOR_EXCEPTION(size != weightsSize, std::invalid_argument, "opsBasis1.size() != weights.size()");
182
183 for (ordinal_type i=0; i<size; i++)
184 {
185 ops.push_back(std::vector<EOperator>{opsBasis1[i],opsBasis2[i]});
186 }
187 }
188
189 OperatorTensorDecomposition(const std::vector< std::vector<EOperator> > &vectorEntryOps, const std::vector<double> &vectorComponentWeights)
190 :
191 ops(vectorEntryOps),
192 weights(vectorComponentWeights)
193 {
194 const ordinal_type numVectorComponents = ops.size();
195 const ordinal_type weightsSize = weights.size();
196 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents != weightsSize, std::invalid_argument, "opsBasis1.size() != weights.size()");
197
198 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents == 0, std::invalid_argument, "must have at least one entry!");
199
200 ordinal_type numBases = 0;
201 for (ordinal_type i=0; i<numVectorComponents; i++)
202 {
203 if (numBases == 0)
204 {
205 numBases = ops[i].size();
206 }
207 else if (ops[i].size() != 0)
208 {
209 const ordinal_type opsiSize = ops[i].size();
210 INTREPID2_TEST_FOR_EXCEPTION(numBases != opsiSize, std::invalid_argument, "must have one operator for each basis in each nontrivial entry in vectorEntryOps");
211 }
212 }
213 INTREPID2_TEST_FOR_EXCEPTION(numBases == 0, std::invalid_argument, "at least one vectorEntryOps entry must be non-trivial");
214 numBasisComponents_ = numBases;
215 }
216
217 OperatorTensorDecomposition(const std::vector<EOperator> &basisOps, const double weight = 1.0)
218 :
219 ops({basisOps}),
220 weights({weight}),
221 numBasisComponents_(basisOps.size())
222 {}
223
224 OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, double weight = 1.0)
225 :
226 ops({ std::vector<EOperator>{opBasis1, opBasis2} }),
227 weights({weight}),
228 numBasisComponents_(2)
229 {}
230
231 OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, const EOperator &opBasis3, double weight = 1.0)
232 :
233 ops({ std::vector<EOperator>{opBasis1, opBasis2, opBasis3} }),
234 weights({weight}),
235 numBasisComponents_(3)
236 {}
237
238 ordinal_type numVectorComponents() const
239 {
240 return ops.size(); // will match weights.size()
241 }
242
243 ordinal_type numBasisComponents() const
244 {
245 return numBasisComponents_;
246 }
247
248 double weight(const ordinal_type &vectorComponentOrdinal) const
249 {
250 return weights[vectorComponentOrdinal];
251 }
252
253 bool identicallyZeroComponent(const ordinal_type &vectorComponentOrdinal) const
254 {
255 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
256 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
257 return ops[vectorComponentOrdinal].size() == 0;
258 }
259
260 EOperator op(const ordinal_type &vectorComponentOrdinal, const ordinal_type &basisOrdinal) const
261 {
262 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
263 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
264 if (identicallyZeroComponent(vectorComponentOrdinal))
265 {
266 return OPERATOR_MAX; // by convention: zero in this component
267 }
268 else
269 {
270 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal < 0, std::invalid_argument, "basisOrdinal is out of bounds");
271 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal >= numBasisComponents_, std::invalid_argument, "basisOrdinal is out of bounds");
272 return ops[vectorComponentOrdinal][basisOrdinal];
273 }
274 }
275
277 template<typename DeviceType, typename OutputValueType, class PointValueType>
279 {
280 const ordinal_type basesSize = bases.size();
281 INTREPID2_TEST_FOR_EXCEPTION(basesSize != numBasisComponents_, std::invalid_argument, "The number of bases provided must match the number of basis components in this decomposition");
282
283 ordinal_type numExpandedBasisComponents = 0;
285 using TensorBasis = Basis_TensorBasis<BasisBase>;
286 std::vector<TensorBasis*> basesAsTensorBasis(numBasisComponents_);
287 for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
288 {
289 TensorBasis* basisAsTensorBasis = dynamic_cast<TensorBasis*>(bases[basisComponentOrdinal].get());
290 basesAsTensorBasis[basisComponentOrdinal] = basisAsTensorBasis;
291 if (basisAsTensorBasis)
292 {
293 numExpandedBasisComponents += basisAsTensorBasis->getTensorBasisComponents().size();
294 }
295 else
296 {
297 numExpandedBasisComponents += 1;
298 }
299 }
300
301 std::vector< std::vector<EOperator> > expandedOps; // outer index: vector entry ordinal; inner index: basis component ordinal.
302 std::vector<double> expandedWeights;
303 const ordinal_type opsSize = ops.size();
304 for (ordinal_type simpleVectorEntryOrdinal=0; simpleVectorEntryOrdinal<opsSize; simpleVectorEntryOrdinal++)
305 {
306 if (identicallyZeroComponent(simpleVectorEntryOrdinal))
307 {
308 expandedOps.push_back(std::vector<EOperator>{});
309 expandedWeights.push_back(0.0);
310 continue;
311 }
312
313 std::vector< std::vector<EOperator> > expandedBasisOpsForSimpleVectorEntry(1); // start out with one outer entry; expands if a component is vector-valued
314
315 // this lambda appends an op to each of the vector components
316 auto addExpandedOp = [&expandedBasisOpsForSimpleVectorEntry](const EOperator &op)
317 {
318 const ordinal_type size = expandedBasisOpsForSimpleVectorEntry.size();
319 for (ordinal_type i=0; i<size; i++)
320 {
321 expandedBasisOpsForSimpleVectorEntry[i].push_back(op);
322 }
323 };
324
325 // this lambda takes a scalar-valued (single outer entry) expandedBasisOps and expands it
326 // according to the number of vector entries coming from the vector-valued component basis
327 auto vectorizeExpandedOps = [&expandedBasisOpsForSimpleVectorEntry](const int &numSubVectors)
328 {
329 // we require that this only gets called once per simpleVectorEntryOrdinal -- i.e., only one basis component gets to be vector-valued.
330 INTREPID2_TEST_FOR_EXCEPTION(expandedBasisOpsForSimpleVectorEntry.size() != 1, std::invalid_argument, "multiple basis components may not be vector-valued!");
331 for (ordinal_type i=1; i<numSubVectors; i++)
332 {
333 expandedBasisOpsForSimpleVectorEntry.push_back(expandedBasisOpsForSimpleVectorEntry[0]);
334 }
335 };
336
337 std::vector<EOperator> subVectorOps; // only used if one of the components is vector-valued
338 std::vector<double> subVectorWeights {weights[simpleVectorEntryOrdinal]};
339 for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
340 {
341 const auto &op = ops[simpleVectorEntryOrdinal][basisComponentOrdinal];
342
343 if (! basesAsTensorBasis[basisComponentOrdinal])
344 {
345 addExpandedOp(op);
346 }
347 else
348 {
349 OperatorTensorDecomposition basisOpDecomposition = basesAsTensorBasis[basisComponentOrdinal]->getOperatorDecomposition(op);
350 if (basisOpDecomposition.numVectorComponents() > 1)
351 {
352 // We don't currently support a use case where we have multiple component bases that are vector-valued:
353 INTREPID2_TEST_FOR_EXCEPTION(subVectorWeights.size() > 1, std::invalid_argument, "Unhandled case: multiple component bases are vector-valued");
354 // We do support a single vector-valued case, though; this splits the current simpleVectorEntryOrdinal into an appropriate number of components that come in order in the expanded vector
355 ordinal_type numSubVectors = basisOpDecomposition.numVectorComponents();
356 vectorizeExpandedOps(numSubVectors);
357
358 double weightSoFar = subVectorWeights[0];
359 for (ordinal_type subVectorEntryOrdinal=1; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
360 {
361 subVectorWeights.push_back(weightSoFar * basisOpDecomposition.weight(subVectorEntryOrdinal));
362 }
363 subVectorWeights[0] *= basisOpDecomposition.weight(0);
364 for (ordinal_type subVectorEntryOrdinal=0; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
365 {
366 for (ordinal_type subComponentBasis=0; subComponentBasis<basisOpDecomposition.numBasisComponents(); subComponentBasis++)
367 {
368 const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
369 expandedBasisOpsForSimpleVectorEntry[subVectorEntryOrdinal].push_back(basisOp);
370 }
371 }
372 }
373 else
374 {
375 double componentWeight = basisOpDecomposition.weight(0);
376 const ordinal_type size = subVectorWeights.size();
377 for (ordinal_type i=0; i<size; i++)
378 {
379 subVectorWeights[i] *= componentWeight;
380 }
381 ordinal_type subVectorEntryOrdinal = 0;
382 const ordinal_type numBasisComponents = basisOpDecomposition.numBasisComponents();
383 for (ordinal_type subComponentBasis=0; subComponentBasis<numBasisComponents; subComponentBasis++)
384 {
385 const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, basisComponentOrdinal);
386 addExpandedOp( basisOp );
387 }
388 }
389 }
390 }
391
392 // sanity check on the new expandedOps entries:
393 for (ordinal_type i=0; i<static_cast<ordinal_type>(expandedBasisOpsForSimpleVectorEntry.size()); i++)
394 {
395 const ordinal_type size = expandedBasisOpsForSimpleVectorEntry[i].size();
396 INTREPID2_TEST_FOR_EXCEPTION(size != numExpandedBasisComponents, std::logic_error, "each vector in expandedBasisOpsForSimpleVectorEntry should have as many entries as there are expanded basis components");
397 }
398
399 expandedOps.insert(expandedOps.end(), expandedBasisOpsForSimpleVectorEntry.begin(), expandedBasisOpsForSimpleVectorEntry.end());
400 expandedWeights.insert(expandedWeights.end(), subVectorWeights.begin(), subVectorWeights.end());
401 }
402 // check that vector lengths agree:
403 INTREPID2_TEST_FOR_EXCEPTION(expandedOps.size() != expandedWeights.size(), std::logic_error, "expandedWeights and expandedOps do not agree on the number of vector components");
404
405 return OperatorTensorDecomposition(expandedOps, expandedWeights);
406 }
407};
408
414 template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
416 {
417 using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
418 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
419
420 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
421 using TeamMember = typename TeamPolicy::member_type;
422
424 using RankCombinationType = typename TensorViewIteratorType::RankCombinationType;
425
426 OutputFieldType output_; // F,P[,D…]
427 OutputFieldType input1_; // F1,P[,D…] or F1,P1[,D…]
428 OutputFieldType input2_; // F2,P[,D…] or F2,P2[,D…]
429
430 int numFields_, numPoints_;
431 int numFields1_, numPoints1_;
432 int numFields2_, numPoints2_;
433
434 bool tensorPoints_; // if true, input1 and input2 refer to values at decomposed points, and P = P1 * P2. If false, then the two inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
435
436 using RankCombinationViewType = typename TensorViewIteratorType::RankCombinationViewType;
437 RankCombinationViewType rank_combinations_;// indicates the policy by which the input views will be combined in output view
438
439 double weight_;
440
441 public:
442
443 TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
444 bool tensorPoints, double weight)
445 : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
446 {
447 numFields_ = output.extent_int(0);
448 numPoints_ = output.extent_int(1);
449
450 numFields1_ = inputValues1.extent_int(0);
451 numPoints1_ = inputValues1.extent_int(1);
452
453 numFields2_ = inputValues2.extent_int(0);
454 numPoints2_ = inputValues2.extent_int(1);
455
456 if (!tensorPoints_)
457 {
458 // then the point counts should all match
459 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
460 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
461 }
462 else
463 {
464 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument, "incompatible point counts");
465 }
466
467 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument, "incompatible field sizes");
468
469 const ordinal_type max_rank = std::max(inputValues1.rank(),inputValues2.rank());
470 // at present, no supported case will result in an output rank greater than both input ranks
471
472 const ordinal_type outputRank = output.rank();
473 INTREPID2_TEST_FOR_EXCEPTION(outputRank > max_rank, std::invalid_argument, "Unsupported view combination.");
474 rank_combinations_ = RankCombinationViewType("Rank_combinations_", max_rank);
475 auto rank_combinations_host = Kokkos::create_mirror_view(rank_combinations_);
476
477 rank_combinations_host[0] = TensorViewIteratorType::TENSOR_PRODUCT; // field combination is always tensor product
478 rank_combinations_host[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH; // tensorPoints controls interpretation of the point dimension
479 for (ordinal_type d=2; d<max_rank; d++)
480 {
481 // d >= 2 have the interpretation of spatial dimensions (gradients, etc.)
482 // we let the extents of the containers determine what we're doing here
483 if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
484 {
485 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
486 }
487 else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
488 || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
489 )
490 {
491 // this looks like multiplication of a vector by a scalar, resulting in a vector
492 // this can be understood as a tensor product
493 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
494 }
495 else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
496 {
497 // this is actually a generalization of the above case: a tensor product, something like a vector outer product
498 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
499 }
500 else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
501 {
502 // it's a bit weird (I'm not aware of the use case, in the present context), but we can handle this case by adopting DIMENSION_MATCH here
503 // this is something like MATLAB's .* and .+ operators, which operate entry-wise
504 rank_combinations_host[d] = TensorViewIteratorType::DIMENSION_MATCH;
505 }
506 else
507 {
508 std::cout << "inputValues1.extent_int(" << d << ") = " << inputValues1.extent_int(d) << std::endl;
509 std::cout << "inputValues2.extent_int(" << d << ") = " << inputValues2.extent_int(d) << std::endl;
510 std::cout << "output.extent_int(" << d << ") = " << output.extent_int(d) << std::endl;
511 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unable to find an interpretation for this combination of views");
512 }
513 }
514 Kokkos::deep_copy(rank_combinations_,rank_combinations_host);
515 }
516
517 KOKKOS_INLINE_FUNCTION
518 void operator()( const TeamMember & teamMember ) const
519 {
520 auto fieldOrdinal1 = teamMember.league_rank();
521
522 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
523 TensorViewIteratorType it(output_,input1_,input2_,rank_combinations_);
524 const int FIELD_ORDINAL_DIMENSION = 0;
525 it.setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
526 int next_increment_rank = FIELD_ORDINAL_DIMENSION; // used to initialize prev_increment_rank at the start of the do/while loop. Notionally, we last incremented in the field ordinal rank to get to the {fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0} location.
527 OutputScalar accumulator = 0;
528
529 do
530 {
531 accumulator += weight_ * it.getView1Entry() * it.getView2Entry();
532 next_increment_rank = it.nextIncrementRank();
533
534 if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
535 {
536 // then we've finished the accumulation and should set the value
537 it.set(accumulator);
538 // reset the accumulator:
539 accumulator = 0;
540 }
541 } while (it.increment() > FIELD_ORDINAL_DIMENSION);
542 });
543 }
544 };
545
559 template<typename BasisBaseClass = void>
561 :
562 public BasisBaseClass
563 {
564 public:
565 using BasisBase = BasisBaseClass;
566 using BasisPtr = Teuchos::RCP<BasisBase>;
567
568 protected:
569 BasisPtr basis1_;
570 BasisPtr basis2_;
571
572 std::vector<BasisPtr> tensorComponents_;
573
574 std::string name_; // name of the basis
575 public:
576 using DeviceType = typename BasisBase::DeviceType;
577 using ExecutionSpace = typename BasisBase::ExecutionSpace;
578 using OutputValueType = typename BasisBase::OutputValueType;
579 using PointValueType = typename BasisBase::PointValueType;
580
581 using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost;
582 using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost;
583 using OutputViewType = typename BasisBase::OutputViewType;
584 using PointViewType = typename BasisBase::PointViewType;
586 public:
591 Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace = FUNCTION_SPACE_MAX)
592 :
593 basis1_(basis1),basis2_(basis2)
594 {
595 this->functionSpace_ = functionSpace;
596
597 Basis_TensorBasis* basis1AsTensor = dynamic_cast<Basis_TensorBasis*>(basis1_.get());
598 if (basis1AsTensor)
599 {
600 auto basis1Components = basis1AsTensor->getTensorBasisComponents();
601 tensorComponents_.insert(tensorComponents_.end(), basis1Components.begin(), basis1Components.end());
602 }
603 else
604 {
605 tensorComponents_.push_back(basis1_);
606 }
607
608 Basis_TensorBasis* basis2AsTensor = dynamic_cast<Basis_TensorBasis*>(basis2_.get());
609 if (basis2AsTensor)
610 {
611 auto basis2Components = basis2AsTensor->getTensorBasisComponents();
612 tensorComponents_.insert(tensorComponents_.end(), basis2Components.begin(), basis2Components.end());
613 }
614 else
615 {
616 tensorComponents_.push_back(basis2_);
617 }
618
619 this->basisCardinality_ = basis1->getCardinality() * basis2->getCardinality();
620 this->basisDegree_ = std::max(basis1->getDegree(), basis2->getDegree());
621
622 {
623 std::ostringstream basisName;
624 basisName << basis1->getName() << " x " << basis2->getName();
625 name_ = basisName.str();
626 }
627
628 // set cell topology
629 shards::CellTopology cellTopo1 = basis1->getBaseCellTopology();
630 shards::CellTopology cellTopo2 = basis2->getBaseCellTopology();
631
632 auto cellKey1 = basis1->getBaseCellTopology().getKey();
633 auto cellKey2 = basis2->getBaseCellTopology().getKey();
634 if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key))
635 {
636 this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() );
637 }
638 else if (((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
639 || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key)))
640 {
641 this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() );
642 }
643 else
644 {
645 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Cell topology combination not yet supported");
646 }
647
648 this->basisType_ = basis1->getBasisType();
649 this->basisCoordinates_ = COORDINATES_CARTESIAN;
650
651 // initialize tags
652 {
653 const auto & cardinality = this->basisCardinality_;
654
655 // Basis-dependent initializations
656 const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
657 const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
658 const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
659 const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
660
661 OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
662
663 shards::CellTopology cellTopo = this->basisCellTopology_;
664
665 ordinal_type tensorSpaceDim = cellTopo.getDimension();
666 ordinal_type spaceDim1 = cellTopo1.getDimension();
667 ordinal_type spaceDim2 = cellTopo2.getDimension();
668
669 if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
670 {
671 int degreeSize = basis1_->getPolynomialDegreeLength() + basis2_->getPolynomialDegreeLength();
672 this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
673 }
674
675 TensorTopologyMap topoMap(cellTopo1, cellTopo2);
676
677 for (ordinal_type d=0; d<=tensorSpaceDim; d++) // d: tensorial dimension
678 {
679 ordinal_type d2_max = std::min(spaceDim2,d);
680 int subcellOffset = 0; // for this dimension of tensor subcells, how many subcells have we already counted with other d2/d1 combos?
681 for (ordinal_type d2=0; d2<=d2_max; d2++)
682 {
683 ordinal_type d1 = d-d2;
684 if (d1 > spaceDim1) continue;
685
686 ordinal_type subcellCount2 = cellTopo2.getSubcellCount(d2);
687 ordinal_type subcellCount1 = cellTopo1.getSubcellCount(d1);
688 for (ordinal_type subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
689 {
690 ordinal_type subcellDofCount2 = basis2_->getDofCount(d2, subcellOrdinal2);
691 for (ordinal_type subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
692 {
693 ordinal_type subcellDofCount1 = basis1_->getDofCount(d1, subcellOrdinal1);
694 ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
695 for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
696 {
697 ordinal_type fieldOrdinal2 = basis2_->getDofOrdinal(d2, subcellOrdinal2, localDofID2);
698 OrdinalTypeArray1DHost degreesField2;
699 if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
700 for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
701 {
702 ordinal_type fieldOrdinal1 = basis1_->getDofOrdinal(d1, subcellOrdinal1, localDofID1);
703 ordinal_type tensorLocalDofID = localDofID2 * subcellDofCount1 + localDofID1;
704 ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
705 tagView(tensorFieldOrdinal*tagSize+0) = d; // subcell dimension
706 tagView(tensorFieldOrdinal*tagSize+1) = topoMap.getCompositeSubcellOrdinal(d1, subcellOrdinal1, d2, subcellOrdinal2);
707 tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
708 tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
709
710 if (this->basisType_ == BASIS_FEM_HIERARCHICAL)
711 {
712 // fill in degree lookup:
713 OrdinalTypeArray1DHost degreesField1 = basis1_->getPolynomialDegreeOfField(fieldOrdinal1);
714
715 int degreeLengthField1 = degreesField1.extent_int(0);
716 int degreeLengthField2 = degreesField2.extent_int(0);
717 for (int d3=0; d3<degreeLengthField1; d3++)
718 {
719 this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3) = degreesField1(d3);
720 }
721 for (int d3=0; d3<degreeLengthField2; d3++)
722 {
723 this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
724 }
725 }
726 } // localDofID1
727 } // localDofID2
728 } // subcellOrdinal1
729 } // subcellOrdinal2
730 subcellOffset += subcellCount1 * subcellCount2;
731 }
732 }
733
734 // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
735 // // tags are constructed on host
736 this->setOrdinalTagData(this->tagToOrdinal_,
737 this->ordinalToTag_,
738 tagView,
739 this->basisCardinality_,
740 tagSize,
741 posScDim,
742 posScOrd,
743 posDfOrd);
744 }
745 }
746
751 virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator operatorType) const
752 {
753 INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "subclasses must override either getSimpleOperatorDecomposition() or getOperatorDecomposition()");
754 // (TensorBasis3 overrides getOperatorDecomposition()…)
755 }
756
759 virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
760 {
761 OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
762 std::vector<BasisPtr> componentBases {basis1_, basis2_};
763 return opSimpleDecomposition.expandedDecomposition(componentBases);
764 }
765
770 virtual BasisValues<OutputValueType,DeviceType> allocateBasisValues( TensorPoints<PointValueType,DeviceType> points, const EOperator operatorType = OPERATOR_VALUE) const override
771 {
772 const bool operatorSupported = (operatorType == OPERATOR_VALUE) || (operatorType == OPERATOR_GRAD) || (operatorType == OPERATOR_CURL) || (operatorType == OPERATOR_DIV);
773 INTREPID2_TEST_FOR_EXCEPTION(!operatorSupported, std::invalid_argument, "operator is not supported by allocateBasisValues");
774
775 ordinal_type numBasisComponents = tensorComponents_.size();
776 OperatorTensorDecomposition opDecomposition = getOperatorDecomposition(operatorType);
777
778 const ordinal_type numVectorComponents = opDecomposition.numVectorComponents();
779 const bool useVectorData = numVectorComponents > 1;
780
781 if (useVectorData)
782 {
783 const int numFamilies = 1;
784 std::vector< std::vector<TensorData<OutputValueType,DeviceType> > > vectorComponents(numFamilies, std::vector<TensorData<OutputValueType,DeviceType> >(numVectorComponents));
785
786 const int familyOrdinal = 0;
787 for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
788 {
789 if (!opDecomposition.identicallyZeroComponent(vectorComponentOrdinal))
790 {
791 std::vector< Data<OutputValueType,DeviceType> > componentData;
792 for (ordinal_type r=0; r<numBasisComponents; r++)
793 {
794 const int numComponentPoints = points.componentPointCount(r);
795 const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
796 auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
797 componentData.push_back(Data<OutputValueType,DeviceType>(componentView));
798 }
799 vectorComponents[familyOrdinal][vectorComponentOrdinal] = TensorData<OutputValueType,DeviceType>(componentData);
800 }
801 }
802 VectorData<OutputValueType,DeviceType> vectorData(vectorComponents);
804 }
805 else
806 {
807 // TensorData: single tensor product
808 std::vector< Data<OutputValueType,DeviceType> > componentData;
809
810 const ordinal_type vectorComponentOrdinal = 0;
811 for (ordinal_type r=0; r<numBasisComponents; r++)
812 {
813 const int numComponentPoints = points.componentPointCount(r);
814 const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
815 auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
816
817 const int rank = 2; // (F,P) -- TensorData-only BasisValues are always scalar-valued. Use VectorData for vector-valued.
818 // (we need to be explicit about the rank argument because GRAD, even in 1D, elevates to rank 3), so e.g. DIV of HDIV uses a componentView that is rank 3;
819 // we want Data to insulate us from that fact)
820 const Kokkos::Array<int,7> extents {componentView.extent_int(0), componentView.extent_int(1), 1,1,1,1,1};
821 Kokkos::Array<DataVariationType,7> variationType {GENERAL, GENERAL, CONSTANT, CONSTANT, CONSTANT, CONSTANT, CONSTANT };
822 componentData.push_back(Data<OutputValueType,DeviceType>(componentView, rank, extents, variationType));
823 }
824
825 TensorData<OutputValueType,DeviceType> tensorData(componentData);
826
827 std::vector< TensorData<OutputValueType,DeviceType> > tensorDataEntries {tensorData};
828 return BasisValues<OutputValueType,DeviceType>(tensorDataEntries);
829 }
830 }
831
832 // since the getValues() below only overrides the FEM variant, we specify that
833 // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
834 // (It's an error to use the FVD variant on this basis.)
835 using BasisBase::getValues;
836
848 void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition,
849 PointViewType & inputPoints1, PointViewType & inputPoints2, bool &tensorDecompositionSucceeded) const
850 {
851 INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument, "tensor decomposition not yet supported");
852
853 // for inputPoints that are actually tensor-product of component quadrature points (say),
854 // having just the one input (which will have a lot of redundant point data) is suboptimal
855 // The general case can have unique x/y/z coordinates at every point, though, so we have to support that
856 // when this interface is used. But we may try detecting that the data is tensor-product and compressing
857 // from there... Ultimately, we should also add a getValues() variant that takes multiple input point containers,
858 // one for each tensorial dimension.
859
860 // this initial implementation is intended to simplify development of 2D and 3D bases, while also opening
861 // the possibility of higher-dimensional bases. It is not necessarily optimized for speed/memory. There
862 // are things we can do in this regard, which may become important for matrix-free computations wherein
863 // basis values don't get stored but are computed dynamically.
864
865 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
866 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
867
868 int totalSpaceDim = inputPoints.extent_int(1);
869
870 TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
871
872 // first pass: just take subviews to get input points -- this will result in redundant computations when points are themselves tensor product (i.e., inputPoints itself contains redundant data)
873
874 inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
875 inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
876
877 // std::cout << "inputPoints : " << inputPoints.extent(0) << " x " << inputPoints.extent(1) << std::endl;
878 // std::cout << "inputPoints1 : " << inputPoints1.extent(0) << " x " << inputPoints1.extent(1) << std::endl;
879 // std::cout << "inputPoints2 : " << inputPoints2.extent(0) << " x " << inputPoints2.extent(1) << std::endl;
880
881 tensorDecompositionSucceeded = false;
882 }
883
892 virtual void getDofCoords( typename BasisBase::ScalarViewType dofCoords ) const override
893 {
894 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
895 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
896
897 using ValueType = typename BasisBase::ScalarViewType::value_type;
898 using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
899 using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
900
901 const ordinal_type basisCardinality1 = basis1_->getCardinality();
902 const ordinal_type basisCardinality2 = basis2_->getCardinality();
903
904 ViewType dofCoords1("dofCoords1",basisCardinality1,spaceDim1);
905 ViewType dofCoords2("dofCoords2",basisCardinality2,spaceDim2);
906
907 basis1_->getDofCoords(dofCoords1);
908 basis2_->getDofCoords(dofCoords2);
909
910 Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
911 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
912 {
913 for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
914 {
915 const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
916 for (int d1=0; d1<spaceDim1; d1++)
917 {
918 dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
919 }
920 for (int d2=0; d2<spaceDim2; d2++)
921 {
922 dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
923 }
924 }
925 });
926 }
927
928
936 virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
937 {
938 using ValueType = typename BasisBase::ScalarViewType::value_type;
939 using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
940 using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
941
942 ViewType dofCoeffs1("dofCoeffs1",basis1_->getCardinality());
943 ViewType dofCoeffs2("dofCoeffs2",basis2_->getCardinality());
944
945 basis1_->getDofCoeffs(dofCoeffs1);
946 basis2_->getDofCoeffs(dofCoeffs2);
947
948 const ordinal_type basisCardinality1 = basis1_->getCardinality();
949 const ordinal_type basisCardinality2 = basis2_->getCardinality();
950
951 Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
952 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
953 {
954 for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
955 {
956 const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
957 dofCoeffs(fieldOrdinal) = dofCoeffs1(fieldOrdinal1);
958 dofCoeffs(fieldOrdinal) = dofCoeffs2(fieldOrdinal2);
959 }
960 });
961 }
962
967 virtual
968 const char*
969 getName() const override {
970 return name_.c_str();
971 }
972
981 ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1,
982 ordinal_type dkEnum2, ordinal_type operatorOrder2) const
983 {
984 ordinal_type spaceDim1 = basis1_->getBaseCellTopology().getDimension();
985 ordinal_type spaceDim2 = basis2_->getBaseCellTopology().getDimension();
986
987 // for now, we only support total spaceDim <= 3. It would not be too hard to extend to support higher dimensions,
988 // but the support needs to be built out in e.g. shards::CellTopology for this, as well as our DkEnumeration, etc.
989 switch (spaceDim1)
990 {
991 case 1:
992 switch (spaceDim2)
993 {
994 case 1:
995 return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
996 case 2:
997 return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
998 default:
999 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1000 }
1001 case 2:
1002 switch (spaceDim2)
1003 {
1004 case 1:
1005 return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1006 default:
1007 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1008 }
1009 // case 3:
1010 // switch (spaceDim2)
1011 // {
1012 // case 1:
1013 // return getDkTensorIndex<3, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1014 // case 2:
1015 // return getDkTensorIndex<3, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1016 // case 3:
1017 // return getDkTensorIndex<3, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1018 // default:
1019 // INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1020 // }
1021 default:
1022 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1023 }
1024 }
1025
1026 std::vector<BasisPtr> getTensorBasisComponents() const
1027 {
1028 return tensorComponents_;
1029 }
1030
1042 virtual
1043 void
1046 const EOperator operatorType = OPERATOR_VALUE ) const override
1047 {
1048 OperatorTensorDecomposition operatorDecomposition = getOperatorDecomposition(operatorType);
1049
1050 const ordinal_type numVectorComponents = operatorDecomposition.numVectorComponents();
1051 const bool useVectorData = numVectorComponents > 1;
1052 const ordinal_type numBasisComponents = operatorDecomposition.numBasisComponents();
1053
1054 for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1055 {
1056 const double weight = operatorDecomposition.weight(vectorComponentOrdinal);
1057 for (ordinal_type basisOrdinal=0; basisOrdinal<numBasisComponents; basisOrdinal++)
1058 {
1059 const EOperator op = operatorDecomposition.op(vectorComponentOrdinal, basisOrdinal);
1060 // by convention, op == OPERATOR_MAX signals a zero component; skip
1061 if (op != OPERATOR_MAX)
1062 {
1063 const int vectorFamily = 0; // TensorBasis always has just a single family; multiple families arise in DirectSumBasis
1064 auto tensorData = useVectorData ? outputValues.vectorData().getComponent(vectorFamily,vectorComponentOrdinal) : outputValues.tensorData();
1065 INTREPID2_TEST_FOR_EXCEPTION( ! tensorData.getTensorComponent(basisOrdinal).isValid(), std::invalid_argument, "Invalid output component encountered");
1066
1067 PointViewType pointView = inputPoints.getTensorComponent(basisOrdinal);
1068
1069 // Data stores things in fixed-rank Kokkos::View, but Basis requires DynRankView. We allocate a temporary DynRankView, then copy back to Data.
1070 const Data<OutputValueType,DeviceType> & outputData = tensorData.getTensorComponent(basisOrdinal);
1071
1072 auto basisValueView = outputData.getUnderlyingView();
1073 tensorComponents_[basisOrdinal]->getValues(basisValueView, pointView, op);
1074
1075 // if weight is non-trivial (not 1.0), then we need to multiply one of the component views by weight.
1076 // we do that for the first basisOrdinal's values
1077 if ((weight != 1.0) && (basisOrdinal == 0))
1078 {
1079 if (basisValueView.rank() == 2)
1080 {
1081 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1082 Kokkos::parallel_for("multiply basisValueView by weight", policy,
1083 KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1084 basisValueView(fieldOrdinal,pointOrdinal) *= weight;
1085 });
1086 }
1087 else if (basisValueView.rank() == 3)
1088 {
1089 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1),basisValueView.extent_int(2)});
1090 Kokkos::parallel_for("multiply basisValueView by weight", policy,
1091 KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal, const int &d) {
1092 basisValueView(fieldOrdinal,pointOrdinal,d) *= weight;
1093 });
1094 }
1095 else
1096 {
1097 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported rank for basisValueView");
1098 }
1099 }
1100 }
1101 }
1102 }
1103 }
1104
1123 void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1124 const EOperator operatorType = OPERATOR_VALUE ) const override
1125 {
1126 bool tensorPoints; // true would mean that we take the tensor product of inputPoints1 and inputPoints2 (and that this would be equivalent to inputPoints as given -- i.e., inputPoints1 and inputPoints2 would be a tensor decomposition of inputPoints)
1127 bool attemptTensorDecomposition = false; // support for this not yet implemented
1128 PointViewType inputPoints1, inputPoints2;
1129 getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
1130
1131 switch (operatorType)
1132 {
1133 case OPERATOR_D1:
1134 case OPERATOR_D2:
1135 case OPERATOR_D3:
1136 case OPERATOR_D4:
1137 case OPERATOR_D5:
1138 case OPERATOR_D6:
1139 case OPERATOR_D7:
1140 case OPERATOR_D8:
1141 case OPERATOR_D9:
1142 case OPERATOR_D10:
1143 {
1144 auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1145 // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
1146 // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
1147 for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1148 {
1149 int derivativeCountComp1=opOrder-derivativeCountComp2;
1150 EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1151 EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1152
1153 int spaceDim1 = inputPoints1.extent_int(1);
1154 int spaceDim2 = inputPoints2.extent_int(1);
1155
1156 int dkCardinality1 = (op1 != OPERATOR_VALUE) ? getDkCardinality(op1, spaceDim1) : 1;
1157 int dkCardinality2 = (op2 != OPERATOR_VALUE) ? getDkCardinality(op2, spaceDim2) : 1;
1158
1159 int basisCardinality1 = basis1_->getCardinality();
1160 int basisCardinality2 = basis2_->getCardinality();
1161
1162 int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1163
1164 int pointCount1, pointCount2;
1165 if (tensorPoints)
1166 {
1167 pointCount1 = inputPoints1.extent_int(0);
1168 pointCount2 = inputPoints2.extent_int(0);
1169 }
1170 else
1171 {
1172 pointCount1 = totalPointCount;
1173 pointCount2 = totalPointCount;
1174 }
1175
1176 OutputViewType outputValues1, outputValues2;
1177 if (op1 == OPERATOR_VALUE)
1178 outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1);
1179 else
1180 outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
1181
1182 if (op2 == OPERATOR_VALUE)
1183 outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2);
1184 else
1185 outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
1186
1187 basis1_->getValues(outputValues1,inputPoints1,op1);
1188 basis2_->getValues(outputValues2,inputPoints2,op2);
1189
1190 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1191 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1192 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1193
1194 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1195
1196 double weight = 1.0;
1198
1199 for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1200 {
1201 auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
1202 : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
1203 for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1204 {
1205 auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
1206 : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
1207
1208 ordinal_type dkTensorIndex = getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1209 auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
1210 // Note that there may be performance optimizations available here:
1211 // - could eliminate interior for loop in favor of having a vector-valued outputValues1_dk
1212 // - could add support to TensorViewFunctor (and probably TensorViewIterator) for this kind of tensor Dk type of traversal
1213 // (this would allow us to eliminate both for loops here)
1214 // At the moment, we defer such optimizations on the idea that this may not ever become a performance bottleneck.
1215 FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
1216 Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
1217 }
1218 }
1219 }
1220 }
1221 break;
1222 default: // non-OPERATOR_Dn case must be handled by subclass.
1223 this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1224 }
1225 }
1226
1252 virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1253 const PointViewType inputPoints1, const PointViewType inputPoints2,
1254 bool tensorPoints) const
1255 {
1256 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
1257 }
1258
1282 void getValues( OutputViewType outputValues,
1283 const PointViewType inputPoints1, const EOperator operatorType1,
1284 const PointViewType inputPoints2, const EOperator operatorType2,
1285 bool tensorPoints, double weight=1.0) const
1286 {
1287 int basisCardinality1 = basis1_->getCardinality();
1288 int basisCardinality2 = basis2_->getCardinality();
1289
1290 int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1291
1292 int pointCount1, pointCount2;
1293 if (tensorPoints)
1294 {
1295 pointCount1 = inputPoints1.extent_int(0);
1296 pointCount2 = inputPoints2.extent_int(0);
1297 }
1298 else
1299 {
1300 pointCount1 = totalPointCount;
1301 pointCount2 = totalPointCount;
1302 }
1303
1304 int spaceDim1 = inputPoints1.extent_int(1);
1305 int spaceDim2 = inputPoints2.extent_int(1);
1306
1307 INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
1308 std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1309
1310 int opRank1 = getOperatorRank(basis1_->getFunctionSpace(), operatorType1, spaceDim1);
1311 int opRank2 = getOperatorRank(basis2_->getFunctionSpace(), operatorType2, spaceDim2);
1312
1313 OutputViewType outputValues1, outputValues2;
1314 if (opRank1 == 0)
1315 {
1316 outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1317 }
1318 else if (opRank1 == 1)
1319 {
1320 outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1321 }
1322 else
1323 {
1324 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank1");
1325 }
1326
1327 if (opRank2 == 0)
1328 {
1329 outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1330 }
1331 else if (opRank2 == 1)
1332 {
1333 outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1334 }
1335 else
1336 {
1337 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank2");
1338 }
1339
1340 basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1341 basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1342
1343 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1344 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1345 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1346
1347 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1348
1350
1351 FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
1352 Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
1353 }
1354
1360 getHostBasis() const override {
1361 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis subclasses must override getHostBasis");
1362 }
1363 }; // Basis_TensorBasis
1364
1372 template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
1374 {
1375 using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
1376 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1377
1378 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
1379 using TeamMember = typename TeamPolicy::member_type;
1380
1381 OutputFieldType output_; // F,P
1382 OutputFieldType input1_; // F1,P[,D] or F1,P1[,D]
1383 OutputFieldType input2_; // F2,P[,D] or F2,P2[,D]
1384 OutputFieldType input3_; // F2,P[,D] or F2,P2[,D]
1385
1386 int numFields_, numPoints_;
1387 int numFields1_, numPoints1_;
1388 int numFields2_, numPoints2_;
1389 int numFields3_, numPoints3_;
1390
1391 bool tensorPoints_; // if true, input1, input2, input3 refer to values at decomposed points, and P = P1 * P2 * P3. If false, then the three inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
1392
1393 double weight_;
1394
1395 TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
1396 bool tensorPoints, double weight)
1397 : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
1398 {
1399 numFields_ = output.extent_int(0);
1400 numPoints_ = output.extent_int(1);
1401
1402 numFields1_ = inputValues1.extent_int(0);
1403 numPoints1_ = inputValues1.extent_int(1);
1404
1405 numFields2_ = inputValues2.extent_int(0);
1406 numPoints2_ = inputValues2.extent_int(1);
1407
1408 numFields3_ = inputValues3.extent_int(0);
1409 numPoints3_ = inputValues3.extent_int(1);
1410 /*
1411 We don't yet support tensor-valued bases here (only vector and scalar). The main design question is how the layouts
1412 of the input containers relates to the layout of the output container. The work we've done in TensorViewIterator basically
1413 shows the choices that can be made. It does appear that in most cases (at least (most of?) those supported by TensorViewIterator),
1414 we can infer from the dimensions of input/output containers what choice should be made in each dimension.
1415 */
1416 INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1417 INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1418 INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1419 INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1420 INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1421 INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1422
1423 if (!tensorPoints_)
1424 {
1425 // then the point counts should all match
1426 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
1427 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
1428 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument, "incompatible point counts");
1429 }
1430 else
1431 {
1432 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument, "incompatible point counts");
1433 }
1434
1435 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument, "incompatible field sizes");
1436 }
1437
1438 KOKKOS_INLINE_FUNCTION
1439 void operator()( const TeamMember & teamMember ) const
1440 {
1441 auto fieldOrdinal1 = teamMember.league_rank();
1442
1443 if (!tensorPoints_)
1444 {
1445 if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
1446 {
1447 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1448 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1449 {
1450 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1451 for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1452 {
1453 output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1454 }
1455 }
1456 });
1457 }
1458 else if (input1_.rank() == 3)
1459 {
1460 int spaceDim = input1_.extent_int(2);
1461 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1462 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1463 {
1464 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1465 for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1466 {
1467 for (int d=0; d<spaceDim; d++)
1468 {
1469 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1470 }
1471 }
1472 }
1473 });
1474 }
1475 else if (input2_.rank() == 3)
1476 {
1477 int spaceDim = input2_.extent_int(2);
1478 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1479 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1480 {
1481 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1482 for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1483 {
1484 for (int d=0; d<spaceDim; d++)
1485 {
1486 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
1487 }
1488 }
1489 }
1490 });
1491 }
1492 else if (input3_.rank() == 3)
1493 {
1494 int spaceDim = input3_.extent_int(2);
1495 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1496 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1497 {
1498 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1499 for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1500 {
1501 for (int d=0; d<spaceDim; d++)
1502 {
1503 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
1504 }
1505 }
1506 }
1507 });
1508 }
1509 else
1510 {
1511 // unsupported rank combination -- enforced in constructor
1512 }
1513 }
1514 else
1515 {
1516 if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
1517 {
1518 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1519 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1520 {
1521 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1522 for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1523 {
1524 for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1525 {
1526 for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1527 {
1528 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1529 output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1530 }
1531 }
1532 }
1533 }
1534 });
1535 }
1536 else if (input1_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1537 {
1538 int spaceDim = input1_.extent_int(2);
1539 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1540 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1541 {
1542 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1543 for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1544 {
1545 for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1546 {
1547 for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1548 {
1549 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1550 for (int d=0; d<spaceDim; d++)
1551 {
1552 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1553 }
1554 }
1555 }
1556 }
1557 }
1558 });
1559 }
1560 else if (input2_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1561 {
1562 int spaceDim = input2_.extent_int(2);
1563 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1564 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1565 {
1566 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1567 for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1568 {
1569 for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1570 {
1571 for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1572 {
1573 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1574 for (int d=0; d<spaceDim; d++)
1575 {
1576 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
1577 }
1578 }
1579 }
1580 }
1581 }
1582 });
1583 }
1584 else if (input3_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1585 {
1586 int spaceDim = input3_.extent_int(2);
1587 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1588 for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1589 {
1590 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1591 for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1592 {
1593 for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1594 {
1595 for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1596 {
1597 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1598 for (int d=0; d<spaceDim; d++)
1599 {
1600 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
1601 }
1602 }
1603 }
1604 }
1605 }
1606 });
1607 }
1608 else
1609 {
1610 // unsupported rank combination -- enforced in constructor
1611 }
1612 }
1613 }
1614 }; // TensorBasis3_Functor
1615
1616
1617 template<typename BasisBaseClass = void>
1619 : public Basis_TensorBasis<BasisBaseClass>
1620 {
1621 using BasisBase = BasisBaseClass;
1623 public:
1624 using OutputViewType = typename BasisBase::OutputViewType;
1625 using PointViewType = typename BasisBase::PointViewType;
1626 using ScalarViewType = typename BasisBase::ScalarViewType;
1627
1628 using OutputValueType = typename BasisBase::OutputValueType;
1629 using PointValueType = typename BasisBase::PointValueType;
1630
1631 using BasisPtr = Teuchos::RCP<BasisBase>;
1632 protected:
1633 BasisPtr basis1_;
1634 BasisPtr basis2_;
1635 BasisPtr basis3_;
1636 public:
1637 Basis_TensorBasis3(BasisPtr basis1, BasisPtr basis2, BasisPtr basis3)
1638 :
1639 TensorBasis(Teuchos::rcp( new TensorBasis(basis1,basis2)),basis3),
1640 basis1_(basis1),
1641 basis2_(basis2),
1642 basis3_(basis3)
1643 {}
1644
1651 virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
1652 {
1653 OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
1654 std::vector<BasisPtr> componentBases {basis1_, basis2_, basis3_};
1655 return opSimpleDecomposition.expandedDecomposition(componentBases);
1656 }
1657
1659
1684 virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1685 const PointViewType inputPoints12, const PointViewType inputPoints3,
1686 bool tensorPoints) const override
1687 {
1688 // TODO: rework this to use superclass's getComponentPoints.
1689
1690 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
1691 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
1692
1693 int totalSpaceDim12 = inputPoints12.extent_int(1);
1694
1695 TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
1696
1697 if (!tensorPoints)
1698 {
1699 auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1700 auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
1701
1702 this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
1703 }
1704 else
1705 {
1706 // superclass doesn't (yet) have a clever way to detect tensor points in a single container
1707 // we'd need something along those lines here to detect them in inputPoints12.
1708 // if we do add such a mechanism to superclass, it should be simple enough to call that from here
1709 INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "This method does not yet handle tensorPoints=true");
1710 }
1711 }
1712
1740 virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1741 const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3,
1742 bool tensorPoints) const = 0;
1743
1771 void getValues( OutputViewType outputValues,
1772 const PointViewType inputPoints1, const EOperator operatorType1,
1773 const PointViewType inputPoints2, const EOperator operatorType2,
1774 const PointViewType inputPoints3, const EOperator operatorType3,
1775 bool tensorPoints, double weight=1.0) const
1776 {
1777 int basisCardinality1 = basis1_->getCardinality();
1778 int basisCardinality2 = basis2_->getCardinality();
1779 int basisCardinality3 = basis3_->getCardinality();
1780
1781 int spaceDim1 = inputPoints1.extent_int(1);
1782 int spaceDim2 = inputPoints2.extent_int(1);
1783 int spaceDim3 = inputPoints3.extent_int(1);
1784
1785 int totalPointCount;
1786 int pointCount1, pointCount2, pointCount3;
1787 if (tensorPoints)
1788 {
1789 pointCount1 = inputPoints1.extent_int(0);
1790 pointCount2 = inputPoints2.extent_int(0);
1791 pointCount3 = inputPoints3.extent_int(0);
1792 totalPointCount = pointCount1 * pointCount2 * pointCount3;
1793 }
1794 else
1795 {
1796 totalPointCount = inputPoints1.extent_int(0);
1797 pointCount1 = totalPointCount;
1798 pointCount2 = totalPointCount;
1799 pointCount3 = totalPointCount;
1800
1801 INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
1802 std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1803 }
1804
1805 // structure of this implementation:
1806 /*
1807 - allocate output1, output2, output3 containers
1808 - either:
1809 1. split off the tensor functor call into its own method in TensorBasis, and
1810 - call it once with output1, output2, placing these in another newly allocated output12, then
1811 - call it again with output12, output3
1812 OR
1813 2. create a 3-argument tensor functor and call it with output1,output2,output3
1814
1815 At the moment, the 3-argument functor seems like a better approach. It's likely more code, but somewhat
1816 more efficient and easier to understand/debug. And the code is fairly straightforward to produce.
1817 */
1818
1819 // copied from the 2-argument TensorBasis implementation:
1820
1821 OutputViewType outputValues1, outputValues2, outputValues3;
1822
1823 //Note: the gradient of HGRAD basis on a line has an output vector of rank 3, the last dimension being of size 1.
1824 // in particular this holds even when computing the divergence of an HDIV basis, which is scalar and has rank 2.
1825 if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
1826 {
1827 // use a rank 2 container for basis1
1828 outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1829 }
1830 else
1831 {
1832 outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1833 }
1834 if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
1835 {
1836 // use a rank 2 container for basis2
1837 outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1838 }
1839 else
1840 {
1841 outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1842 }
1843 if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
1844 {
1845 // use a rank 2 container for basis2
1846 outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3);
1847 }
1848 else
1849 {
1850 outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
1851 }
1852
1853 basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1854 basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1855 basis3_->getValues(outputValues3,inputPoints3,operatorType3);
1856
1857 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1858 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1859 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1860
1861 using ExecutionSpace = typename BasisBase::ExecutionSpace;
1862
1863 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1864
1866 FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
1867 Kokkos::parallel_for( policy , functor, "TensorBasis3_Functor");
1868 }
1869
1875 getHostBasis() const override {
1876 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis3 subclasses must override getHostBasis");
1877 }
1878 };
1879} // end namespace Intrepid2
1880
1881#endif /* Intrepid2_TensorBasis_h */
Header file for the abstract base class Intrepid2::Basis.
BasisPtr< typename Kokkos::HostSpace::device_type, OutputType, PointType > HostBasisPtr
Pointer to a Basis whose device type is on the host (Kokkos::HostSpace::device_type),...
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorRank(const EFunctionSpace spaceType, const EOperator operatorType, const ordinal_type spaceDim)
Returns rank of an operator.
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorOrder(const EOperator operatorType)
Returns order of an operator.
KOKKOS_INLINE_FUNCTION ordinal_type getDkCardinality(const EOperator operatorType, const ordinal_type spaceDim)
Returns cardinality of Dk, i.e., the number of all derivatives of order k.
KOKKOS_INLINE_FUNCTION ordinal_type getDkEnumeration(const ordinal_type xMult, const ordinal_type yMult=-1, const ordinal_type zMult=-1)
Returns the ordinal of a partial derivative of order k based on the multiplicities of the partials dx...
@ CONSTANT
does not vary
@ GENERAL
arbitrary variation
Implementation of an assert that can safely be called from device code.
Class that defines mappings from component cell topologies to their tensor product topologies.
Implementation of support for traversing component views alongside a view that represents a combinati...
Header function for Intrepid2::Util class and other utility functions.
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
The data containers in Intrepid2 that support sum factorization and other reduced-data optimizations ...
const VectorDataType & vectorData() const
VectorData accessor.
TensorDataType & tensorData()
TensorData accessor for single-family scalar data.
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints12, const PointViewType inputPoints3, bool tensorPoints) const override
Evaluation of a tensor FEM basis on a reference cell.
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3, bool tensorPoints) const =0
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, const PointViewType inputPoints3, const EOperator operatorType3, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
Basis defined as the tensor product of two component bases.
virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator operatorType) const
Returns a simple decomposition of the specified operator: what operator(s) should be applied to basis...
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
virtual void getDofCoords(typename BasisBase::ScalarViewType dofCoords) const override
Fills in spatial locations (coordinates) of degrees of freedom (nodes) on the reference cell.
virtual void getDofCoeffs(typename BasisBase::ScalarViewType dofCoeffs) const override
Fills in coefficients of degrees of freedom on the reference cell.
virtual BasisValues< OutputValueType, DeviceType > allocateBasisValues(TensorPoints< PointValueType, DeviceType > points, const EOperator operatorType=OPERATOR_VALUE) const override
Allocate BasisValues container suitable for passing to the getValues() variant that takes a TensorPoi...
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints1, const PointViewType inputPoints2, bool tensorPoints) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
virtual const char * getName() const override
Returns basis name.
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell.
ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1, ordinal_type dkEnum2, ordinal_type operatorOrder2) const
Given "Dk" enumeration indices for the component bases, returns a Dk enumeration index for the compos...
Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace=FUNCTION_SPACE_MAX)
Constructor.
void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition, PointViewType &inputPoints1, PointViewType &inputPoints2, bool &tensorDecompositionSucceeded) const
Method to extract component points from composite points.
void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
virtual void getValues(BasisValues< OutputValueType, DeviceType > outputValues, const TensorPoints< PointValueType, DeviceType > inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell, using point and output value containers that allow pre...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1.
View-like interface to tensor data; tensor components are stored separately and multiplied together a...
View-like interface to tensor points; point components are stored separately; the appropriate coordin...
KOKKOS_INLINE_FUNCTION ScalarView< PointScalar, DeviceType > getTensorComponent(const ordinal_type &r) const
Returns the requested tensor component.
ordinal_type componentPointCount(const ordinal_type &tensorComponentOrdinal) const
Returns the number of points in the indicated component.
For two cell topologies whose tensor product is a third, this class establishes a mapping from subcel...
unsigned getCompositeSubcellOrdinal(unsigned subcell1Dim, unsigned subcell1Ordinal, unsigned subcell2Dim, unsigned subcell2Ordinal)
Map from component subcell ordinals to the corresponding composite subcell ordinal.
Functor for computing values for the TensorBasis class.
A helper class that allows iteration over three Kokkos Views simultaneously, according to tensor comb...
KOKKOS_INLINE_FUNCTION ScalarType getView1Entry()
KOKKOS_INLINE_FUNCTION int nextIncrementRank()
KOKKOS_INLINE_FUNCTION void setLocation(const Kokkos::Array< int, 7 > location)
KOKKOS_INLINE_FUNCTION void set(ScalarType value)
KOKKOS_INLINE_FUNCTION ScalarType getView2Entry()
Reference-space field values for a basis, designed to support typical vector-valued bases.
KOKKOS_INLINE_FUNCTION const TensorData< Scalar, DeviceType > & getComponent(const int &componentOrdinal) const
Single-argument component accessor for the axial-component or the single-family case; in this case,...
For a multi-component tensor basis, specifies the operators to be applied to the components to produc...
OperatorTensorDecomposition expandedDecomposition(std::vector< Teuchos::RCP< Basis< DeviceType, OutputValueType, PointValueType > > > &bases)
takes as argument bases that are components in this decomposition, and decomposes them further if the...
Functor for computing values for the TensorBasis3 class.