Intrepid2
Intrepid2_Data.hpp
Go to the documentation of this file.
1//
2// Intrepid2_Data.hpp
3// QuadraturePerformance
4//
5// Created by Roberts, Nathan V on 8/24/20.
6//
7
8#ifndef Intrepid2_Data_h
9#define Intrepid2_Data_h
10
12#include "Intrepid2_ScalarView.hpp"
13#include "Intrepid2_Utils.hpp"
14
20namespace Intrepid2 {
32 {
37 };
38
43 {
44 int logicalExtent;
45 DataVariationType variationType;
46 int dataExtent;
47 int variationModulus; // should be equal to dataExtent variationType other than MODULAR and CONSTANT
48 int blockPlusDiagonalLastNonDiagonal = -1; // only relevant for variationType == BLOCK_PLUS_DIAGONAL
49 };
50
52 KOKKOS_INLINE_FUNCTION
54 {
55 const int myNominalExtent = myData.logicalExtent;
56#ifdef HAVE_INTREPID2_DEBUG
57 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myNominalExtent != otherData.logicalExtent, std::invalid_argument, "both Data objects must match in their logical extent in the specified dimension");
58#endif
59 const DataVariationType & myVariation = myData.variationType;
60 const DataVariationType & otherVariation = otherData.variationType;
61
62 const int & myVariationModulus = myData.variationModulus;
63 const int & otherVariationModulus = otherData.variationModulus;
64
65 int myDataExtent = myData.dataExtent;
66 int otherDataExtent = otherData.dataExtent;
67
69 combinedDimensionInfo.logicalExtent = myNominalExtent;
70
71 switch (myVariation)
72 {
73 case CONSTANT:
74 switch (otherVariation)
75 {
76 case CONSTANT:
77 case MODULAR:
78 case GENERAL:
80 combinedDimensionInfo = otherData;
81 }
82 break;
83 case MODULAR:
84 switch (otherVariation)
85 {
86 case CONSTANT:
87 combinedDimensionInfo = myData;
88 break;
89 case MODULAR:
90 if (myVariationModulus == otherVariationModulus)
91 {
92 // in this case, expect both to have the same data extent
93 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myDataExtent != otherDataExtent, std::logic_error, "Unexpected data extent/modulus combination");
94 combinedDimensionInfo.variationType = MODULAR;
95 combinedDimensionInfo.dataExtent = myDataExtent;
96 combinedDimensionInfo.variationModulus = myVariationModulus;
97 }
98 else
99 {
100 // both modular with two different moduli
101 // we could do something clever with e.g. least common multiples, but for now we just use GENERAL
102 // (this is not a use case we anticipate being a common one)
103 combinedDimensionInfo.variationType = GENERAL;
104 combinedDimensionInfo.dataExtent = myNominalExtent;
105 combinedDimensionInfo.variationModulus = myNominalExtent;
106 }
107 break;
109 combinedDimensionInfo.variationType = GENERAL;
110 combinedDimensionInfo.dataExtent = myNominalExtent;
111 combinedDimensionInfo.variationModulus = myNominalExtent;
112 break;
113 case GENERAL:
114 // otherData is GENERAL: its info dominates
115 combinedDimensionInfo = otherData;
116 break;
117 }
118 break;
120 switch (otherVariation)
121 {
122 case CONSTANT:
123 combinedDimensionInfo = myData;
124 break;
125 case MODULAR:
126 combinedDimensionInfo.variationType = GENERAL;
127 combinedDimensionInfo.dataExtent = myNominalExtent;
128 combinedDimensionInfo.variationModulus = myNominalExtent;
129 break;
130 case GENERAL:
131 // otherData is GENERAL: its info dominates
132 combinedDimensionInfo = otherData;
133 break;
135 combinedDimensionInfo.variationType = GENERAL;
136 combinedDimensionInfo.dataExtent = max(myDataExtent,otherDataExtent);
137 combinedDimensionInfo.variationModulus = combinedDimensionInfo.dataExtent;
138 // for this case, we want to take the minimum of the two Data objects' blockPlusDiagonalLastNonDiagonal as the combined object's blockPlusDiagonalLastNonDiagonal
139 combinedDimensionInfo.blockPlusDiagonalLastNonDiagonal = min(myData.blockPlusDiagonalLastNonDiagonal, otherData.blockPlusDiagonalLastNonDiagonal);
140 }
141 break;
142 case GENERAL:
143 switch (otherVariation)
144 {
145 case CONSTANT:
146 case MODULAR:
147 case GENERAL:
149 combinedDimensionInfo = myData;
150 }
151 }
153 }
154
163template<class DataScalar,typename DeviceType>
164class ZeroView {
165public:
166 static ScalarView<DataScalar,DeviceType> zeroView()
167 {
168 static ScalarView<DataScalar,DeviceType> zeroView = ScalarView<DataScalar,DeviceType>("zero",1);
169 static bool havePushedFinalizeHook = false;
170 if (!havePushedFinalizeHook)
171 {
172 Kokkos::push_finalize_hook( [=] {
173 zeroView = ScalarView<DataScalar,DeviceType>();
174 });
175 havePushedFinalizeHook = true;
176 }
177 return zeroView;
178 }
179};
180
198 template<class DataScalar,typename DeviceType>
199 class Data {
200 public:
201 using value_type = DataScalar;
202 using execution_space = typename DeviceType::execution_space;
203 private:
204 ordinal_type dataRank_;
205 Kokkos::View<DataScalar*, DeviceType> data1_; // the rank 1 data that is explicitly stored
206 Kokkos::View<DataScalar**, DeviceType> data2_; // the rank 2 data that is explicitly stored
207 Kokkos::View<DataScalar***, DeviceType> data3_; // the rank 3 data that is explicitly stored
208 Kokkos::View<DataScalar****, DeviceType> data4_; // the rank 4 data that is explicitly stored
209 Kokkos::View<DataScalar*****, DeviceType> data5_; // the rank 5 data that is explicitly stored
210 Kokkos::View<DataScalar******, DeviceType> data6_; // the rank 6 data that is explicitly stored
211 Kokkos::View<DataScalar*******, DeviceType> data7_; // the rank 7 data that is explicitly stored
212 Kokkos::Array<int,7> extents_; // logical extents in each dimension
213 Kokkos::Array<DataVariationType,7> variationType_; // for each dimension, whether the data varies in that dimension
214 Kokkos::Array<int,7> variationModulus_; // for each dimension, a value by which indices should be modulused (only used when variationType_ is MODULAR)
215 int blockPlusDiagonalLastNonDiagonal_ = -1; // last row/column that is part of the non-diagonal part of the matrix indicated by BLOCK_PLUS_DIAGONAL (if any dimensions are thus marked)
216
217 bool hasNontrivialModulusUNUSED_; // this is a little nutty, but having this UNUSED member variable improves performance, probably by shifting the alignment of underlyingMatchesLogical_. This is true with nvcc; it may also be true with Apple clang
218 bool underlyingMatchesLogical_; // if true, this Data object has the same rank and extent as the underlying view
219 Kokkos::Array<ordinal_type,7> activeDims_;
220 int numActiveDims_; // how many of the 7 entries are actually filled in
221
222 ordinal_type rank_;
223
224 using reference_type = typename ScalarView<DataScalar,DeviceType>::reference_type;
225 using const_reference_type = typename ScalarView<const DataScalar,DeviceType>::reference_type;
226 // we use reference_type as the return for operator() for performance reasons, especially significant when using Sacado types
227 using return_type = const_reference_type;
228
229 ScalarView<DataScalar,DeviceType> zeroView_; // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
230
232 KOKKOS_INLINE_FUNCTION
233 static int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
234 {
235 return (lastNondiagonal + 1) * (lastNondiagonal + 1);
236 }
237
239 KOKKOS_INLINE_FUNCTION
240 static int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
241 {
242 return i * (lastNondiagonal + 1) + j;
243 }
244
246 KOKKOS_INLINE_FUNCTION
247 static int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
248 {
249 return i - (lastNondiagonal + 1) + numNondiagonalEntries;
250 }
251
253 KOKKOS_INLINE_FUNCTION
254 int getUnderlyingViewExtent(const int &dim) const
255 {
256 switch (dataRank_)
257 {
258 case 1: return data1_.extent_int(dim);
259 case 2: return data2_.extent_int(dim);
260 case 3: return data3_.extent_int(dim);
261 case 4: return data4_.extent_int(dim);
262 case 5: return data5_.extent_int(dim);
263 case 6: return data6_.extent_int(dim);
264 case 7: return data7_.extent_int(dim);
265 default: return -1;
266 }
267 }
268
271 {
272 zeroView_ = ZeroView<DataScalar,DeviceType>::zeroView(); // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
273 // check that rank is compatible with the claimed extents:
274 for (int d=rank_; d<7; d++)
275 {
276 INTREPID2_TEST_FOR_EXCEPTION(extents_[d] > 1, std::invalid_argument, "Nominal extents may not be > 1 in dimensions beyond the rank of the container");
277 }
278
279 numActiveDims_ = 0;
280 int blockPlusDiagonalCount = 0;
281 underlyingMatchesLogical_ = true;
282 for (ordinal_type i=0; i<7; i++)
283 {
284 if (variationType_[i] == GENERAL)
285 {
286 if (extents_[i] != 0)
287 {
288 variationModulus_[i] = extents_[i];
289 }
290 else
291 {
292 variationModulus_[i] = 1;
293 }
294 activeDims_[numActiveDims_] = i;
295 numActiveDims_++;
296 }
297 else if (variationType_[i] == MODULAR)
298 {
299 underlyingMatchesLogical_ = false;
300 if (extents_[i] != getUnderlyingViewExtent(numActiveDims_))
301 {
302 const int dataExtent = getUnderlyingViewExtent(numActiveDims_);
303 const int logicalExtent = extents_[i];
304 const int modulus = dataExtent;
305
306 INTREPID2_TEST_FOR_EXCEPTION( dataExtent * (logicalExtent / dataExtent) != logicalExtent, std::invalid_argument, "data extent must evenly divide logical extent");
307
308 variationModulus_[i] = modulus;
309 }
310 else
311 {
312 variationModulus_[i] = extents_[i];
313 }
314 activeDims_[numActiveDims_] = i;
315 numActiveDims_++;
316 }
317 else if (variationType_[i] == BLOCK_PLUS_DIAGONAL)
318 {
319 underlyingMatchesLogical_ = false;
320 blockPlusDiagonalCount++;
321 if (blockPlusDiagonalCount == 1) // first dimension thus marked --> active
322 {
323
324#ifdef HAVE_INTREPID2_DEBUG
325 const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
326 const int dataExtent = getUnderlyingViewExtent(numActiveDims_); // flat storage of all matrix entries
327 const int logicalExtent = extents_[i];
328 const int numDiagonalEntries = logicalExtent - (blockPlusDiagonalLastNonDiagonal_ + 1);
329 const int expectedDataExtent = numNondiagonalEntries + numDiagonalEntries;
330 INTREPID2_TEST_FOR_EXCEPTION(dataExtent != expectedDataExtent, std::invalid_argument, ("BLOCK_PLUS_DIAGONAL data extent of " + std::to_string(dataExtent) + " does not match expected based on blockPlusDiagonalLastNonDiagonal setting of " + std::to_string(blockPlusDiagonalLastNonDiagonal_)).c_str());
331#endif
332
333 activeDims_[numActiveDims_] = i;
334 numActiveDims_++;
335 }
336 variationModulus_[i] = getUnderlyingViewExtent(numActiveDims_);
337 INTREPID2_TEST_FOR_EXCEPTION(variationType_[i+1] != BLOCK_PLUS_DIAGONAL, std::invalid_argument, "BLOCK_PLUS_DIAGONAL ranks must be contiguous");
338 i++; // skip over the next BLOCK_PLUS_DIAGONAL
339 variationModulus_[i] = 1; // trivial modulus (should not ever be used)
340 INTREPID2_TEST_FOR_EXCEPTION(blockPlusDiagonalCount > 1, std::invalid_argument, "BLOCK_PLUS_DIAGONAL can only apply to two ranks");
341 }
342 else // CONSTANT
343 {
344 if (i < rank_)
345 {
346 underlyingMatchesLogical_ = false;
347 }
348 variationModulus_[i] = 1; // trivial modulus
349 }
350 }
351
352 if (rank_ != dataRank_)
353 {
354 underlyingMatchesLogical_ = false;
355 }
356
357 for (int d=numActiveDims_; d<7; d++)
358 {
359 // for *inactive* dims, the activeDims_ map just is the identity
360 // (this allows getEntry() to work even when the logical rank of the Data object is lower than that of the underlying View. This can happen for gradients in 1D.)
361 activeDims_[d] = d;
362 }
363 for (int d=0; d<7; d++)
364 {
365 INTREPID2_TEST_FOR_EXCEPTION(variationModulus_[d] == 0, std::logic_error, "variationModulus should not ever be 0");
366 }
367 }
368
369 public:
372 {
373 template<class ViewType, class ...IntArgs>
374 static KOKKOS_INLINE_FUNCTION reference_type get(const ViewType &view, const IntArgs&... intArgs)
375 {
376 return view.getWritableEntry(intArgs...);
377 }
378 };
379
380 template<class BinaryOperator, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
381 class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB, bool includeInnerLoop=false>
383 {
384 private:
385 ThisUnderlyingViewType this_underlying_;
386 AUnderlyingViewType A_underlying_;
387 BUnderlyingViewType B_underlying_;
388 BinaryOperator binaryOperator_;
389 int innerLoopSize_;
390 public:
391 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
392 BinaryOperator binaryOperator)
393 :
394 this_underlying_(this_underlying),
395 A_underlying_(A_underlying),
396 B_underlying_(B_underlying),
397 binaryOperator_(binaryOperator)
398 {
399 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
400 }
401
402 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
403 BinaryOperator binaryOperator, int innerLoopSize)
404 :
405 this_underlying_(this_underlying),
406 A_underlying_(A_underlying),
407 B_underlying_(B_underlying),
408 binaryOperator_(binaryOperator),
409 innerLoopSize_(innerLoopSize)
410 {
411 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
412 }
413
414 template<class ...IntArgs, bool M=includeInnerLoop>
415 KOKKOS_INLINE_FUNCTION
416 enable_if_t<!M, void>
417 operator()(const IntArgs&... args) const
418 {
419 auto & result = ArgExtractorThis::get( this_underlying_, args... );
420 const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
421 const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
422
423 result = binaryOperator_(A_val,B_val);
424 }
425
426 template<class ...IntArgs, bool M=includeInnerLoop>
427 KOKKOS_INLINE_FUNCTION
428 enable_if_t<M, void>
429 operator()(const IntArgs&... args) const
430 {
431 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
432 for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
433 {
434 auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
435 const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
436 const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
437
438 result = binaryOperator_(A_val,B_val);
439 }
440 }
441 };
442
444 template<class BinaryOperator, class PolicyType, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
445 class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB>
446 void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying,
447 AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
448 BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
449 {
451 Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
452 Kokkos::parallel_for("compute in-place", policy, functor);
453 }
454
456 template<class BinaryOperator, int rank>
457 enable_if_t<rank != 7, void>
458 storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
459 {
460 auto policy = dataExtentRangePolicy<rank>();
461
462 // shallow copy of this to avoid implicit references to this in calls to getWritableEntry() below
463 Data<DataScalar,DeviceType> thisData = *this;
464
465 const bool A_1D = A.getUnderlyingViewRank() == 1;
466 const bool B_1D = B.getUnderlyingViewRank() == 1;
467 const bool this_1D = this->getUnderlyingViewRank() == 1;
468 const bool A_constant = A_1D && (A.getUnderlyingViewSize() == 1);
469 const bool B_constant = B_1D && (B.getUnderlyingViewSize() == 1);
470 const bool this_constant = this_1D && (this->getUnderlyingViewSize() == 1);
471 const bool A_full = A.underlyingMatchesLogical();
472 const bool B_full = B.underlyingMatchesLogical();
473 const bool this_full = this->underlyingMatchesLogical();
474
476
478 const FullArgExtractor<const_reference_type> fullArgsConst;
479 const FullArgExtractorWritableData fullArgsWritable;
480
487
488 // this lambda returns -1 if there is not a rank-1 underlying view whose data extent matches the logical extent in the corresponding dimension;
489 // otherwise, it returns the logical index of the corresponding dimension.
490 auto get1DArgIndex = [](const Data<DataScalar,DeviceType> &data) -> int
491 {
492 const auto & variationTypes = data.getVariationTypes();
493 for (int d=0; d<rank; d++)
494 {
495 if (variationTypes[d] == GENERAL)
496 {
497 return d;
498 }
499 }
500 return -1;
501 };
502 if (this_constant)
503 {
504 // then A, B are constant, too
505 auto thisAE = constArg;
506 auto AAE = constArg;
507 auto BAE = constArg;
508 auto & this_underlying = this->getUnderlyingView<1>();
509 auto & A_underlying = A.getUnderlyingView<1>();
510 auto & B_underlying = B.getUnderlyingView<1>();
511 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
512 }
513 else if (this_full && A_full && B_full)
514 {
515 auto thisAE = fullArgs;
516 auto AAE = fullArgs;
517 auto BAE = fullArgs;
518
519 auto & this_underlying = this->getUnderlyingView<rank>();
520 auto & A_underlying = A.getUnderlyingView<rank>();
521 auto & B_underlying = B.getUnderlyingView<rank>();
522
523 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
524 }
525 else if (A_constant)
526 {
527 auto AAE = constArg;
528 auto & A_underlying = A.getUnderlyingView<1>();
529 if (this_full)
530 {
531 auto thisAE = fullArgs;
532 auto & this_underlying = this->getUnderlyingView<rank>();
533
534 if (B_full)
535 {
536 auto BAE = fullArgs;
537 auto & B_underlying = B.getUnderlyingView<rank>();
538 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
539 }
540 else // this_full, not B_full: B may have modular data, etc.
541 {
542 auto BAE = fullArgsConst;
543 storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
544 }
545 }
546 else // this is not full
547 {
548 // below, we optimize for the case of 1D data in B, when A is constant. Still need to handle other cases…
549 if (B_1D && (get1DArgIndex(B) != -1) )
550 {
551 // since A is constant, that implies that this_1D is true, and has the same 1DArgIndex
552 const int argIndex = get1DArgIndex(B);
553 auto & B_underlying = B.getUnderlyingView<1>();
554 auto & this_underlying = this->getUnderlyingView<1>();
555 switch (argIndex)
556 {
557 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0); break;
558 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1); break;
559 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2); break;
560 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3); break;
561 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4); break;
562 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5); break;
563 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
564 }
565 }
566 else
567 {
568 // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
569 auto thisAE = fullArgsWritable;
570 auto BAE = fullArgsConst;
571 storeInPlaceCombination(policy, thisData, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
572 }
573 }
574 }
575 else if (B_constant)
576 {
577 auto BAE = constArg;
578 auto & B_underlying = B.getUnderlyingView<1>();
579 if (this_full)
580 {
581 auto thisAE = fullArgs;
582 auto & this_underlying = this->getUnderlyingView<rank>();
583 if (A_full)
584 {
585 auto AAE = fullArgs;
586 auto & A_underlying = A.getUnderlyingView<rank>();
587
588 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
589 }
590 else // this_full, not A_full: A may have modular data, etc.
591 {
592 // use A (the Data object). This could be further optimized by using A's underlying View and an appropriately-defined ArgExtractor.
593 auto AAE = fullArgsConst;
594 storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
595 }
596 }
597 else // this is not full
598 {
599 // below, we optimize for the case of 1D data in A, when B is constant. Still need to handle other cases…
600 if (A_1D && (get1DArgIndex(A) != -1) )
601 {
602 // since B is constant, that implies that this_1D is true, and has the same 1DArgIndex as A
603 const int argIndex = get1DArgIndex(A);
604 auto & A_underlying = A.getUnderlyingView<1>();
605 auto & this_underlying = this->getUnderlyingView<1>();
606 switch (argIndex)
607 {
608 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE); break;
609 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE); break;
610 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE); break;
611 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE); break;
612 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE); break;
613 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE); break;
614 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
615 }
616 }
617 else
618 {
619 // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
620 auto thisAE = fullArgsWritable;
621 auto AAE = fullArgsConst;
622 storeInPlaceCombination(policy, thisData, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
623 }
624 }
625 }
626 else // neither A nor B constant
627 {
628 if (this_1D && (get1DArgIndex(thisData) != -1))
629 {
630 // possible ways that "this" could have full-extent, 1D data
631 // 1. A constant, B 1D
632 // 2. A 1D, B constant
633 // 3. A 1D, B 1D
634 // The constant possibilities are already addressed above, leaving us with (3). Note that A and B don't have to be full-extent, however
635 const int argThis = get1DArgIndex(thisData);
636 const int argA = get1DArgIndex(A); // if not full-extent, will be -1
637 const int argB = get1DArgIndex(B); // ditto
638
639 auto & A_underlying = A.getUnderlyingView<1>();
640 auto & B_underlying = B.getUnderlyingView<1>();
641 auto & this_underlying = this->getUnderlyingView<1>();
642 if ((argA != -1) && (argB != -1))
643 {
644#ifdef INTREPID2_HAVE_DEBUG
645 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error, "Unexpected 1D arg combination.");
646 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error, "Unexpected 1D arg combination.");
647#endif
648 switch (argThis)
649 {
650 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0); break;
651 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1); break;
652 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2); break;
653 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3); break;
654 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4); break;
655 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5); break;
656 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
657 }
658 }
659 else if (argA != -1)
660 {
661 // B is not full-extent in dimension argThis; use the Data object
662 switch (argThis)
663 {
664 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsConst); break;
665 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsConst); break;
666 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsConst); break;
667 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsConst); break;
668 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsConst); break;
669 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsConst); break;
670 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
671 }
672 }
673 else
674 {
675 // A is not full-extent in dimension argThis; use the Data object
676 switch (argThis)
677 {
678 case 0: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsConst, arg0); break;
679 case 1: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsConst, arg1); break;
680 case 2: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsConst, arg2); break;
681 case 3: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsConst, arg3); break;
682 case 4: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsConst, arg4); break;
683 case 5: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsConst, arg5); break;
684 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
685 }
686 }
687 }
688 else if (this_full)
689 {
690 // This case uses A,B Data objects; could be optimized by dividing into subcases and using underlying Views with appropriate ArgExtractors.
691 auto & this_underlying = this->getUnderlyingView<rank>();
692 auto thisAE = fullArgs;
693
694 if (A_full)
695 {
696 auto & A_underlying = A.getUnderlyingView<rank>();
697 auto AAE = fullArgs;
698
699 if (B_1D && (get1DArgIndex(B) != -1))
700 {
701 const int argIndex = get1DArgIndex(B);
702 auto & B_underlying = B.getUnderlyingView<1>();
703 switch (argIndex)
704 {
705 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0); break;
706 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1); break;
707 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2); break;
708 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3); break;
709 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4); break;
710 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5); break;
711 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
712 }
713 }
714 else
715 {
716 // A is full; B is not full, but not constant or full-extent 1D
717 // unoptimized in B access:
718 auto BAE = fullArgsConst;
719 storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
720 }
721 }
722 else // A is not full
723 {
724 if (A_1D && (get1DArgIndex(A) != -1))
725 {
726 const int argIndex = get1DArgIndex(A);
727 auto & A_underlying = A.getUnderlyingView<1>();
728 if (B_full)
729 {
730 auto & B_underlying = B.getUnderlyingView<rank>();
731 auto BAE = fullArgs;
732 switch (argIndex)
733 {
734 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE); break;
735 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE); break;
736 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE); break;
737 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE); break;
738 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE); break;
739 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE); break;
740 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
741 }
742 }
743 else
744 {
745 auto BAE = fullArgsConst;
746 switch (argIndex)
747 {
748 case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE); break;
749 case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE); break;
750 case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE); break;
751 case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE); break;
752 case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE); break;
753 case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE); break;
754 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
755 }
756 }
757 }
758 else // A not full, and not full-extent 1D
759 {
760 // unoptimized in A, B accesses.
761 auto AAE = fullArgsConst;
762 auto BAE = fullArgsConst;
763 storeInPlaceCombination(policy, this_underlying, A, B, binaryOperator, thisAE, AAE, BAE);
764 }
765 }
766 }
767 else
768 {
769 // completely un-optimized case: we use Data objects for this, A, B.
770 auto thisAE = fullArgsWritable;
771 auto AAE = fullArgsConst;
772 auto BAE = fullArgsConst;
773 storeInPlaceCombination(policy, thisData, A, B, binaryOperator, thisAE, AAE, BAE);
774 }
775 }
776 }
777
779 template<class BinaryOperator, int rank>
780 enable_if_t<rank == 7, void>
781 storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
782 {
783 auto policy = dataExtentRangePolicy<rank>();
784
785 using DataType = Data<DataScalar,DeviceType>;
786 using ThisAE = FullArgExtractorWritableData;
789
790 const ordinal_type dim6 = getDataExtent(6);
791 const bool includeInnerLoop = true;
793 Functor functor(*this, A, B, binaryOperator, dim6);
794 Kokkos::parallel_for("compute in-place", policy, functor);
795 }
796 public:
798 template<class UnaryOperator>
799 void applyOperator(UnaryOperator unaryOperator)
800 {
801 using ExecutionSpace = typename DeviceType::execution_space;
802
803 switch (dataRank_)
804 {
805 case 1:
806 {
807 const int dataRank = 1;
808 auto view = getUnderlyingView<dataRank>();
809
810 const int dataExtent = this->getDataExtent(0);
811 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,dataExtent);
812 Kokkos::parallel_for("apply operator in-place", policy,
813 KOKKOS_LAMBDA (const int &i0) {
814 view(i0) = unaryOperator(view(i0));
815 });
816
817 }
818 break;
819 case 2:
820 {
821 const int dataRank = 2;
822 auto policy = dataExtentRangePolicy<dataRank>();
823 auto view = getUnderlyingView<dataRank>();
824
825 Kokkos::parallel_for("apply operator in-place", policy,
826 KOKKOS_LAMBDA (const int &i0, const int &i1) {
827 view(i0,i1) = unaryOperator(view(i0,i1));
828 });
829 }
830 break;
831 case 3:
832 {
833 const int dataRank = 3;
834 auto policy = dataExtentRangePolicy<dataRank>();
835 auto view = getUnderlyingView<dataRank>();
836
837 Kokkos::parallel_for("apply operator in-place", policy,
838 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2) {
839 view(i0,i1,i2) = unaryOperator(view(i0,i1,i2));
840 });
841 }
842 break;
843 case 4:
844 {
845 const int dataRank = 4;
846 auto policy = dataExtentRangePolicy<dataRank>();
847 auto view = getUnderlyingView<dataRank>();
848
849 Kokkos::parallel_for("apply operator in-place", policy,
850 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3) {
851 view(i0,i1,i2,i3) = unaryOperator(view(i0,i1,i2,i3));
852 });
853 }
854 break;
855 case 5:
856 {
857 const int dataRank = 5;
858 auto policy = dataExtentRangePolicy<dataRank>();
859 auto view = getUnderlyingView<dataRank>();
860
861 Kokkos::parallel_for("apply operator in-place", policy,
862 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4) {
863 view(i0,i1,i2,i3,i4) = unaryOperator(view(i0,i1,i2,i3,i4));
864 });
865 }
866 break;
867 case 6:
868 {
869 const int dataRank = 6;
870 auto policy = dataExtentRangePolicy<dataRank>();
871 auto view = getUnderlyingView<dataRank>();
872
873 Kokkos::parallel_for("apply operator in-place", policy,
874 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
875 view(i0,i1,i2,i3,i4,i5) = unaryOperator(view(i0,i1,i2,i3,i4,i5));
876 });
877 }
878 break;
879 case 7:
880 {
881 const int dataRank = 7;
882 auto policy6 = dataExtentRangePolicy<6>();
883 auto view = getUnderlyingView<dataRank>();
884
885 const int dim_i6 = view.extent_int(6);
886
887 Kokkos::parallel_for("apply operator in-place", policy6,
888 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
889 for (int i6=0; i6<dim_i6; i6++)
890 {
891 view(i0,i1,i2,i3,i4,i5,i6) = unaryOperator(view(i0,i1,i2,i3,i4,i5,i6));
892 }
893 });
894 }
895 break;
896 default:
897 INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unsupported data rank");
898 }
899 }
900
902 template<class ...IntArgs>
903 KOKKOS_INLINE_FUNCTION
904 reference_type getWritableEntry(const IntArgs... intArgs) const
905 {
906#ifdef INTREPID2_HAVE_DEBUG
907 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(numArgs != rank_, std::invalid_argument, "getWritableEntry() should have the same number of arguments as the logical rank.");
908#endif
909 constexpr int numArgs = sizeof...(intArgs);
910 if (underlyingMatchesLogical_)
911 {
912 // in this case, we require that numArgs == dataRank_
913 return getUnderlyingView<numArgs>()(intArgs...);
914 }
915
916 // extract the type of the first argument; use that for the arrays below
917 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
918
919 const Kokkos::Array<int_type, numArgs> args {intArgs...};
920 Kokkos::Array<int_type, 7> refEntry;
921 for (int d=0; d<numArgs; d++)
922 {
923 switch (variationType_[d])
924 {
925 case CONSTANT: refEntry[d] = 0; break;
926 case GENERAL: refEntry[d] = args[d]; break;
927 case MODULAR: refEntry[d] = args[d] % variationModulus_[d]; break;
929 {
930 const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
931
932 const int_type &i = args[d];
933 if (d+1 >= numArgs)
934 {
935 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "BLOCK_PLUS_DIAGONAL must be present for two dimensions; here, encountered only one");
936 }
937 else
938 {
939 const int_type &j = args[d+1];
940
941 if ((i > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)) || (j > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)))
942 {
943 if (i != j)
944 {
945 // off diagonal: zero
946 return zeroView_(0); // NOTE: this branches in an argument-dependent way; this is not great for CUDA performance. When using BLOCK_PLUS_DIAGONAL, should generally avoid calls to this getEntry() method. (Use methods that directly take advantage of the data packing instead.)
947 }
948 else
949 {
950 refEntry[d] = blockPlusDiagonalDiagonalEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i);
951 }
952 }
953 else
954 {
955 refEntry[d] = blockPlusDiagonalBlockEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i, j);
956 }
957
958 // skip next d (this is required also to be BLOCK_PLUS_DIAGONAL, and we've consumed its arg as j above)
959 refEntry[d+1] = 0;
960 }
961 d++;
962 }
963 }
964 }
965 // refEntry should be zero-filled beyond numArgs, for cases when rank_ < dataRank_ (this only is allowed if the extra dimensions each has extent 1).
966 for (int d=numArgs; d<7; d++)
967 {
968 refEntry[d] = 0;
969 }
970
971 if (dataRank_ == 1)
972 {
973 return data1_(refEntry[activeDims_[0]]);
974 }
975 else if (dataRank_ == 2)
976 {
977 return data2_(refEntry[activeDims_[0]],refEntry[activeDims_[1]]);
978 }
979 else if (dataRank_ == 3)
980 {
981 return data3_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]]);
982 }
983 else if (dataRank_ == 4)
984 {
985 return data4_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]]);
986 }
987 else if (dataRank_ == 5)
988 {
989 return data5_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
990 refEntry[activeDims_[4]]);
991 }
992 else if (dataRank_ == 6)
993 {
994 return data6_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
995 refEntry[activeDims_[4]],refEntry[activeDims_[5]]);
996 }
997 else // dataRank_ == 7
998 {
999 return data7_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1000 refEntry[activeDims_[4]],refEntry[activeDims_[5]],refEntry[activeDims_[6]]);
1001 }
1002 }
1003 public:
1005 template<class ToContainer, class FromContainer>
1006 static void copyContainer(ToContainer to, FromContainer from)
1007 {
1008// std::cout << "Entered copyContainer().\n";
1009 auto policy = Kokkos::MDRangePolicy<execution_space,Kokkos::Rank<6>>({0,0,0,0,0,0},{from.extent_int(0),from.extent_int(1),from.extent_int(2), from.extent_int(3), from.extent_int(4), from.extent_int(5)});
1010
1011 Kokkos::parallel_for("copyContainer", policy,
1012 KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
1013 for (int i6=0; i6<from.extent_int(6); i6++)
1014 {
1015 to.access(i0,i1,i2,i3,i4,i5,i6) = from.access(i0,i1,i2,i3,i4,i5,i6);
1016 }
1017 });
1018 }
1019
1021 void allocateAndCopyFromDynRankView(ScalarView<DataScalar,DeviceType> data)
1022 {
1023// std::cout << "Entered allocateAndCopyFromDynRankView().\n";
1024 switch (dataRank_)
1025 {
1026 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", data.extent_int(0)); break;
1027 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1)); break;
1028 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2)); break;
1029 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3)); break;
1030 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4)); break;
1031 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5)); break;
1032 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5), data.extent_int(6)); break;
1033 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1034 }
1035
1036 switch (dataRank_)
1037 {
1038 case 1: copyContainer(data1_,data); break;
1039 case 2: copyContainer(data2_,data); break;
1040 case 3: copyContainer(data3_,data); break;
1041 case 4: copyContainer(data4_,data); break;
1042 case 5: copyContainer(data5_,data); break;
1043 case 6: copyContainer(data6_,data); break;
1044 case 7: copyContainer(data7_,data); break;
1045 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1046 }
1047 }
1048
1050 Data(std::vector<DimensionInfo> dimInfoVector)
1051 :
1052 // initialize member variables as if default constructor; if dimInfoVector is empty, we want default constructor behavior.
1053 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1054 {
1055 // If dimInfoVector is empty, the member initialization above is correct; otherwise, we set as below.
1056 // Either way, once members are initialized, we must call setActiveDims().
1057 if (dimInfoVector.size() != 0)
1058 {
1059 std::vector<int> dataExtents;
1060
1061 bool blockPlusDiagonalEncountered = true;
1062 for (int d=0; d<rank_; d++)
1063 {
1064 const DimensionInfo & dimInfo = dimInfoVector[d];
1065 extents_[d] = dimInfo.logicalExtent;
1066 variationType_[d] = dimInfo.variationType;
1067 const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1068 const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1069 if (isBlockPlusDiagonal)
1070 {
1071 blockPlusDiagonalEncountered = true;
1072 blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1073 }
1074 if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1075 {
1076 dataExtents.push_back(dimInfo.dataExtent);
1077 }
1078 }
1079 if (dataExtents.size() == 0)
1080 {
1081 // constant data
1082 dataExtents.push_back(1);
1083 }
1084 dataRank_ = dataExtents.size();
1085 switch (dataRank_)
1086 {
1087 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", dataExtents[0]); break;
1088 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1]); break;
1089 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]); break;
1090 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]); break;
1091 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]); break;
1092 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]); break;
1093 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]); break;
1094 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1095 }
1096 }
1097 setActiveDims();
1098 }
1099
1101 Data(const ScalarView<DataScalar,DeviceType> &data, int rank, Kokkos::Array<int,7> extents, Kokkos::Array<DataVariationType,7> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1102 :
1103 dataRank_(data.rank()), extents_(extents), variationType_(variationType), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1104 {
1106 setActiveDims();
1107 }
1108
1110 template<typename OtherDeviceType, class = typename std::enable_if< std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type,
1111 class = typename std::enable_if<!std::is_same<DeviceType,OtherDeviceType>::value>::type>
1112 Data(const Data<DataScalar,OtherDeviceType> &data)
1113 :
1114 dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1115 {
1116// std::cout << "Entered copy-like Data constructor.\n";
1117 if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1118 {
1119 const auto view = data.getUnderlyingView();
1120 switch (dataRank_)
1121 {
1122 case 1: data1_ = data.getUnderlyingView1(); break;
1123 case 2: data2_ = data.getUnderlyingView2(); break;
1124 case 3: data3_ = data.getUnderlyingView3(); break;
1125 case 4: data4_ = data.getUnderlyingView4(); break;
1126 case 5: data5_ = data.getUnderlyingView5(); break;
1127 case 6: data6_ = data.getUnderlyingView6(); break;
1128 case 7: data7_ = data.getUnderlyingView7(); break;
1129 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1130 }
1131 }
1132 setActiveDims();
1133 }
1134
1136 template<typename OtherDeviceType, class = typename std::enable_if<!std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type>
1138 :
1139 dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1140 {
1141// std::cout << "Entered copy-like Data constructor.\n";
1142 if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1143 {
1144 const auto view = data.getUnderlyingView();
1145 switch (dataRank_)
1146 {
1147 case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", view.extent_int(0)); break;
1148 case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1)); break;
1149 case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1150 case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1151 case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1152 case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1153 case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1154 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1155 }
1156
1157 // copy
1158 // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1159 // first, mirror and copy dataView; then copy to the appropriate data_ member
1160 using MemorySpace = typename DeviceType::memory_space;
1161 switch (dataRank_)
1162 {
1163 case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1164 case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1165 case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1166 case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1167 case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1168 case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1169 case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1170 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1171 }
1172 }
1173 setActiveDims();
1174 }
1175
1177// Data(const Data<DataScalar,ExecSpaceType> &data)
1178// :
1179// dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1180// {
1181// std::cout << "Entered Data copy constructor.\n";
1182// if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1183// {
1184// const auto view = data.getUnderlyingView();
1185// switch (dataRank_)
1186// {
1187// case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0)); break;
1188// case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1)); break;
1189// case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1190// case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1191// case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1192// case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1193// case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1194// default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1195// }
1196//
1197// // copy
1198// // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1199// // first, mirror and copy dataView; then copy to the appropriate data_ member
1200// using MemorySpace = typename DeviceType::memory_space;
1201// switch (dataRank_)
1202// {
1203// case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1204// case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1205// case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1206// case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1207// case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1208// case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1209// case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1210// default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1211// }
1212// }
1213//
1214// setActiveDims();
1215// }
1216
1218 Data(ScalarView<DataScalar,DeviceType> data)
1219 :
1220 Data(data,
1221 data.rank(),
1222 Kokkos::Array<int,7> {data.extent_int(0),data.extent_int(1),data.extent_int(2),data.extent_int(3),data.extent_int(4),data.extent_int(5),data.extent_int(6)},
1223 Kokkos::Array<DataVariationType,7> {GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL}, -1)
1224 {}
1225
1227 template<size_t rank, class ...DynRankViewProperties>
1228 Data(const Kokkos::DynRankView<DataScalar,DeviceType, DynRankViewProperties...> &data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1229 :
1230 dataRank_(data.rank()), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1231 {
1232// std::cout << "Entered a DynRankView Data() constructor.\n";
1234 for (unsigned d=0; d<rank; d++)
1235 {
1236 extents_[d] = extents[d];
1237 variationType_[d] = variationType[d];
1238 }
1239 setActiveDims();
1240 }
1241
1242 template<size_t rank, class ...ViewProperties>
1243 Data(Kokkos::View<DataScalar*,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1244 :
1245 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1246 {
1247 data1_ = data;
1248 for (unsigned d=0; d<rank; d++)
1249 {
1250 extents_[d] = extents[d];
1251 variationType_[d] = variationType[d];
1252 }
1253 setActiveDims();
1254 }
1255
1256 template<size_t rank, class ...ViewProperties>
1257 Data(Kokkos::View<DataScalar**,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1258 :
1259 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1260 {
1261 data2_ = data;
1262 for (unsigned d=0; d<rank; d++)
1263 {
1264 extents_[d] = extents[d];
1265 variationType_[d] = variationType[d];
1266 }
1267 setActiveDims();
1268 }
1269
1270 template<size_t rank, class ...ViewProperties>
1271 Data(Kokkos::View<DataScalar***,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1272 :
1273 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1274 {
1275 data3_ = data;
1276 for (unsigned d=0; d<rank; d++)
1277 {
1278 extents_[d] = extents[d];
1279 variationType_[d] = variationType[d];
1280 }
1281 setActiveDims();
1282 }
1283
1284 template<size_t rank, class ...ViewProperties>
1285 Data(Kokkos::View<DataScalar****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1286 :
1287 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1288 {
1289 data4_ = data;
1290 for (unsigned d=0; d<rank; d++)
1291 {
1292 extents_[d] = extents[d];
1293 variationType_[d] = variationType[d];
1294 }
1295 setActiveDims();
1296 }
1297
1298 template<size_t rank, class ...ViewProperties>
1299 Data(Kokkos::View<DataScalar*****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1300 :
1301 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1302 {
1303 data5_ = data;
1304 for (unsigned d=0; d<rank; d++)
1305 {
1306 extents_[d] = extents[d];
1307 variationType_[d] = variationType[d];
1308 }
1309 setActiveDims();
1310 }
1311
1312 template<size_t rank, class ...ViewProperties>
1313 Data(Kokkos::View<DataScalar******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1314 :
1315 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1316 {
1317 data6_ = data;
1318 for (unsigned d=0; d<rank; d++)
1319 {
1320 extents_[d] = extents[d];
1321 variationType_[d] = variationType[d];
1322 }
1323 setActiveDims();
1324 }
1325
1326 template<size_t rank, class ...ViewProperties>
1327 Data(Kokkos::View<DataScalar*******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1328 :
1329 dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1330 {
1331 data7_ = data;
1332 for (unsigned d=0; d<rank; d++)
1333 {
1334 extents_[d] = extents[d];
1335 variationType_[d] = variationType[d];
1336 }
1337 setActiveDims();
1338 }
1339
1341 template<size_t rank>
1342 Data(DataScalar constantValue, Kokkos::Array<int,rank> extents)
1343 :
1344 dataRank_(1), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(rank)
1345 {
1346 data1_ = Kokkos::View<DataScalar*,DeviceType>("Constant Data",1);
1347 Kokkos::deep_copy(data1_, constantValue);
1348 for (unsigned d=0; d<rank; d++)
1349 {
1350 extents_[d] = extents[d];
1351 }
1352 setActiveDims();
1353 }
1354
1357 :
1358 dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(0)
1359 {
1360 setActiveDims();
1361 }
1362
1364 KOKKOS_INLINE_FUNCTION
1366 {
1367 return blockPlusDiagonalLastNonDiagonal_;
1368 }
1369
1371 KOKKOS_INLINE_FUNCTION
1372 Kokkos::Array<int,7> getExtents() const
1373 {
1374 return extents_;
1375 }
1376
1378 KOKKOS_INLINE_FUNCTION
1379 DimensionInfo getDimensionInfo(const int &dim) const
1380 {
1381 DimensionInfo dimInfo;
1382
1383 dimInfo.logicalExtent = extent_int(dim);
1384 dimInfo.variationType = variationType_[dim];
1385 dimInfo.dataExtent = getDataExtent(dim);
1386 dimInfo.variationModulus = variationModulus_[dim];
1387
1388 if (dimInfo.variationType == BLOCK_PLUS_DIAGONAL)
1389 {
1390 dimInfo.blockPlusDiagonalLastNonDiagonal = blockPlusDiagonalLastNonDiagonal_;
1391 }
1392 return dimInfo;
1393 }
1394
1396 KOKKOS_INLINE_FUNCTION
1397 DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
1398 {
1399 const DimensionInfo myDimInfo = getDimensionInfo(dim);
1400 const DimensionInfo otherDimInfo = otherData.getDimensionInfo(dim);
1401
1402 return combinedDimensionInfo(myDimInfo, otherDimInfo);
1403 }
1404
1406 template<int rank>
1407 KOKKOS_INLINE_FUNCTION
1408 enable_if_t<rank==1, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1410 {
1411 #ifdef HAVE_INTREPID2_DEBUG
1412 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1413 #endif
1414 return data1_;
1415 }
1416
1418 template<int rank>
1419 KOKKOS_INLINE_FUNCTION
1420 enable_if_t<rank==2, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1422 {
1423 #ifdef HAVE_INTREPID2_DEBUG
1424 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1425 #endif
1426 return data2_;
1427 }
1428
1430 template<int rank>
1431 KOKKOS_INLINE_FUNCTION
1432 enable_if_t<rank==3, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1434 {
1435 #ifdef HAVE_INTREPID2_DEBUG
1436 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1437 #endif
1438 return data3_;
1439 }
1440
1442 template<int rank>
1443 KOKKOS_INLINE_FUNCTION
1444 enable_if_t<rank==4, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1446 {
1447 #ifdef HAVE_INTREPID2_DEBUG
1448 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1449 #endif
1450 return data4_;
1451 }
1452
1454 template<int rank>
1455 KOKKOS_INLINE_FUNCTION
1456 enable_if_t<rank==5, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1458 {
1459 #ifdef HAVE_INTREPID2_DEBUG
1460 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1461 #endif
1462 return data5_;
1463 }
1464
1466 template<int rank>
1467 KOKKOS_INLINE_FUNCTION
1468 enable_if_t<rank==6, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1470 {
1471 #ifdef HAVE_INTREPID2_DEBUG
1472 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1473 #endif
1474 return data6_;
1475 }
1476
1478 template<int rank>
1479 KOKKOS_INLINE_FUNCTION
1480 enable_if_t<rank==7, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1482 {
1483 #ifdef HAVE_INTREPID2_DEBUG
1484 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1485 #endif
1486 return data7_;
1487 }
1488
1490 KOKKOS_INLINE_FUNCTION
1491 const Kokkos::View<DataScalar*, DeviceType> & getUnderlyingView1() const
1492 {
1493 return getUnderlyingView<1>();
1494 }
1495
1497 KOKKOS_INLINE_FUNCTION
1498 const Kokkos::View<DataScalar**, DeviceType> & getUnderlyingView2() const
1499 {
1500 return getUnderlyingView<2>();
1501 }
1502
1504 KOKKOS_INLINE_FUNCTION
1505 const Kokkos::View<DataScalar***, DeviceType> & getUnderlyingView3() const
1506 {
1507 return getUnderlyingView<3>();
1508 }
1509
1511 KOKKOS_INLINE_FUNCTION
1512 const Kokkos::View<DataScalar****, DeviceType> & getUnderlyingView4() const
1513 {
1514 return getUnderlyingView<4>();
1515 }
1516
1518 KOKKOS_INLINE_FUNCTION
1519 const Kokkos::View<DataScalar*****, DeviceType> & getUnderlyingView5() const
1520 {
1521 return getUnderlyingView<5>();
1522 }
1523
1525 KOKKOS_INLINE_FUNCTION
1526 const Kokkos::View<DataScalar******, DeviceType> & getUnderlyingView6() const
1527 {
1528 return getUnderlyingView<6>();
1529 }
1530
1532 KOKKOS_INLINE_FUNCTION
1533 const Kokkos::View<DataScalar*******, DeviceType> & getUnderlyingView7() const
1534 {
1535 return getUnderlyingView<7>();
1536 }
1537
1539 KOKKOS_INLINE_FUNCTION
1540 void setUnderlyingView1(Kokkos::View<DataScalar*, DeviceType> & view) const
1541 {
1542 data1_ = view;
1543 }
1544
1546 KOKKOS_INLINE_FUNCTION
1547 void setUnderlyingView2(Kokkos::View<DataScalar**, DeviceType> & view) const
1548 {
1549 data2_ = view;
1550 }
1551
1553 KOKKOS_INLINE_FUNCTION
1554 void setUnderlyingView3(Kokkos::View<DataScalar***, DeviceType> & view) const
1555 {
1556 data3_ = view;
1557 }
1558
1560 KOKKOS_INLINE_FUNCTION
1561 void setUnderlyingView4(Kokkos::View<DataScalar****, DeviceType> & view) const
1562 {
1563 data4_ = view;
1564 }
1565
1567 KOKKOS_INLINE_FUNCTION
1568 void setUnderlyingView5(Kokkos::View<DataScalar*****, DeviceType> & view) const
1569 {
1570 data5_ = view;
1571 }
1572
1574 KOKKOS_INLINE_FUNCTION
1575 void setUnderlyingView6(Kokkos::View<DataScalar******, DeviceType> & view) const
1576 {
1577 data6_ = view;
1578 }
1579
1581 KOKKOS_INLINE_FUNCTION
1582 void setUnderlyingView7(Kokkos::View<DataScalar*******, DeviceType> & view) const
1583 {
1584 data7_ = view;
1585 }
1586
1588 ScalarView<DataScalar,DeviceType> getUnderlyingView() const
1589 {
1590 switch (dataRank_)
1591 {
1592 case 1: return data1_;
1593 case 2: return data2_;
1594 case 3: return data3_;
1595 case 4: return data4_;
1596 case 5: return data5_;
1597 case 6: return data6_;
1598 case 7: return data7_;
1599 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1600 }
1601 }
1602
1604 KOKKOS_INLINE_FUNCTION
1605 ordinal_type getUnderlyingViewRank() const
1606 {
1607 return dataRank_;
1608 }
1609
1611 KOKKOS_INLINE_FUNCTION
1612 ordinal_type getUnderlyingViewSize() const
1613 {
1614 ordinal_type size = 1;
1615 for (ordinal_type r=0; r<dataRank_; r++)
1616 {
1617 size *= getUnderlyingViewExtent(r);
1618 }
1619 return size;
1620 }
1621
1623 ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying() const
1624 {
1625 switch (dataRank_)
1626 {
1627 case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", data1_.extent_int(0));
1628 case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", data2_.extent_int(0), data2_.extent_int(1));
1629 case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", data3_.extent_int(0), data3_.extent_int(1), data3_.extent_int(2));
1630 case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", data4_.extent_int(0), data4_.extent_int(1), data4_.extent_int(2), data4_.extent_int(3));
1631 case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", data5_.extent_int(0), data5_.extent_int(1), data5_.extent_int(2), data5_.extent_int(3), data5_.extent_int(4));
1632 case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", data6_.extent_int(0), data6_.extent_int(1), data6_.extent_int(2), data6_.extent_int(3), data6_.extent_int(4), data6_.extent_int(5));
1633 case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", data7_.extent_int(0), data7_.extent_int(1), data7_.extent_int(2), data7_.extent_int(3), data7_.extent_int(4), data7_.extent_int(5), data7_.extent_int(6));
1634 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1635 }
1636 }
1637
1639 template<class ... DimArgs>
1640 ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
1641 {
1642 switch (dataRank_)
1643 {
1644 case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", dims...);
1645 case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", dims...);
1646 case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", dims...);
1647 case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", dims...);
1648 case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", dims...);
1649 case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", dims...);
1650 case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", dims...);
1651 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1652 }
1653 }
1654
1656 void clear() const
1657 {
1658 switch (dataRank_)
1659 {
1660 case 1: Kokkos::deep_copy(data1_, 0.0); break;
1661 case 2: Kokkos::deep_copy(data2_, 0.0); break;
1662 case 3: Kokkos::deep_copy(data3_, 0.0); break;
1663 case 4: Kokkos::deep_copy(data4_, 0.0); break;
1664 case 5: Kokkos::deep_copy(data5_, 0.0); break;
1665 case 6: Kokkos::deep_copy(data6_, 0.0); break;
1666 case 7: Kokkos::deep_copy(data7_, 0.0); break;
1667 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1668 }
1669 }
1670
1672 void copyDataFromDynRankViewMatchingUnderlying(const ScalarView<DataScalar,DeviceType> &dynRankView) const
1673 {
1674// std::cout << "Entered copyDataFromDynRankViewMatchingUnderlying().\n";
1675 switch (dataRank_)
1676 {
1677 case 1: copyContainer(data1_,dynRankView); break;
1678 case 2: copyContainer(data2_,dynRankView); break;
1679 case 3: copyContainer(data3_,dynRankView); break;
1680 case 4: copyContainer(data4_,dynRankView); break;
1681 case 5: copyContainer(data5_,dynRankView); break;
1682 case 6: copyContainer(data6_,dynRankView); break;
1683 case 7: copyContainer(data7_,dynRankView); break;
1684 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1685 }
1686 }
1687
1689 KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
1690 {
1691 for (unsigned i=0; i<activeDims_.size(); i++)
1692 {
1693 if (activeDims_[i] == d)
1694 {
1695 return getUnderlyingViewExtent(i);
1696 }
1697 else if (activeDims_[i] > d)
1698 {
1699 return 1; // data does not vary in the specified dimension
1700 }
1701 }
1702 return 1; // data does not vary in the specified dimension
1703 }
1704
1716 KOKKOS_INLINE_FUNCTION
1717 int getVariationModulus(const int &d) const
1718 {
1719 return variationModulus_[d];
1720 }
1721
1723 KOKKOS_INLINE_FUNCTION
1724 const Kokkos::Array<DataVariationType,7> & getVariationTypes() const
1725 {
1726 return variationType_;
1727 }
1728
1730 template<class ...IntArgs>
1731 KOKKOS_INLINE_FUNCTION
1732 return_type getEntry(const IntArgs&... intArgs) const
1733 {
1734 return getWritableEntry(intArgs...);
1735 }
1736
1737 template <bool...> struct bool_pack;
1738
1739 template <bool... v>
1740 using all_true = std::is_same<bool_pack<true, v...>, bool_pack<v..., true>>;
1741
1742 template <class ...IntArgs>
1743 using valid_args = all_true<std::is_integral<IntArgs>{}...>;
1744
1745 static_assert(valid_args<int,long,unsigned>::value, "valid args works");
1746
1748 template <class ...IntArgs>
1749 KOKKOS_INLINE_FUNCTION
1750#ifndef __INTEL_COMPILER
1751 // icc has a bug that prevents compilation with this enable_if_t
1752 // (possibly the same as https://community.intel.com/t5/Intel-C-Compiler/Intel-Compiler-bug-while-deducing-template-arguments-inside/m-p/1164358)
1753 // so with icc we'll just skip the argument type/count check
1754 enable_if_t<valid_args<IntArgs...>::value && (sizeof...(IntArgs) <= 7),return_type>
1755#else
1756 return_type
1757#endif
1758 operator()(const IntArgs&... intArgs) const {
1759 return getEntry(intArgs...);
1760 }
1761
1763 KOKKOS_INLINE_FUNCTION
1764 int extent_int(const int& r) const
1765 {
1766 return extents_[r];
1767 }
1768
1769 template <typename iType>
1770 KOKKOS_INLINE_FUNCTION constexpr
1771 typename std::enable_if<std::is_integral<iType>::value, size_t>::type
1772 extent(const iType& r) const {
1773 return extents_[r];
1774 }
1775
1777 KOKKOS_INLINE_FUNCTION bool isDiagonal() const
1778 {
1779 if (blockPlusDiagonalLastNonDiagonal_ >= 1) return false;
1780 int numBlockPlusDiagonalTypes = 0;
1781 for (unsigned r = 0; r<variationType_.size(); r++)
1782 {
1783 const auto &entryType = variationType_[r];
1784 if (entryType == BLOCK_PLUS_DIAGONAL) numBlockPlusDiagonalTypes++;
1785 }
1786 // 2 BLOCK_PLUS_DIAGONAL entries, combined with blockPlusDiagonalLastNonDiagonal being -1 or 0 indicates diagonal
1787 if (numBlockPlusDiagonalTypes == 2) return true;
1788 else if (numBlockPlusDiagonalTypes == 0) return false; // no BLOCK_PLUS_DIAGONAL --> not a diagonal matrix
1789 else INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "Unexpected number of ranks marked as BLOCK_PLUS_DIAGONAL (should be 0 or 2)");
1790 return false; // statement should be unreachable; included because compilers don't necessarily recognize that fact...
1791 }
1792
1799 {
1800 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape");
1801 const int rank = A.rank();
1802 std::vector<DimensionInfo> dimInfo(rank);
1803 for (int d=0; d<rank; d++)
1804 {
1805 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(d) != B.extent_int(d), std::invalid_argument, "A and B must have the same logical shape");
1806 dimInfo[d] = A.combinedDataDimensionInfo(B, d);
1807 }
1808 Data<DataScalar,DeviceType> result(dimInfo);
1809 return result;
1810 }
1811
1818 static Data<DataScalar,DeviceType> allocateMatMatResult( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
1819 {
1820 // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
1821 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
1822
1823 const int D1_DIM = A_MatData.rank() - 2;
1824 const int D2_DIM = A_MatData.rank() - 1;
1825
1826 const int A_rows = A_MatData.extent_int(D1_DIM);
1827 const int A_cols = A_MatData.extent_int(D2_DIM);
1828 const int B_rows = B_MatData.extent_int(D1_DIM);
1829 const int B_cols = B_MatData.extent_int(D2_DIM);
1830
1831 const int leftRows = transposeA ? A_cols : A_rows;
1832 const int leftCols = transposeA ? A_rows : A_cols;
1833 const int rightRows = transposeB ? B_cols : B_rows;
1834 const int rightCols = transposeB ? B_rows : B_cols;
1835
1836 INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument, "incompatible matrix dimensions");
1837
1838 Kokkos::Array<int,7> resultExtents; // logical extents
1839 Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
1840
1841 resultExtents[D1_DIM] = leftRows;
1842 resultExtents[D2_DIM] = rightCols;
1843 int resultBlockPlusDiagonalLastNonDiagonal = -1;
1844 if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1845 {
1846 // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1847 resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1848 resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1849 resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1850 }
1851
1852 const int resultRank = A_MatData.rank();
1853
1854 auto A_VariationTypes = A_MatData.getVariationTypes();
1855 auto B_VariationTypes = B_MatData.getVariationTypes();
1856
1857 Kokkos::Array<int,7> resultActiveDims;
1858 Kokkos::Array<int,7> resultDataDims;
1859 int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
1860 // the following loop is over the dimensions *prior* to matrix dimensions
1861 for (int i=0; i<resultRank-2; i++)
1862 {
1863 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.extent_int(i) != B_MatData.extent_int(i), std::invalid_argument, "A and B extents must match in each non-matrix dimension");
1864
1865 resultExtents[i] = A_MatData.extent_int(i);
1866
1867 const DataVariationType &A_VariationType = A_VariationTypes[i];
1868 const DataVariationType &B_VariationType = B_VariationTypes[i];
1869
1870 // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
1871 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1872 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(B_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1873
1874 int dataSize = 0;
1875 DataVariationType resultVariationType;
1876 if ((A_VariationType == GENERAL) || (B_VariationType == GENERAL))
1877 {
1878 resultVariationType = GENERAL;
1879 dataSize = resultExtents[i];
1880 }
1881 else if ((B_VariationType == CONSTANT) && (A_VariationType == CONSTANT))
1882 {
1883 resultVariationType = CONSTANT;
1884 dataSize = 1;
1885 }
1886 else if ((B_VariationType == MODULAR) && (A_VariationType == CONSTANT))
1887 {
1888 resultVariationType = MODULAR;
1889 dataSize = B_MatData.getVariationModulus(i);
1890 }
1891 else if ((B_VariationType == CONSTANT) && (A_VariationType == MODULAR))
1892 {
1893 resultVariationType = MODULAR;
1894 dataSize = A_MatData.getVariationModulus(i);
1895 }
1896 else
1897 {
1898 // both are modular. We allow this if they agree on the modulus
1899 auto A_Modulus = A_MatData.getVariationModulus(i);
1900 auto B_Modulus = B_MatData.getVariationModulus(i);
1901 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_Modulus != B_Modulus, std::invalid_argument, "If both matrices have variation type MODULAR, they must agree on the modulus");
1902 resultVariationType = MODULAR;
1903 dataSize = A_Modulus;
1904 }
1905 resultVariationTypes[i] = resultVariationType;
1906
1907 if (resultVariationType != CONSTANT)
1908 {
1909 resultActiveDims[resultNumActiveDims] = i;
1910 resultDataDims[resultNumActiveDims] = dataSize;
1911 resultNumActiveDims++;
1912 }
1913 }
1914
1915 // set things for final dimensions:
1916 resultExtents[D1_DIM] = leftRows;
1917 resultExtents[D2_DIM] = rightCols;
1918
1919 if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1920 {
1921 // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1922 resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1923 resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1924 resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1925
1926 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1927
1928 const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1929 const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1930
1931 resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1932 resultNumActiveDims++;
1933 }
1934 else
1935 {
1936 // pretty much the only variation types that make sense for matrix dims are GENERAL and BLOCK_PLUS_DIAGONAL
1937 resultVariationTypes[D1_DIM] = GENERAL;
1938 resultVariationTypes[D2_DIM] = GENERAL;
1939
1940 resultActiveDims[resultNumActiveDims] = resultRank - 2;
1941 resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
1942
1943 resultDataDims[resultNumActiveDims] = leftRows;
1944 resultDataDims[resultNumActiveDims+1] = rightCols;
1945 resultNumActiveDims += 2;
1946 }
1947
1948 for (int i=resultRank; i<7; i++)
1949 {
1950 resultVariationTypes[i] = CONSTANT;
1951 resultExtents[i] = 1;
1952 }
1953
1954 ScalarView<DataScalar,DeviceType> data;
1955 if (resultNumActiveDims == 1)
1956 {
1957 auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any
1958 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]);
1959 }
1960 else if (resultNumActiveDims == 2)
1961 {
1962 auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any
1963 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]);
1964 }
1965 else if (resultNumActiveDims == 3)
1966 {
1967 auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any
1968 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
1969 }
1970 else if (resultNumActiveDims == 4)
1971 {
1972 auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any
1973 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1974 resultDataDims[3]);
1975 }
1976 else if (resultNumActiveDims == 5)
1977 {
1978 auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any
1979 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1980 resultDataDims[3], resultDataDims[4]);
1981 }
1982 else if (resultNumActiveDims == 6)
1983 {
1984 auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any
1985 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1986 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
1987 }
1988 else // resultNumActiveDims == 7
1989 {
1990 auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any
1991 data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1992 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
1993 }
1994
1995 return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal);
1996 }
1997
2001 {
2002 // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2003 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matData.rank() != vecData.rank() + 1, std::invalid_argument, "matData and vecData have incompatible ranks");
2004 const int vecDim = vecData.extent_int(vecData.rank() - 1);
2005 const int matRows = matData.extent_int(matData.rank() - 2);
2006 const int matCols = matData.extent_int(matData.rank() - 1);
2007
2008 const int resultRank = vecData.rank();
2009
2010 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matCols != vecDim, std::invalid_argument, "matData column count != vecData dimension");
2011
2012 Kokkos::Array<int,7> resultExtents; // logical extents
2013 Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
2014 auto vecVariationTypes = vecData.getVariationTypes();
2015 auto matVariationTypes = matData.getVariationTypes();
2016
2017 Kokkos::Array<int,7> resultActiveDims;
2018 Kokkos::Array<int,7> resultDataDims;
2019 int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
2020 // the following loop is over the dimensions *prior* to matrix/vector dimensions
2021 for (int i=0; i<resultRank-1; i++)
2022 {
2023 resultExtents[i] = vecData.extent_int(i);
2024 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecData.extent_int(i) != matData.extent_int(i), std::invalid_argument, "matData and vecData extents must match in each non-matrix/vector dimension");
2025
2026 const DataVariationType &vecVariationType = vecVariationTypes[i];
2027 const DataVariationType &matVariationType = matVariationTypes[i];
2028
2029 // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
2030 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2031 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2032
2033 int dataSize = 0;
2034 DataVariationType resultVariationType;
2035 if ((vecVariationType == GENERAL) || (matVariationType == GENERAL))
2036 {
2037 resultVariationType = GENERAL;
2038 dataSize = resultExtents[i];
2039 }
2040 else if ((matVariationType == CONSTANT) && (vecVariationType == CONSTANT))
2041 {
2042 resultVariationType = CONSTANT;
2043 dataSize = 1;
2044 }
2045 else if ((matVariationType == MODULAR) && (vecVariationType == CONSTANT))
2046 {
2047 resultVariationType = MODULAR;
2048 dataSize = matData.getVariationModulus(i);
2049 }
2050 else if ((matVariationType == CONSTANT) && (vecVariationType == MODULAR))
2051 {
2052 resultVariationType = MODULAR;
2053 dataSize = matData.getVariationModulus(i);
2054 }
2055 else
2056 {
2057 // both are modular. We allow this if they agree on the modulus
2058 auto matModulus = matData.getVariationModulus(i);
2059 auto vecModulus = vecData.getVariationModulus(i);
2060 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matModulus != vecModulus, std::invalid_argument, "If matrix and vector both have variation type MODULAR, they must agree on the modulus");
2061 resultVariationType = MODULAR;
2062 dataSize = matModulus;
2063 }
2064 resultVariationTypes[i] = resultVariationType;
2065
2066 if (resultVariationType != CONSTANT)
2067 {
2068 resultActiveDims[resultNumActiveDims] = i;
2069 resultDataDims[resultNumActiveDims] = dataSize;
2070 resultNumActiveDims++;
2071 }
2072 }
2073 // for the final dimension, the variation type is always GENERAL
2074 // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…)
2075 resultVariationTypes[resultNumActiveDims] = GENERAL;
2076 resultActiveDims[resultNumActiveDims] = resultRank - 1;
2077 resultDataDims[resultNumActiveDims] = matRows;
2078 resultExtents[resultRank-1] = matRows;
2079 resultNumActiveDims++;
2080
2081 for (int i=resultRank; i<7; i++)
2082 {
2083 resultVariationTypes[i] = CONSTANT;
2084 resultExtents[i] = 1;
2085 }
2086
2087 ScalarView<DataScalar,DeviceType> data;
2088 if (resultNumActiveDims == 1)
2089 {
2090 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0]);
2091 }
2092 else if (resultNumActiveDims == 2)
2093 {
2094 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1]);
2095 }
2096 else if (resultNumActiveDims == 3)
2097 {
2098 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2099 }
2100 else if (resultNumActiveDims == 4)
2101 {
2102 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2103 resultDataDims[3]);
2104 }
2105 else if (resultNumActiveDims == 5)
2106 {
2107 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2108 resultDataDims[3], resultDataDims[4]);
2109 }
2110 else if (resultNumActiveDims == 6)
2111 {
2112 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2113 resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2114 }
2115 else // resultNumActiveDims == 7
2116 {
2117 data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2118 resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2119 }
2120
2121 return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes);
2122 }
2123
2125 template<int rank>
2126 enable_if_t<(rank!=1) && (rank!=7), Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<rank>> >
2128 {
2129 using ExecutionSpace = typename DeviceType::execution_space;
2130 Kokkos::Array<int,rank> startingOrdinals;
2131 Kokkos::Array<int,rank> extents;
2132
2133 for (int d=0; d<rank; d++)
2134 {
2135 startingOrdinals[d] = 0;
2136 extents[d] = getDataExtent(d);
2137 }
2138 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<rank>>(startingOrdinals,extents);
2139 return policy;
2140 }
2141
2143 template<int rank>
2144 enable_if_t<rank==7, Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<6>> >
2146 {
2147 using ExecutionSpace = typename DeviceType::execution_space;
2148 Kokkos::Array<int,6> startingOrdinals;
2149 Kokkos::Array<int,6> extents;
2150
2151 for (int d=0; d<6; d++)
2152 {
2153 startingOrdinals[d] = 0;
2154 extents[d] = getDataExtent(d);
2155 }
2156 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<6>>(startingOrdinals,extents);
2157 return policy;
2158 }
2159
2160 template<int rank>
2161 inline
2162 enable_if_t<rank==1, Kokkos::RangePolicy<typename DeviceType::execution_space> >
2164 {
2165 using ExecutionSpace = typename DeviceType::execution_space;
2166 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,getDataExtent(0));
2167 return policy;
2168 }
2169
2171 template<class BinaryOperator>
2172 void storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
2173 {
2174 using ExecutionSpace = typename DeviceType::execution_space;
2175
2176#ifdef INTREPID2_HAVE_DEBUG
2177 // check logical extents
2178 for (int d=0; d<rank_; d++)
2179 {
2180 INTREPID2_TEST_FOR_EXCEPTION(A.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2181 INTREPID2_TEST_FOR_EXCEPTION(B.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2182 }
2183 // TODO: add some checks that data extent of this suffices to accept combined A + B data.
2184#endif
2185
2186 const bool this_constant = (this->getUnderlyingViewRank() == 1) && (this->getUnderlyingViewSize() == 1);
2187
2188 // we special-case for constant output here; since the constant case is essentially all overhead, we want to avoid as much of the overhead of storeInPlaceCombination() as possible…
2189 if (this_constant)
2190 {
2191 // constant data
2192 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1); // just 1 entry
2193 auto this_underlying = this->getUnderlyingView<1>();
2194 auto A_underlying = A.getUnderlyingView<1>();
2195 auto B_underlying = B.getUnderlyingView<1>();
2196 Kokkos::parallel_for("compute in-place", policy,
2197 KOKKOS_LAMBDA (const int &i0) {
2198 auto & result = this_underlying(0);
2199 const auto & A_val = A_underlying(0);
2200 const auto & B_val = B_underlying(0);
2201
2202 result = binaryOperator(A_val,B_val);
2203 });
2204 }
2205 else
2206 {
2207 switch (rank_)
2208 {
2209 case 1: storeInPlaceCombination<BinaryOperator, 1>(A, B, binaryOperator); break;
2210 case 2: storeInPlaceCombination<BinaryOperator, 2>(A, B, binaryOperator); break;
2211 case 3: storeInPlaceCombination<BinaryOperator, 3>(A, B, binaryOperator); break;
2212 case 4: storeInPlaceCombination<BinaryOperator, 4>(A, B, binaryOperator); break;
2213 case 5: storeInPlaceCombination<BinaryOperator, 5>(A, B, binaryOperator); break;
2214 case 6: storeInPlaceCombination<BinaryOperator, 6>(A, B, binaryOperator); break;
2215 case 7: storeInPlaceCombination<BinaryOperator, 7>(A, B, binaryOperator); break;
2216 default:
2217 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "unhandled rank in switch");
2218 }
2219 }
2220 }
2221
2224 {
2225 auto sum = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2226 {
2227 return a + b;
2228 };
2229 storeInPlaceCombination(A, B, sum);
2230 }
2231
2234 {
2235 auto product = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2236 {
2237 return a * b;
2238 };
2239 storeInPlaceCombination(A, B, product);
2240 }
2241
2244 {
2245 auto difference = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2246 {
2247 return a - b;
2248 };
2249 storeInPlaceCombination(A, B, difference);
2250 }
2251
2254 {
2255 auto quotient = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2256 {
2257 return a / b;
2258 };
2259 storeInPlaceCombination(A, B, quotient);
2260 }
2261
2264 {
2265 // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2266 // TODO: check for invalidly shaped containers.
2267
2268 const int matRows = matData.extent_int(matData.rank() - 2);
2269 const int matCols = matData.extent_int(matData.rank() - 1);
2270
2271 // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2272 Data<DataScalar,DeviceType> thisData = *this;
2273
2274 using ExecutionSpace = typename DeviceType::execution_space;
2275 // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2276 if (rank_ == 3)
2277 {
2278 // typical case for e.g. gradient data: (C,P,D)
2279 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{getDataExtent(0),getDataExtent(1),matRows});
2280 Kokkos::parallel_for("compute mat-vec", policy,
2281 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &i) {
2282 auto & val_i = thisData.getWritableEntry(cellOrdinal, pointOrdinal, i);
2283 val_i = 0;
2284 for (int j=0; j<matCols; j++)
2285 {
2286 val_i += matData(cellOrdinal,pointOrdinal,i,j) * vecData(cellOrdinal,pointOrdinal,j);
2287 }
2288 });
2289 }
2290 else if (rank_ == 2)
2291 {
2292 //
2293 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{getDataExtent(0),matRows});
2294 Kokkos::parallel_for("compute mat-vec", policy,
2295 KOKKOS_LAMBDA (const int &vectorOrdinal, const int &i) {
2296 auto & val_i = thisData.getWritableEntry(vectorOrdinal, i);
2297 val_i = 0;
2298 for (int j=0; j<matCols; j++)
2299 {
2300 val_i += matData(vectorOrdinal,i,j) * vecData(vectorOrdinal,j);
2301 }
2302 });
2303 }
2304 else if (rank_ == 1)
2305 {
2306 // single-vector case
2307 Kokkos::RangePolicy<ExecutionSpace> policy(0,matRows);
2308 Kokkos::parallel_for("compute mat-vec", policy,
2309 KOKKOS_LAMBDA (const int &i) {
2310 auto & val_i = thisData.getWritableEntry(i);
2311 val_i = 0;
2312 for (int j=0; j<matCols; j++)
2313 {
2314 val_i += matData(i,j) * vecData(j);
2315 }
2316 });
2317 }
2318 else
2319 {
2320 // TODO: handle other cases
2321 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2322 }
2323 }
2324
2331 void storeMatMat( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
2332 {
2333 // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2334 // TODO: check for invalidly shaped containers.
2335
2336 // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2337 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
2338
2339 const int D1_DIM = A_MatData.rank() - 2;
2340 const int D2_DIM = A_MatData.rank() - 1;
2341
2342 const int A_rows = A_MatData.extent_int(D1_DIM);
2343 const int A_cols = A_MatData.extent_int(D2_DIM);
2344 const int B_rows = B_MatData.extent_int(D1_DIM);
2345 const int B_cols = B_MatData.extent_int(D2_DIM);
2346
2347 const int leftRows = transposeA ? A_cols : A_rows;
2348 const int leftCols = transposeA ? A_rows : A_cols;
2349 const int rightCols = transposeB ? B_rows : B_cols;
2350
2351#ifdef INTREPID2_HAVE_DEBUG
2352 const int rightRows = transposeB ? B_cols : B_rows;
2353 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(leftCols != rightRows, std::invalid_argument, "inner dimensions do not match");
2354#endif
2355
2356 // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2357 Data<DataScalar,DeviceType> thisData = *this;
2358
2359 using ExecutionSpace = typename DeviceType::execution_space;
2360
2361 const int diagonalStart = (variationType_[D1_DIM] == BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2362 // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2363 if (rank_ == 3)
2364 {
2365 // (C,D,D), say
2366 auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,getDataExtent(0));
2367 Kokkos::parallel_for("compute mat-mat", policy,
2368 KOKKOS_LAMBDA (const int &matrixOrdinal) {
2369 for (int i=0; i<diagonalStart; i++)
2370 {
2371 for (int j=0; j<rightCols; j++)
2372 {
2373 auto & val_ij = thisData.getWritableEntry(matrixOrdinal, i, j);
2374 val_ij = 0;
2375 for (int k=0; k<leftCols; k++)
2376 {
2377 const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2378 const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2379 val_ij += left * right;
2380 }
2381 }
2382 }
2383 for (int i=diagonalStart; i<leftRows; i++)
2384 {
2385 auto & val_ii = thisData.getWritableEntry(matrixOrdinal, i, i);
2386 const auto & left = A_MatData(matrixOrdinal,i,i);
2387 const auto & right = B_MatData(matrixOrdinal,i,i);
2388 val_ii = left * right;
2389 }
2390 });
2391 }
2392 else if (rank_ == 4)
2393 {
2394 // (C,P,D,D), perhaps
2395 auto policy = Kokkos::MDRangePolicy<ExecutionSpace, Kokkos::Rank<2> >({0,0},{getDataExtent(0),getDataExtent(1)});
2396 if (underlyingMatchesLogical_) // receiving data object is completely expanded
2397 {
2398 Kokkos::parallel_for("compute mat-mat", policy,
2399 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2400 for (int i=0; i<leftCols; i++)
2401 {
2402 for (int j=0; j<rightCols; j++)
2403 {
2404 auto & val_ij = thisData.getUnderlyingView4()(cellOrdinal,pointOrdinal, i, j);
2405 val_ij = 0;
2406 for (int k=0; k<leftCols; k++)
2407 {
2408 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2409 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2410 val_ij += left * right;
2411 }
2412 }
2413 }
2414 });
2415 }
2416 else
2417 {
2418 Kokkos::parallel_for("compute mat-mat", policy,
2419 KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2420 for (int i=0; i<diagonalStart; i++)
2421 {
2422 for (int j=0; j<rightCols; j++)
2423 {
2424 auto & val_ij = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, j);
2425 val_ij = 0;
2426 for (int k=0; k<leftCols; k++)
2427 {
2428 const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2429 const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2430 val_ij += left * right;
2431 }
2432 }
2433 }
2434 for (int i=diagonalStart; i<leftRows; i++)
2435 {
2436 auto & val_ii = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, i);
2437 const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2438 const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2439 val_ii = left * right;
2440 }
2441 });
2442 }
2443 }
2444 else
2445 {
2446 // TODO: handle other cases
2447 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2448 }
2449 }
2450
2452 KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
2453 {
2454 return extents_[0] > 0;
2455 }
2456
2458 KOKKOS_INLINE_FUNCTION
2459 unsigned rank() const
2460 {
2461 return rank_;
2462 }
2463
2470 void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
2471 {
2472 INTREPID2_TEST_FOR_EXCEPTION(variationType_[d] == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "setExtent is not supported for BLOCK_PLUS_DIAGONAL dimensions");
2473
2474 if (variationType_[d] == MODULAR)
2475 {
2476 bool dividesEvenly = ((newExtent / variationModulus_[d]) * variationModulus_[d] == newExtent);
2477 INTREPID2_TEST_FOR_EXCEPTION(!dividesEvenly, std::invalid_argument, "when setExtent is called on dimenisions with MODULAR variation, the modulus must divide the new extent evenly");
2478 }
2479
2480 if ((newExtent != extents_[d]) && (variationType_[d] == GENERAL))
2481 {
2482 // then we need to resize; let's determine the full set of new extents
2483 std::vector<ordinal_type> newExtents(dataRank_,-1);
2484 for (int r=0; r<dataRank_; r++)
2485 {
2486 if (activeDims_[r] == d)
2487 {
2488 // this is the changed dimension
2489 newExtents[r] = newExtent;
2490 }
2491 else
2492 {
2493 // unchanged; copy from existing
2494 newExtents[r] = getUnderlyingViewExtent(r);
2495 }
2496 }
2497
2498 switch (dataRank_)
2499 {
2500 case 1: Kokkos::resize(data1_,newExtents[0]);
2501 break;
2502 case 2: Kokkos::resize(data2_,newExtents[0],newExtents[1]);
2503 break;
2504 case 3: Kokkos::resize(data3_,newExtents[0],newExtents[1],newExtents[2]);
2505 break;
2506 case 4: Kokkos::resize(data4_,newExtents[0],newExtents[1],newExtents[2],newExtents[3]);
2507 break;
2508 case 5: Kokkos::resize(data5_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4]);
2509 break;
2510 case 6: Kokkos::resize(data6_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5]);
2511 break;
2512 case 7: Kokkos::resize(data7_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5],newExtents[6]);
2513 break;
2514 default: INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "Unexpected dataRank_ value");
2515 }
2516 }
2517
2518 extents_[d] = newExtent;
2519 }
2520
2522 KOKKOS_INLINE_FUNCTION
2524 {
2525 return underlyingMatchesLogical_;
2526 }
2527 };
2528}
2529
2530#endif /* Intrepid2_Data_h */
Header file with various static argument-extractor classes. These are useful for writing efficient,...
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
Returns DimensionInfo for a Data container that combines (through multiplication, say,...
@ CONSTANT
does not vary
@ GENERAL
arbitrary variation
@ BLOCK_PLUS_DIAGONAL
one of two dimensions in a matrix; bottom-right part of matrix is diagonal
@ MODULAR
varies according to modulus of the index
Header function for Intrepid2::Util class and other utility functions.
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
void storeInPlaceProduct(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) product, A .* B, into this container.
static Data< DataScalar, DeviceType > allocateMatMatResult(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION int getVariationModulus(const int &d) const
Variation modulus accessor.
enable_if_t< rank==7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expect...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==6, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 6.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1.
KOKKOS_INLINE_FUNCTION reference_type getWritableEntry(const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view....
void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
void copyDataFromDynRankViewMatchingUnderlying(const ScalarView< DataScalar, DeviceType > &dynRankView) const
Copies from the provided DynRankView into the underlying Kokkos::View container storing the unique da...
enable_if_t<(rank!=1) &&(rank!=7), Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< rank > > > dataExtentRangePolicy()
returns an MDRangePolicy over the underlying data extents (but with the logical shape).
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar **, DeviceType > & getUnderlyingView2() const
returns the View that stores the unique data. For rank-2 underlying containers.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 2.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ****, DeviceType > & getUnderlyingView4() const
returns the View that stores the unique data. For rank-4 underlying containers.
ScalarView< DataScalar, DeviceType > getUnderlyingView() const
Returns a DynRankView constructed atop the same underlying data as the fixed-rank Kokkos::View used i...
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
returns true for containers that have data; false for those that don't (namely, those that have been ...
Data()
default constructor (empty data)
KOKKOS_INLINE_FUNCTION void setUnderlyingView1(Kokkos::View< DataScalar *, DeviceType > &view) const
sets the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView3(Kokkos::View< DataScalar ***, DeviceType > &view) const
sets the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ***, DeviceType > & getUnderlyingView3() const
returns the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView2(Kokkos::View< DataScalar **, DeviceType > &view) const
sets the View that stores the unique data. For rank-2 underlying containers.
void clear() const
Copies 0.0 to the underlying View.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==7, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 7.
KOKKOS_INLINE_FUNCTION bool isDiagonal() const
returns true for containers that have two dimensions marked as BLOCK_PLUS_DIAGONAL for which the non-...
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying() const
Returns a DynRankView that matches the underlying Kokkos::View object in value_type,...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 3.
KOKKOS_INLINE_FUNCTION return_type getEntry(const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ******, DeviceType > & getUnderlyingView6() const
returns the View that stores the unique data. For rank-6 underlying containers.
Data(const ScalarView< DataScalar, DeviceType > &data, int rank, Kokkos::Array< int, 7 > extents, Kokkos::Array< DataVariationType, 7 > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
DynRankView constructor. Will copy to a View of appropriate rank.
static void copyContainer(ToContainer to, FromContainer from)
Generic data copying method to allow construction of Data object from DynRankViews for which deep_cop...
KOKKOS_INLINE_FUNCTION const int & blockPlusDiagonalLastNonDiagonal() const
For a Data object containing data with variation type BLOCK_PLUS_DIAGONAL, returns the row and column...
void storeInPlaceSum(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) sum, A .+ B, into this container.
void applyOperator(UnaryOperator unaryOperator)
applies the specified unary operator to each entry
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
Returns flattened index of the specified (i,i) matrix entry, assuming that i > lastNondiagonal....
void storeMatVec(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
Places the result of a matrix-vector multiply corresponding to the two provided containers into this ...
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
Returns the number of non-diagonal entries based on the last non-diagonal. Only applicable for BLOCK_...
Data(ScalarView< DataScalar, DeviceType > data)
copy constructor modeled after the copy-like constructor above. Not as efficient as the implicit copy...
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
KOKKOS_INLINE_FUNCTION enable_if_t< rank==5, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 5.
void storeInPlaceQuotient(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) quotient, A ./ B, into this container.
static Data< DataScalar, DeviceType > allocateInPlaceCombinationResult(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
KOKKOS_INLINE_FUNCTION const Kokkos::Array< DataVariationType, 7 > & getVariationTypes() const
Returns an array with the variation types in each logical dimension.
KOKKOS_INLINE_FUNCTION void setUnderlyingView6(Kokkos::View< DataScalar ******, DeviceType > &view) const
sets the View that stores the unique data. For rank-6 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView4(Kokkos::View< DataScalar ****, DeviceType > &view) const
sets the View that stores the unique data. For rank-4 underlying containers.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *, DeviceType > & getUnderlyingView1() const
returns the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *******, DeviceType > & getUnderlyingView7() const
returns the View that stores the unique data. For rank-7 underlying containers.
void storeMatMat(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION Kokkos::Array< int, 7 > getExtents() const
Returns an array containing the logical extents in each dimension.
KOKKOS_INLINE_FUNCTION void setUnderlyingView7(Kokkos::View< DataScalar *******, DeviceType > &view) const
sets the View that stores the unique data. For rank-7 underlying containers.
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
Returns a DynRankView that matches the underlying Kokkos::View object value_type and layout,...
KOKKOS_INLINE_FUNCTION void setUnderlyingView5(Kokkos::View< DataScalar *****, DeviceType > &view) const
sets the View that stores the unique data. For rank-5 underlying containers.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==4, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 4.
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
void allocateAndCopyFromDynRankView(ScalarView< DataScalar, DeviceType > data)
allocate an underlying View that matches the provided DynRankView in dimensions, and copy....
void storeInPlaceDifference(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) difference, A .- B, into this container.
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
//! Returns flattened index of the specified (i,j) matrix entry, assuming that i,j ≤ lastNondiagonal....
KOKKOS_INLINE_FUNCTION int getUnderlyingViewExtent(const int &dim) const
Returns the extent of the underlying view in the specified dimension.
void setActiveDims()
class initialization method. Called by constructors.
Data(const Data< DataScalar, OtherDeviceType > &data)
copy-like constructor for differing execution spaces. This does a deep_copy of the underlying view.
void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
sets the logical extent in the specified dimension. If needed, the underlying data container is resiz...
void storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
Places the result of an in-place combination (e.g., entrywise sum) into this data container.
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
Returns (DataVariationType, data extent) in the specified dimension for a Data container that combine...
static Data< DataScalar, DeviceType > allocateMatVecResult(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
KOKKOS_INLINE_FUNCTION DimensionInfo getDimensionInfo(const int &dim) const
Returns an object fully specifying the indicated dimension. This is used in determining appropriate s...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *****, DeviceType > & getUnderlyingView5() const
returns the View that stores the unique data. For rank-5 underlying containers.
Data(std::vector< DimensionInfo > dimInfoVector)
Constructor in terms of DimensionInfo for each logical dimension; does not require a View to be speci...
enable_if_t< rank==7, Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< 6 > > > dataExtentRangePolicy()
returns an MDRangePolicy over the first six underlying data extents (but with the logical shape).
Data(DataScalar constantValue, Kokkos::Array< int, rank > extents)
constructor for everywhere-constant data
Data(const Kokkos::DynRankView< DataScalar, DeviceType, DynRankViewProperties... > &data, Kokkos::Array< int, rank > extents, Kokkos::Array< DataVariationType, rank > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
Constructor that accepts a DynRankView as an argument. The data belonging to the DynRankView will be ...
enable_if_t< rank !=7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION enable_if_t< valid_args< IntArgs... >::value &&(sizeof...(IntArgs)<=7), return_type > operator()(const IntArgs &... intArgs) const
Returns a value corresponding to the specified logical data location.
A singleton class for a DynRankView containing exactly one zero entry. (Technically,...
Argument extractor class which ignores the input arguments in favor of passing a single 0 argument to...
For use with Data object into which a value will be stored.
Struct expressing all variation information about a Data object in a single dimension,...
Argument extractor class which passes all arguments to the provided container.
Argument extractor class which passes a single argument, indicated by the template parameter whichArg...