Tpetra parallel linear algebra Version of the Day
Tpetra_Details_FixedHashTable_def.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45#define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46
47#include "Tpetra_Details_FixedHashTable_decl.hpp"
49#ifdef TPETRA_USE_MURMUR_HASH
50# include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51#endif // TPETRA_USE_MURMUR_HASH
52#include "Kokkos_ArithTraits.hpp"
53#include "Teuchos_TypeNameTraits.hpp"
55#include <type_traits>
56
57namespace Tpetra {
58namespace Details {
59//
60// This namespace stores utility functions and Kokkos
61// functors for use in FixedHashTable construction.
62//
63namespace FHT {
64
65
66// Is it worth actually using building the FixedHashTable using
67// parallel threads, instead of just counting in a sequential loop?
68//
69// The parallel version of FixedHashTable construction isn't just a
70// parallelization of the sequential loops. It incurs additional
71// overheads. For example, the CountBuckets kernel uses atomic update
72// instructions to count the number of "buckets" per offsets array
73// (ptr) entry. Atomic updates have overhead, even if only one thread
74// issues them. The Kokkos kernels are still correct in that case,
75// but I would rather not incur overhead then. It might make sense to
76// set the minimum number of threads to something greater than 1, but
77// we would need experiments to find out.
78template<class ExecSpace>
79bool worthBuildingFixedHashTableInParallel () {
80 return ExecSpace::concurrency() > 1;
81}
82
83//
84// Functors for FixedHashTable initialization
85//
86// NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
87// should consider replacing all of these functors with in-line
88// lambdas. The only issue is that we would need to bake the
89// execution space into the policy, since the default execution space
90// might differ from the one Tpetra wants to use.
91
101template<class CountsViewType,
102 class KeysViewType,
103 class SizeType = typename KeysViewType::size_type>
105public:
106 typedef CountsViewType counts_view_type;
107 typedef KeysViewType keys_view_type;
108 typedef typename CountsViewType::execution_space execution_space;
109 typedef typename CountsViewType::memory_space memory_space;
110 typedef SizeType size_type;
111 typedef typename keys_view_type::non_const_value_type key_type;
112 // mfh 21 May 2015: Having a device_type typedef in the functor
113 // along with an execution_space typedef causes compilation issues.
114 // This is because one of Kokkos' partial specializations picks up
115 // on the device_type typedef, and another picks up on the
116 // execution_space typedef. The former is a legacy of a previous
117 // design iteration of Kokkos, which did not separate memory and
118 // execution spaces.
120
126 CountBuckets (const counts_view_type& counts,
127 const keys_view_type& keys,
128 const size_type size) :
129 counts_ (counts),
130 keys_ (keys),
131 size_ (size)
132 {}
133
138 KOKKOS_INLINE_FUNCTION void
139 operator () (const size_type& i) const
140 {
141 typedef typename hash_type::result_type hash_value_type;
142
143 const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
144 Kokkos::atomic_increment (&counts_[hashVal]);
145 }
146
147 using value_type = Kokkos::pair<int, key_type>;
148
152 KOKKOS_INLINE_FUNCTION void
153 operator () (const size_type& i, value_type& dst) const
154 {
155 using hash_value_type = typename hash_type::result_type;
156
157 const key_type keyVal = keys_[i];
158 const hash_value_type hashVal = hash_type::hashFunc (keyVal, size_);
159 if (hashVal < hash_value_type (0) ||
160 hashVal >= hash_value_type (counts_.extent (0))) {
161 dst.first = 1;
162 dst.second = keyVal;
163 }
164 else {
165 Kokkos::atomic_increment (&counts_[hashVal]);
166 }
167 }
168
170 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
171 {
172 dst.first = 0;
173 dst.second = key_type (0);
174 }
175
176 KOKKOS_INLINE_FUNCTION void
177 join (volatile value_type& dst,
178 const volatile value_type& src) const
180 const bool good = dst.first == 0 || src.first == 0;
181 dst.first = good ? 0 : dst.first;
182 // leave dst.second as it is, to get the "first" key
183 }
184
185private:
187 counts_view_type counts_;
189 keys_view_type keys_;
191 size_type size_;
192};
193
204template<class KeyType>
206 KOKKOS_INLINE_FUNCTION
207 FillPairsResult () :
208 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
209 // min() for a floating-point type returns the minimum _positive_
210 // normalized value. This is different than for integer types.
211 // lowest() is new in C++11 and returns the least value, always
212 // negative for signed finite types.
213 //
214 // mfh 23 May 2015: I have heard reports that
215 // std::numeric_limits<int>::lowest() does not exist with the
216 // Intel compiler. I'm not sure if the users in question actually
217 // enabled C++11. However, it's easy enough to work around this
218 // issue. The standard floating-point types are signed and have a
219 // sign bit, so lowest() is just -max(). For integer types, we
220 // can use min() instead.
221 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
222 ::Kokkos::Details::ArithTraits<KeyType>::min () :
223 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
224 success_ (true)
225 {
226 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
227 "KeyType must be some kind of number type.");
228 }
229
230 KOKKOS_INLINE_FUNCTION
231 FillPairsResult (const KeyType& initMinKey,
232 const KeyType& initMaxKey) :
233 minKey_ (initMinKey),
234 maxKey_ (initMaxKey),
235 success_ (true)
236 {
237 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
238 "KeyType must be some kind of number type.");
239 }
240
241 KeyType minKey_;
242 KeyType maxKey_;
243 bool success_;
244};
245
275template<class PairsViewType,
276 class KeysViewType,
277 class CountsViewType,
278 class SizeType = typename KeysViewType::size_type>
280public:
281 typedef typename CountsViewType::non_const_type counts_view_type;
282 typedef typename counts_view_type::const_type offsets_view_type;
283
284 typedef PairsViewType pairs_view_type;
285 typedef typename KeysViewType::const_type keys_view_type;
286 typedef typename offsets_view_type::execution_space execution_space;
287 typedef typename offsets_view_type::memory_space memory_space;
288 typedef SizeType size_type;
289
290 typedef typename keys_view_type::non_const_value_type key_type;
291 typedef typename pairs_view_type::non_const_value_type pair_type;
292
294
295 // mfh 23 May 2015: Having a device_type typedef in the functor
296 // along with an execution_space typedef causes compilation issues.
297 // This is because one of Kokkos' partial specializations picks up
298 // on the device_type typedef, and another picks up on the
299 // execution_space typedef. The former is a legacy of a previous
300 // design iteration of Kokkos, which did not separate memory and
301 // execution spaces.
303
314 FillPairs (const pairs_view_type& pairs,
315 const counts_view_type& counts,
316 const offsets_view_type& ptr,
317 const keys_view_type& keys,
318 const typename pair_type::second_type startingValue) :
319 pairs_ (pairs),
320 counts_ (counts),
321 ptr_ (ptr),
322 keys_ (keys),
323 size_ (counts.extent (0)),
324 startingValue_ (startingValue),
325 initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
326 initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
327 ::Kokkos::Details::ArithTraits<key_type>::min () :
328 -::Kokkos::Details::ArithTraits<key_type>::max ())
329 {}
330
349 FillPairs (const pairs_view_type& pairs,
350 const counts_view_type& counts,
351 const offsets_view_type& ptr,
352 const keys_view_type& keys,
353 const typename pair_type::second_type startingValue,
354 const key_type initMinKey,
355 const key_type initMaxKey) :
356 pairs_ (pairs),
357 counts_ (counts),
358 ptr_ (ptr),
359 keys_ (keys),
360 size_ (counts.extent (0)),
361 startingValue_ (startingValue),
362 initMinKey_ (initMinKey),
363 initMaxKey_ (initMaxKey)
364 {}
365
367 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
368 {
369 dst.minKey_ = initMinKey_;
370 dst.maxKey_ = initMaxKey_;
371 dst.success_ = true;
372 }
373
374 KOKKOS_INLINE_FUNCTION void
375 join (volatile value_type& dst,
376 const volatile value_type& src) const
377 {
378 if (src.maxKey_ > dst.maxKey_) {
379 dst.maxKey_ = src.maxKey_;
380 }
381 if (src.minKey_ < dst.minKey_) {
382 dst.minKey_ = src.minKey_;
383 }
384 dst.success_ = dst.success_ && src.success_;
385 }
386
391 KOKKOS_INLINE_FUNCTION void
392 operator () (const size_type& i, value_type& dst) const
393 {
394 typedef typename hash_type::result_type hash_value_type;
395 typedef typename offsets_view_type::non_const_value_type offset_type;
396 typedef typename pair_type::second_type val_type;
397 typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
399 const key_type key = keys_[i];
400 if (key > dst.maxKey_) {
401 dst.maxKey_ = key;
403 if (key < dst.minKey_) {
404 dst.minKey_ = key;
405 }
406 const val_type theVal = startingValue_ + static_cast<val_type> (i);
407 const hash_value_type hashVal = hash_type::hashFunc (key, size_);
408
409 // Return the old count; decrement afterwards.
410 const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
411 if (count == 0) {
412 dst.success_ = false; // FAILURE!
413 }
414 else {
415 const offset_type curPos = ptr_[hashVal+1] - count;
416
417 pairs_[curPos].first = key;
418 pairs_[curPos].second = theVal;
419 }
420 }
421
422private:
423 pairs_view_type pairs_;
424 counts_view_type counts_;
425 offsets_view_type ptr_;
426 keys_view_type keys_;
427 size_type size_;
428 typename pair_type::second_type startingValue_;
430 key_type initMinKey_;
432 key_type initMaxKey_;
433};
434
458template<class OffsetsViewType,
459 class PairsViewType,
460 class SizeType = typename OffsetsViewType::size_type>
462public:
463 typedef typename OffsetsViewType::const_type offsets_view_type;
464 typedef typename PairsViewType::const_type pairs_view_type;
465 typedef typename offsets_view_type::execution_space execution_space;
466 typedef typename offsets_view_type::memory_space memory_space;
467 typedef SizeType size_type;
468
469 // The result of the check is whether the table has one or more duplicates.
470 typedef int value_type;
471
476 CheckForDuplicateKeys (const pairs_view_type& pairs,
477 const offsets_view_type& ptr) :
478 pairs_ (pairs),
479 ptr_ (ptr),
480 size_ (ptr_.extent (0) == 0 ?
481 size_type (0) :
482 ptr_.extent (0) - 1)
483 {}
484
486 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
487 {
488 dst = 0;
489 }
490
492 KOKKOS_INLINE_FUNCTION void
493 join (volatile value_type& dst,
494 const volatile value_type& src) const
495 {
496 dst = dst + src > 0?1:0;
497 }
498
500 KOKKOS_INLINE_FUNCTION void
501 operator () (const size_type& i, value_type& dst) const
502 {
503 typedef typename offsets_view_type::non_const_value_type offset_type;
504 typedef typename pairs_view_type::non_const_value_type pair_type;
505 typedef typename pair_type::first_type key_type;
506
507 if (dst>0) {
508 return; // we've already found duplicate keys elsewhere
509 }
510 else {
511 const offset_type beg = ptr_[i];
512 const offset_type end = ptr_[i+1];
513 bool foundDuplicateKey = false;
514 // This is an ~ n^2 algorithm in the worst case, where n is the
515 // max number of keys that hash to the same bucket. However, if
516 // the hash function is reasonable, n should be much less than
517 // the total number of keys.
518 for (offset_type j = beg + 1; j < end; ++j) {
519 const key_type curKey = pairs_[j].first;
520 for (offset_type k = beg; k < j; ++k) {
521 if (pairs_[k].first == curKey) {
522 foundDuplicateKey = true;
523 break;
524 }
525 }
526 }
527 dst = (dst>0) || foundDuplicateKey?1:0;
528 }
529 }
530
531private:
532 pairs_view_type pairs_;
533 offsets_view_type ptr_;
534 size_type size_;
535};
536
537} // namespace FHT
538
539//
540// Here begins the actual implementation of FixedHashTable.
541//
542
543template<class KeyType, class ValueType, class DeviceType>
546 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
547 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
548 ::Kokkos::Details::ArithTraits<KeyType>::min () :
549 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
550 minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
551 maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
552 ::Kokkos::Details::ArithTraits<ValueType>::min () :
553 -::Kokkos::Details::ArithTraits<ValueType>::max ()),
554 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
555 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
556 ::Kokkos::Details::ArithTraits<KeyType>::min () :
557 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
558 contiguousValues_ (true), // trivially
559 checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
560 hasDuplicateKeys_ (false)
561{
562}
563
564template<class KeyType, class ValueType, class DeviceType>
566FixedHashTable (const keys_type& keys) :
567 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
568 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
569 ::Kokkos::Details::ArithTraits<KeyType>::min () :
570 -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
571 minVal_ (0),
572 maxVal_ (keys.size () == 0 ?
573 static_cast<ValueType> (0) :
574 static_cast<ValueType> (keys.size () - 1)),
575 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
576 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
577 ::Kokkos::Details::ArithTraits<KeyType>::min () :
578 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
579 contiguousValues_ (true),
580 checkedForDuplicateKeys_ (false),
581 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
582{
583 const ValueType startingValue = static_cast<ValueType> (0);
584 const KeyType initMinKey = this->minKey_;
585 const KeyType initMaxKey = this->maxKey_;
586 this->init (keys, startingValue, initMinKey, initMaxKey,
587 initMinKey, initMinKey, false);
588}
589
590template<class KeyType, class ValueType, class DeviceType>
592FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys) :
593 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
594 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
595 ::Kokkos::Details::ArithTraits<KeyType>::min () :
596 -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
597 minVal_ (0),
598 maxVal_ (keys.size () == 0 ?
599 static_cast<ValueType> (0) :
600 static_cast<ValueType> (keys.size () - 1)),
601 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
602 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
603 ::Kokkos::Details::ArithTraits<KeyType>::min () :
604 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
605 contiguousValues_ (true),
606 checkedForDuplicateKeys_ (false),
607 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
608{
609 typedef typename keys_type::non_const_type nonconst_keys_type;
610
611 // mfh 01 May 2015: I don't trust that
612 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
613 // so I ensure this manually.
614 const ValueType startingValue = static_cast<ValueType> (0);
615 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
616 keys.size ());
617 using Kokkos::ViewAllocateWithoutInitializing;
618 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
619 keys_k.extent (0));
620 Kokkos::deep_copy (keys_d, keys_k);
621 const KeyType initMinKey = this->minKey_;
622 const KeyType initMaxKey = this->maxKey_;
623 this->init (keys_d, startingValue, initMinKey, initMaxKey,
624 initMinKey, initMinKey, false);
625}
626
627template<class KeyType, class ValueType, class DeviceType>
629FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
630 const ValueType startingValue) :
631 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
632 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
633 ::Kokkos::Details::ArithTraits<KeyType>::min () :
634 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
635 minVal_ (startingValue),
636 maxVal_ (keys.size () == 0 ?
637 startingValue :
638 static_cast<ValueType> (startingValue + keys.size () - 1)),
639 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
640 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
641 ::Kokkos::Details::ArithTraits<KeyType>::min () :
642 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
643 contiguousValues_ (true),
644 checkedForDuplicateKeys_ (false),
645 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
646{
647 typedef typename keys_type::non_const_type nonconst_keys_type;
648
649 // mfh 01 May 2015: I don't trust that
650 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
651 // so I ensure this manually.
652 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
653 keys.size ());
654 using Kokkos::ViewAllocateWithoutInitializing;
655 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
656 keys_k.extent (0));
657 Kokkos::deep_copy (keys_d, keys_k);
658
659 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
660 // min() for a floating-point type returns the minimum _positive_
661 // normalized value. This is different than for integer types.
662 // lowest() is new in C++11 and returns the least value, always
663 // negative for signed finite types.
664 //
665 // mfh 23 May 2015: I have heard reports that
666 // std::numeric_limits<int>::lowest() does not exist with the Intel
667 // compiler. I'm not sure if the users in question actually enabled
668 // C++11. However, it's easy enough to work around this issue. The
669 // standard floating-point types are signed and have a sign bit, so
670 // lowest() is just -max(). For integer types, we can use min()
671 // instead.
672 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
673 ::Kokkos::Details::ArithTraits<KeyType>::min () :
674 -::Kokkos::Details::ArithTraits<KeyType>::max ();
675 this->init (keys_d, startingValue, initMinKey, initMaxKey,
676 initMinKey, initMinKey, false);
677
678}
679
680template<class KeyType, class ValueType, class DeviceType>
682FixedHashTable (const keys_type& keys,
683 const KeyType firstContigKey,
684 const KeyType lastContigKey,
685 const ValueType startingValue) :
686 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
687 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
688 ::Kokkos::Details::ArithTraits<KeyType>::min () :
689 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
690 minVal_ (startingValue),
691 maxVal_ (keys.size () == 0 ?
692 startingValue :
693 static_cast<ValueType> (startingValue + keys.size () - 1)),
694 firstContigKey_ (firstContigKey),
695 lastContigKey_ (lastContigKey),
696 contiguousValues_ (true),
697 checkedForDuplicateKeys_ (false),
698 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
699{
700 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
701 // min() for a floating-point type returns the minimum _positive_
702 // normalized value. This is different than for integer types.
703 // lowest() is new in C++11 and returns the least value, always
704 // negative for signed finite types.
705 //
706 // mfh 23 May 2015: I have heard reports that
707 // std::numeric_limits<int>::lowest() does not exist with the Intel
708 // compiler. I'm not sure if the users in question actually enabled
709 // C++11. However, it's easy enough to work around this issue. The
710 // standard floating-point types are signed and have a sign bit, so
711 // lowest() is just -max(). For integer types, we can use min()
712 // instead.
713 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
714 ::Kokkos::Details::ArithTraits<KeyType>::min () :
715 -::Kokkos::Details::ArithTraits<KeyType>::max ();
716 this->init (keys, startingValue, initMinKey, initMaxKey,
717 firstContigKey, lastContigKey, true);
718}
719
720template<class KeyType, class ValueType, class DeviceType>
722FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
723 const KeyType firstContigKey,
724 const KeyType lastContigKey,
725 const ValueType startingValue) :
726 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
727 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
728 ::Kokkos::Details::ArithTraits<KeyType>::min () :
729 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
730 minVal_ (startingValue),
731 maxVal_ (keys.size () == 0 ?
732 startingValue :
733 static_cast<ValueType> (startingValue + keys.size () - 1)),
734 firstContigKey_ (firstContigKey),
735 lastContigKey_ (lastContigKey),
736 contiguousValues_ (true),
737 checkedForDuplicateKeys_ (false),
738 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
739{
740 typedef typename keys_type::non_const_type nonconst_keys_type;
741
742 // mfh 01 May 2015: I don't trust that
743 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
744 // so I ensure this manually.
745 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
746 keys.size ());
747 using Kokkos::ViewAllocateWithoutInitializing;
748 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
749 keys_k.extent (0));
750 Kokkos::deep_copy (keys_d, keys_k);
751
752 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
753 // min() for a floating-point type returns the minimum _positive_
754 // normalized value. This is different than for integer types.
755 // lowest() is new in C++11 and returns the least value, always
756 // negative for signed finite types.
757 //
758 // mfh 23 May 2015: I have heard reports that
759 // std::numeric_limits<int>::lowest() does not exist with the Intel
760 // compiler. I'm not sure if the users in question actually enabled
761 // C++11. However, it's easy enough to work around this issue. The
762 // standard floating-point types are signed and have a sign bit, so
763 // lowest() is just -max(). For integer types, we can use min()
764 // instead.
765 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
766 ::Kokkos::Details::ArithTraits<KeyType>::min () :
767 -::Kokkos::Details::ArithTraits<KeyType>::max ();
768 this->init (keys_d, startingValue, initMinKey, initMaxKey,
769 firstContigKey, lastContigKey, true);
770}
771
772template<class KeyType, class ValueType, class DeviceType>
774FixedHashTable (const keys_type& keys,
775 const ValueType startingValue) :
776 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
777 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
778 ::Kokkos::Details::ArithTraits<KeyType>::min () :
779 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
780 minVal_ (startingValue),
781 maxVal_ (keys.size () == 0 ?
782 startingValue :
783 static_cast<ValueType> (startingValue + keys.size () - 1)),
784 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
785 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
786 ::Kokkos::Details::ArithTraits<KeyType>::min () :
787 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
788 contiguousValues_ (true),
789 checkedForDuplicateKeys_ (false),
790 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
791{
792 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
793 // min() for a floating-point type returns the minimum _positive_
794 // normalized value. This is different than for integer types.
795 // lowest() is new in C++11 and returns the least value, always
796 // negative for signed finite types.
797 //
798 // mfh 23 May 2015: I have heard reports that
799 // std::numeric_limits<int>::lowest() does not exist with the Intel
800 // compiler. I'm not sure if the users in question actually enabled
801 // C++11. However, it's easy enough to work around this issue. The
802 // standard floating-point types are signed and have a sign bit, so
803 // lowest() is just -max(). For integer types, we can use min()
804 // instead.
805 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
806 ::Kokkos::Details::ArithTraits<KeyType>::min () :
807 -::Kokkos::Details::ArithTraits<KeyType>::max ();
808 this->init (keys, startingValue, initMinKey, initMaxKey,
809 initMinKey, initMinKey, false);
810}
811
812template<class KeyType, class ValueType, class DeviceType>
814FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
815 const Teuchos::ArrayView<const ValueType>& vals) :
816 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
817 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
818 ::Kokkos::Details::ArithTraits<KeyType>::min () :
819 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
820 minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
821 maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
822 ::Kokkos::Details::ArithTraits<ValueType>::min () :
823 -::Kokkos::Details::ArithTraits<ValueType>::max ()),
824 firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
825 lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
826 ::Kokkos::Details::ArithTraits<KeyType>::min () :
827 -::Kokkos::Details::ArithTraits<KeyType>::max ()),
828 contiguousValues_ (false),
829 checkedForDuplicateKeys_ (false),
830 hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
831{
832 // mfh 01 May 2015: I don't trust that
833 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
834 // so I ensure this manually.
835 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
836 keys.size ());
837 host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
838 vals.size ());
839 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
840 // min() for a floating-point type returns the minimum _positive_
841 // normalized value. This is different than for integer types.
842 // lowest() is new in C++11 and returns the least value, always
843 // negative for signed finite types.
844 //
845 // mfh 23 May 2015: I have heard reports that
846 // std::numeric_limits<int>::lowest() does not exist with the Intel
847 // compiler. I'm not sure if the users in question actually enabled
848 // C++11. However, it's easy enough to work around this issue. The
849 // standard floating-point types are signed and have a sign bit, so
850 // lowest() is just -max(). For integer types, we can use min()
851 // instead.
852 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
853 ::Kokkos::Details::ArithTraits<KeyType>::min () :
854 -::Kokkos::Details::ArithTraits<KeyType>::max ();
855 this->init (keys_k, vals_k, initMinKey, initMaxKey);
856}
857
858template<class KeyType, class ValueType, class DeviceType>
859void
861init (const keys_type& keys,
862 ValueType startingValue,
863 KeyType initMinKey,
864 KeyType initMaxKey,
865 KeyType firstContigKey,
866 KeyType lastContigKey,
867 const bool computeInitContigKeys)
868{
869 using Kokkos::subview;
870 using Kokkos::ViewAllocateWithoutInitializing;
871 using Teuchos::TypeNameTraits;
872 typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
873 const char prefix[] = "Tpetra::Details::FixedHashTable: ";
874
875 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
876 {
877 const offset_type theMaxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
878 const size_type maxValST = static_cast<size_type> (theMaxVal);
879 TEUCHOS_TEST_FOR_EXCEPTION
880 (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
881 "number of keys " << keys.extent (0) << " does not fit in "
882 "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
883 "max value is " << theMaxVal << ". This means that it is not possible to "
884 "use this constructor.");
885 }
886 TEUCHOS_TEST_FOR_EXCEPTION
887 (static_cast<unsigned long long> (numKeys) >
888 static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
889 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
890 "keys " << numKeys << " is greater than the maximum representable "
891 "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
892 "This means that it is not possible to use this constructor.");
893 TEUCHOS_TEST_FOR_EXCEPTION
894 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
895 "This class currently only works when the number of keys is <= INT_MAX = "
896 << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
897 "developers.");
898
899 const bool buildInParallel =
900 FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
901 const bool debug = ::Tpetra::Details::Behavior::debug ();
902
903 // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
904 // could change that by setting up ptr and val as Kokkos::DualView
905 // instances. If we do that, since we are filling on host for now,
906 // we want to make sure that we only zero-fill ptr on host
907 // initially, and that we don't fill val at all. Once we finish
908 // Kokkos-izing all the set-up kernels, we won't need DualView for
909 // either ptr or val.
910
911 if (computeInitContigKeys) {
912 // Find the first and last initial contiguous keys. If we find a
913 // long sequence of initial contiguous keys, we can save space by
914 // not storing them explicitly as pairs in the hash table.
915 //
916 // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
917 // ("min index such that the difference between the current key and
918 // the next != 1"), which takes multiple passes over the data. We
919 // could fuse it with CountBuckets (only update counts on 'final'
920 // pass). However, we're really just moving this sequential search
921 // out of Map's constructor here, so there is no loss in doing it
922 // sequentially for now. Later, we can work on parallelization.
923 if (numKeys > 0) {
924 // FIXME: make it a parallel kernel with no host copy
925 auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
926 keys);
927 firstContigKey_ = keys_h[0];
928 // Start with one plus, then decrement at the end. That lets us do
929 // only one addition per loop iteration, rather than two (if we test
930 // against lastContigKey + 1 and then increment lastContigKey).
931 lastContigKey_ = firstContigKey_ + 1;
932
933 // We will only store keys in the table that are not part of the
934 // initial contiguous sequence. It's possible for the initial
935 // contiguous sequence to be trivial, which for a nonzero number of
936 // keys means that the "sequence" has length 1.
937 for (offset_type k = 1; k < numKeys; ++k) {
938 if (lastContigKey_ != keys_h[k]) {
939 break;
940 }
941 ++lastContigKey_;
942 }
943 --lastContigKey_;
944 }
945 }
946 else {
947 firstContigKey_ = firstContigKey;
948 lastContigKey_ = lastContigKey;
949 }
950
951 offset_type startIndex;
952 if (numKeys > 0) {
953 initMinKey = std::min (initMinKey, firstContigKey_);
954 initMaxKey = std::max (initMaxKey, lastContigKey_);
955 startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
956 } else {
957 startIndex = 0;
958 }
959
960 const offset_type theNumKeys = numKeys - startIndex;
961 const offset_type size = hash_type::getRecommendedSize (theNumKeys);
962#ifdef HAVE_TPETRA_DEBUG
963 TEUCHOS_TEST_FOR_EXCEPTION(
964 size == 0 && numKeys != 0, std::logic_error,
965 "Tpetra::Details::FixedHashTable constructor: "
966 "getRecommendedSize(" << numKeys << ") returned zero, "
967 "even though the number of keys " << numKeys << " is nonzero. "
968 "Please report this bug to the Tpetra developers.");
969#endif // HAVE_TPETRA_DEBUG
970 keys_type theKeys =
971 subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
972
973 // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
974 // fence here, but we do before other allocations.
975
976 // The array of counts must be separate from the array of offsets,
977 // in order for parallel_scan to work correctly.
978 typedef typename ptr_type::non_const_type counts_type;
979 counts_type counts ("Tpetra::FixedHashTable::counts", size);
980
981 //
982 // Count the number of "buckets" per offsets array (ptr) entry.
983 //
984
985 // Will only create the mirror for buildInParallel false - but then use it in two places
986 typename keys_type::HostMirror theKeysHost;
987
988 // The Kokkos kernel uses atomic update instructions to count the
989 // number of "buckets" per offsets array (ptr) entry. Atomic
990 // updates incur overhead, even in the sequential case. The Kokkos
991 // kernel is still correct in that case, but I would rather not
992 // incur overhead then.
993 if (buildInParallel) {
994 FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
995 using execution_space = typename counts_type::execution_space;
996 using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
997 const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
998 if (debug) {
999 using key_type = typename keys_type::non_const_value_type;
1000 Kokkos::pair<int, key_type> err;
1001 Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
1002 functor, err);
1003 TEUCHOS_TEST_FOR_EXCEPTION
1004 (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
1005 "constructor: CountBuckets found a key " << err.second << " that "
1006 "results in an out-of-bounds hash value.");
1007 }
1008 else {
1009 Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
1010 }
1011 }
1012 else {
1013 Kokkos::HostSpace hostMemSpace;
1014 theKeysHost = Kokkos::create_mirror_view(theKeys);
1015 Kokkos::deep_copy(theKeysHost, theKeys);
1016 auto countsHost = Kokkos::create_mirror_view (hostMemSpace, counts);
1017
1018 for (offset_type k = 0; k < theNumKeys; ++k) {
1019 using key_type = typename keys_type::non_const_value_type;
1020 const key_type key = theKeysHost[k];
1021
1022 using hash_value_type = typename hash_type::result_type;
1023 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1024 TEUCHOS_TEST_FOR_EXCEPTION
1025 (hashVal < hash_value_type (0) ||
1026 hashVal >= hash_value_type (countsHost.extent (0)),
1027 std::logic_error, "Tpetra::Details::FixedHashTable "
1028 "constructor: Sequential CountBuckets found a key " << key
1029 << " that results in an out-of-bounds hash value.");
1030
1031 ++countsHost[hashVal];
1032 }
1033 Kokkos::deep_copy (counts, countsHost);
1034 }
1035
1036 // KJ: This fence is not required for the 2-argument deep_copy which calls
1037 // fence, but will be required if switched to the 3-argumemt deep_copy which
1038 // passes a space. The 3-argument form does not fence.
1039 execution_space().fence ();
1040
1041 // Kokkos::View fills with zeros by default.
1042 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size+1);
1043
1044 // Compute row offsets via prefix sum:
1045 //
1046 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1047 //
1048 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1049 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1050 // formula is ptr[i+1] += ptr[i].
1051 //
1052 // parallel_scan does not incur overhead with Kokkos::Serial, but
1053 // with actual parallel execution spaces, it does require multiple
1054 // passes over the data. Thus, it still makes sense to have a
1055 // sequential fall-back.
1056
1058 if (buildInParallel) {
1059 computeOffsetsFromCounts (ptr, counts);
1060 }
1061
1062 if (! buildInParallel || debug) {
1063 Kokkos::HostSpace hostMemSpace;
1064 auto counts_h = Kokkos::create_mirror_view_and_copy (hostMemSpace, counts);
1065 auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
1066
1067#ifdef KOKKOS_ENABLE_SERIAL
1068 Kokkos::Serial hostExecSpace;
1069#else
1070 Kokkos::DefaultHostExecutionSpace hostExecSpace;
1071#endif // KOKKOS_ENABLE_SERIAL
1072
1073 computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
1074 Kokkos::deep_copy (ptr, ptr_h);
1075
1076 if (debug) {
1077 bool bad = false;
1078 for (offset_type i = 0; i < size; ++i) {
1079 if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
1080 bad = true;
1081 }
1082 }
1083 TEUCHOS_TEST_FOR_EXCEPTION
1084 (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
1085 "constructor: computeOffsetsFromCounts gave an incorrect "
1086 "result.");
1087 }
1088 }
1089
1090 // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
1091 // This fence is necessary as we need to make sure that the offset view
1092 // completes before the view is used in the next functor.
1093 execution_space().fence ();
1094
1095 // Allocate the array of (key,value) pairs. Don't fill it with
1096 // zeros, because we will fill it with actual data below.
1097 typedef typename val_type::non_const_type nonconst_val_type;
1098 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1099 theNumKeys);
1100
1101 // Fill in the hash table's "values" (the (key,value) pairs).
1102 typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1103 typename ptr_type::non_const_type> functor_type;
1104 typename functor_type::value_type result (initMinKey, initMaxKey);
1105
1106 const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1107 if (buildInParallel) {
1108 functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1109 initMinKey, initMaxKey);
1110 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1111 Kokkos::parallel_reduce (range_type (0, theNumKeys), functor, result);
1112 }
1113 else {
1114 Kokkos::HostSpace hostMemSpace;
1115 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1116 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1117 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1118 for (offset_type k = 0; k < theNumKeys; ++k) {
1119 typedef typename hash_type::result_type hash_value_type;
1120 const KeyType key = theKeysHost[k];
1121 if (key > result.maxKey_) {
1122 result.maxKey_ = key;
1123 }
1124 if (key < result.minKey_) {
1125 result.minKey_ = key;
1126 }
1127 const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1128 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1129
1130 // Return the old count; decrement afterwards.
1131 const offset_type count = counts_h[hashVal];
1132 --counts_h[hashVal];
1133 if (count == 0) {
1134 result.success_ = false; // FAILURE!
1135 break;
1136 }
1137 else {
1138 const offset_type curPos = ptr_h[hashVal+1] - count;
1139 val_h[curPos].first = key;
1140 val_h[curPos].second = theVal;
1141 }
1142 }
1143 Kokkos::deep_copy(counts, counts_h); // restore
1144 Kokkos::deep_copy(val, val_h); // restore
1145 }
1146
1147 // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1148 // reports of exceptions being thrown in Albany.
1149 //
1150 // TEUCHOS_TEST_FOR_EXCEPTION
1151 // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1152 // "init: Filling the hash table failed! Please report this bug to the "
1153 // "Tpetra developers.");
1154 (void) result; // prevent build warning (set but unused variable)
1155
1156 // "Commit" the computed arrays and other computed quantities.
1157 ptr_ = ptr;
1158 val_ = val;
1159 minKey_ = result.minKey_;
1160 maxKey_ = result.maxKey_;
1161 // We've already set firstContigKey_ and lastContigKey_ above.
1162}
1163
1164
1165template<class KeyType, class ValueType, class DeviceType>
1166void
1167FixedHashTable<KeyType, ValueType, DeviceType>::
1168init (const host_input_keys_type& keys,
1169 const host_input_vals_type& vals,
1170 KeyType initMinKey,
1171 KeyType initMaxKey)
1172{
1173 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1174 TEUCHOS_TEST_FOR_EXCEPTION
1175 (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1176 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1177 "keys " << numKeys << " is greater than the maximum representable "
1178 "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1179 TEUCHOS_TEST_FOR_EXCEPTION
1180 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1181 "Details::FixedHashTable: This class currently only works when the number "
1182 "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1183 ", please talk to the Tpetra developers.");
1184
1185 // There's no need to find the first and last initial contiguous
1186 // keys in this case, because if we reach this init() function, then
1187 // hasContiguousValues() is false and so get() doesn't use the
1188 // initial contiguous sequence of keys.
1189
1190 const offset_type size = hash_type::getRecommendedSize (numKeys);
1191#ifdef HAVE_TPETRA_DEBUG
1192 TEUCHOS_TEST_FOR_EXCEPTION(
1193 size == 0 && numKeys != 0, std::logic_error,
1194 "Tpetra::Details::FixedHashTable constructor: "
1195 "getRecommendedSize(" << numKeys << ") returned zero, "
1196 "even though the number of keys " << numKeys << " is nonzero. "
1197 "Please report this bug to the Tpetra developers.");
1198#endif // HAVE_TPETRA_DEBUG
1199
1200 // FIXME: Investigate a couple options:
1201 // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1202 // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1203 // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1204 // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1205 // been "Kokkos-ized".
1206 Kokkos::HostSpace hostMemSpace;
1207 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size + 1);
1208 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1209
1210 // Allocate the array of key,value pairs. Don't waste time filling
1211 // it with zeros, because we will fill it with actual data below.
1212 using Kokkos::ViewAllocateWithoutInitializing;
1213 typedef typename val_type::non_const_type nonconst_val_type;
1214 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1215 numKeys);
1216 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1217
1218 // Compute number of entries in each hash table position.
1219 for (offset_type k = 0; k < numKeys; ++k) {
1220 const typename hash_type::result_type hashVal =
1221 hash_type::hashFunc (keys[k], size);
1222 // Shift over one, so that counts[j] = ptr[j+1]. See below.
1223 ++ptr_h[hashVal+1];
1224 }
1225
1226 // Compute row offsets via prefix sum:
1227 //
1228 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1229 //
1230 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1231 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1232 // formula is ptr[i+1] += ptr[i].
1233 for (offset_type i = 0; i < size; ++i) {
1234 ptr_h[i+1] += ptr_h[i];
1235 }
1236 //ptr[0] = 0; // We've already done this when initializing ptr above.
1237
1238 // curRowStart[i] is the offset of the next element in row i.
1239 typename ptr_type::non_const_type::HostMirror curRowStart ("Tpetra::FixedHashTable::curRowStart", size);
1240
1241 // Fill in the hash table.
1242 FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1243 for (offset_type k = 0; k < numKeys; ++k) {
1244 typedef typename hash_type::result_type hash_value_type;
1245 const KeyType key = keys[k];
1246 if (key > result.maxKey_) {
1247 result.maxKey_ = key;
1248 }
1249 if (key < result.minKey_) {
1250 result.minKey_ = key;
1251 }
1252 const ValueType theVal = vals[k];
1253 if (theVal > maxVal_) {
1254 maxVal_ = theVal;
1255 }
1256 if (theVal < minVal_) {
1257 minVal_ = theVal;
1258 }
1259 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1260
1261 const offset_type offset = curRowStart[hashVal];
1262 const offset_type curPos = ptr_h[hashVal] + offset;
1263 if (curPos >= ptr_h[hashVal+1]) {
1264 result.success_ = false; // FAILURE!
1265 }
1266 else {
1267 val_h[curPos].first = key;
1268 val_h[curPos].second = theVal;
1269 ++curRowStart[hashVal];
1270 }
1271 }
1272
1273 TEUCHOS_TEST_FOR_EXCEPTION
1274 (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1275 "init: Filling the hash table failed! Please report this bug to the "
1276 "Tpetra developers.");
1277
1278 // "Commit" the computed arrays and other computed quantities.
1279 Kokkos::deep_copy(ptr, ptr_h);
1280 Kokkos::deep_copy(val, val_h);
1281
1282 ptr_ = ptr;
1283 val_ = val;
1284 minKey_ = result.minKey_;
1285 maxKey_ = result.maxKey_;
1286 // We've already assigned to minVal_ and maxVal_ above.
1287}
1288
1289template <class KeyType, class ValueType, class DeviceType>
1290bool
1293{
1294 if (! checkedForDuplicateKeys_) {
1295 hasDuplicateKeys_ = checkForDuplicateKeys ();
1296 checkedForDuplicateKeys_ = true;
1297 }
1298 return hasDuplicateKeys_;
1299}
1300
1301template <class KeyType, class ValueType, class DeviceType>
1302bool
1304checkForDuplicateKeys () const
1305{
1306 const offset_type size = this->getSize ();
1307 // It's allowed for the hash table to have a positive number of
1308 // buckets (getSize()), but still store no entries (numPairs()).
1309 // Both cases trivially entail no duplicates.
1310 if (size == 0 || this->numPairs () == 0) {
1311 return false;
1312 }
1313 else {
1314 typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1315 functor_type functor (val_, ptr_);
1316 int hasDupKeys = 0;
1317 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1318 Kokkos::parallel_reduce (range_type (0, size), functor, hasDupKeys);
1319 return hasDupKeys > 0;
1320 }
1321}
1322
1323template <class KeyType, class ValueType, class DeviceType>
1324std::string
1326description () const
1327{
1328 std::ostringstream oss;
1329 oss << "FixedHashTable<"
1330 << Teuchos::TypeNameTraits<KeyType>::name () << ","
1331 << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1332 << "{ numKeys: " << val_.extent (0)
1333 << ", tableSize: " << this->getSize () << " }";
1334 return oss.str();
1335}
1336
1337template <class KeyType, class ValueType, class DeviceType>
1338void
1340describe (Teuchos::FancyOStream& out,
1341 const Teuchos::EVerbosityLevel verbLevel) const
1342{
1343 using std::endl;
1344 using std::setw;
1345 using Teuchos::OSTab;
1346 using Teuchos::rcpFromRef;
1347 using Teuchos::TypeNameTraits;
1348 using Teuchos::VERB_DEFAULT;
1349 using Teuchos::VERB_NONE;
1350 using Teuchos::VERB_LOW;
1351 using Teuchos::VERB_EXTREME;
1352
1353 // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1354 // access to ptr_ and val_ from the host.
1355
1356 Teuchos::EVerbosityLevel vl = verbLevel;
1357 if (vl == VERB_DEFAULT) vl = VERB_LOW;
1358
1359 if (vl == VERB_NONE) {
1360 // do nothing
1361 }
1362 else if (vl == VERB_LOW) {
1363 out << this->description() << endl;
1364 }
1365 else { // MEDIUM, HIGH or EXTREME
1366 out << "FixedHashTable:" << endl;
1367 {
1368 OSTab tab1 (rcpFromRef (out));
1369
1370 // const std::string label = this->getObjectLabel ();
1371 // if (label != "") {
1372 // out << "label: " << label << endl;
1373 // }
1374 out << "Template parameters:" << endl;
1375 {
1376 OSTab tab2 (rcpFromRef (out));
1377 out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1378 << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1379 }
1380
1381 const offset_type tableSize = this->getSize ();
1382 const offset_type numKeys = val_.extent (0);
1383
1384 out << "Table parameters:" << endl;
1385 {
1386 OSTab tab2 (rcpFromRef (out));
1387 out << "numKeys: " << numKeys << endl
1388 << "tableSize: " << tableSize << endl;
1389 }
1390
1391 if (vl >= VERB_EXTREME) {
1392 out << "Contents: ";
1393 if (tableSize == 0 || numKeys == 0) {
1394 out << "[]" << endl;
1395 } else {
1396 out << "[ " << endl;
1397 {
1398 OSTab tab2 (rcpFromRef (out));
1399 for (offset_type i = 0; i < tableSize; ++i) {
1400 OSTab tab3 (rcpFromRef (out));
1401 out << "[";
1402 for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1403 out << "(" << val_[k].first << "," << val_[k].second << ")";
1404 if (k + 1 < ptr_[i+1]) {
1405 out << ", ";
1406 }
1407 }
1408 out << "]" << endl;
1409 } // for each table position i
1410 }
1411 out << "]" << endl;
1412 } // The table contains entries
1413 } // vl >= VERB_EXTREME
1414 }
1415 out << endl;
1416 } // if vl > VERB_LOW
1417}
1418
1419} // namespace Details
1420} // namespace Tpetra
1421
1422// Macro that explicitly instantiates FixedHashTable for the given local
1423// ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1424// template parameters occur in the opposite order of most Tpetra
1425// classes. This is because FixedHashTable performs global-to-local
1426// lookup, and the convention in templated C++ lookup tables (such as
1427// std::map) is <KeyType, ValueType>.
1428//
1429// This macro must be explanded within the Tpetra::Details namespace.
1430#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1431 template class FixedHashTable< GO , LO >;
1432
1433// Macro that explicitly instantiates FixedHashTable for the given
1434// local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1435// types. Note that FixedHashTable's first two template parameters
1436// occur in the opposite order of most Tpetra classes. This is
1437// because FixedHashTable performs global-to-local lookup, and the
1438// convention in templated C++ lookup tables (such as std::map) is
1439// <KeyType, ValueType>.
1440//
1441// This macro must be explanded within the Tpetra::Details namespace.
1442#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1443 template class FixedHashTable< GO , LO , DEVICE >;
1444
1445#endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
static bool debug()
Whether Tpetra is in debug mode.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Implementation details of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug)
The hash function for FixedHashTable.
ResultType result_type
Type of the return value of the hash function.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.