Kokkos Core Kernels Package Version of the Day
Kokkos_MemoryPool.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos v. 3.0
6// Copyright (2020) National Technology & Engineering
7// Solutions of Sandia, LLC (NTESS).
8//
9// Under the terms of Contract DE-NA0003525 with NTESS,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are
14// met:
15//
16// 1. Redistributions of source code must retain the above copyright
17// notice, this list of conditions and the following disclaimer.
18//
19// 2. Redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution.
22//
23// 3. Neither the name of the Corporation nor the names of the
24// contributors may be used to endorse or promote products derived from
25// this software without specific prior written permission.
26//
27// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38//
39// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40//
41// ************************************************************************
42//@HEADER
43*/
44
45#ifndef KOKKOS_MEMORYPOOL_HPP
46#define KOKKOS_MEMORYPOOL_HPP
47
48#include <Kokkos_Core_fwd.hpp>
49#include <Kokkos_Parallel.hpp>
50#include <Kokkos_Atomic.hpp>
51#include <impl/Kokkos_ConcurrentBitset.hpp>
52#include <impl/Kokkos_Error.hpp>
53#include <impl/Kokkos_SharedAlloc.hpp>
54
55#include <iostream>
56
57namespace Kokkos {
58namespace Impl {
59/* Report violation of size constraints:
60 * min_block_alloc_size <= max_block_alloc_size
61 * max_block_alloc_size <= min_superblock_size
62 * min_superblock_size <= max_superblock_size
63 * min_superblock_size <= min_total_alloc_size
64 * min_superblock_size <= min_block_alloc_size *
65 * max_block_per_superblock
66 */
67void memory_pool_bounds_verification(size_t min_block_alloc_size,
68 size_t max_block_alloc_size,
69 size_t min_superblock_size,
70 size_t max_superblock_size,
71 size_t max_block_per_superblock,
72 size_t min_total_alloc_size);
73} // namespace Impl
74} // namespace Kokkos
75
76namespace Kokkos {
77
78namespace Impl {
79
80void _print_memory_pool_state(std::ostream &s, uint32_t const *sb_state_ptr,
81 int32_t sb_count, uint32_t sb_size_lg2,
82 uint32_t sb_state_size, uint32_t state_shift,
83 uint32_t state_used_mask);
84
85} // end namespace Impl
86
87template <typename DeviceType>
88class MemoryPool {
89 private:
90 using CB = Kokkos::Impl::concurrent_bitset;
91
92 enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 };
93 enum : uint32_t { state_shift = CB::state_shift };
94 enum : uint32_t { state_used_mask = CB::state_used_mask };
95 enum : uint32_t { state_header_mask = CB::state_header_mask };
96 enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 };
97 enum : uint32_t { max_bit_count = CB::max_bit_count };
98
99 enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 };
100
101 /* Each superblock has a concurrent bitset state
102 * which is an array of uint32_t integers.
103 * [ { block_count_lg2 : state_shift bits
104 * , used_block_count : ( 32 - state_shift ) bits
105 * }
106 * , { block allocation bit set }* ]
107 *
108 * As superblocks are assigned (allocated) to a block size
109 * and released (deallocated) back to empty the superblock state
110 * is concurrently updated.
111 */
112
113 /* Mapping between block_size <-> block_state
114 *
115 * block_state = ( m_sb_size_lg2 - block_size_lg2 ) << state_shift
116 * block_size = m_sb_size_lg2 - ( block_state >> state_shift )
117 *
118 * Thus A_block_size < B_block_size <=> A_block_state > B_block_state
119 */
120
121 using base_memory_space = typename DeviceType::memory_space;
122
123 enum {
125 base_memory_space>::accessible
126 };
127
128 using Tracker = Kokkos::Impl::SharedAllocationTracker;
129 using Record = Kokkos::Impl::SharedAllocationRecord<base_memory_space>;
130
131 Tracker m_tracker;
132 uint32_t *m_sb_state_array;
133 uint32_t m_sb_state_size;
134 uint32_t m_sb_size_lg2;
135 uint32_t m_max_block_size_lg2;
136 uint32_t m_min_block_size_lg2;
137 int32_t m_sb_count;
138 int32_t m_hint_offset; // Offset to K * #block_size array of hints
139 int32_t m_data_offset; // Offset to 0th superblock data
140 int32_t m_unused_padding;
141
142 public:
143 using memory_space = typename DeviceType::memory_space;
144
146 enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ };
147 enum : uint32_t { max_block_per_superblock = max_bit_count };
148
149 //--------------------------------------------------------------------------
150
151 KOKKOS_INLINE_FUNCTION
152 bool operator==(MemoryPool const &other) const {
153 return m_sb_state_array == other.m_sb_state_array;
154 }
155
156 KOKKOS_INLINE_FUNCTION
157 size_t capacity() const noexcept {
158 return size_t(m_sb_count) << m_sb_size_lg2;
159 }
160
161 KOKKOS_INLINE_FUNCTION
162 size_t min_block_size() const noexcept {
163 return (1LU << m_min_block_size_lg2);
164 }
165
166 KOKKOS_INLINE_FUNCTION
167 size_t max_block_size() const noexcept {
168 return (1LU << m_max_block_size_lg2);
169 }
170
171 struct usage_statistics {
172 size_t capacity_bytes;
173 size_t superblock_bytes;
174 size_t max_block_bytes;
175 size_t min_block_bytes;
176 size_t capacity_superblocks;
177 size_t consumed_superblocks;
178 size_t consumed_blocks;
179 size_t consumed_bytes;
180 size_t reserved_blocks;
181 size_t reserved_bytes;
182 };
183
184 void get_usage_statistics(usage_statistics &stats) const {
186
187 const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
188
189 uint32_t *const sb_state_array =
190 accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
191
192 if (!accessible) {
193 Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
194 sb_state_array, m_sb_state_array, alloc_size);
195 }
196
197 stats.superblock_bytes = (1LU << m_sb_size_lg2);
198 stats.max_block_bytes = (1LU << m_max_block_size_lg2);
199 stats.min_block_bytes = (1LU << m_min_block_size_lg2);
200 stats.capacity_bytes = stats.superblock_bytes * m_sb_count;
201 stats.capacity_superblocks = m_sb_count;
202 stats.consumed_superblocks = 0;
203 stats.consumed_blocks = 0;
204 stats.consumed_bytes = 0;
205 stats.reserved_blocks = 0;
206 stats.reserved_bytes = 0;
207
208 const uint32_t *sb_state_ptr = sb_state_array;
209
210 for (int32_t i = 0; i < m_sb_count; ++i, sb_state_ptr += m_sb_state_size) {
211 const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift;
212
213 if (block_count_lg2) {
214 const uint32_t block_count = 1u << block_count_lg2;
215 const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2;
216 const uint32_t block_size = 1u << block_size_lg2;
217 const uint32_t block_used = (*sb_state_ptr) & state_used_mask;
218
219 stats.consumed_superblocks++;
220 stats.consumed_blocks += block_used;
221 stats.consumed_bytes += block_used * block_size;
222 stats.reserved_blocks += block_count - block_used;
223 stats.reserved_bytes += (block_count - block_used) * block_size;
224 }
225 }
226
227 if (!accessible) {
228 host.deallocate(sb_state_array, alloc_size);
229 }
230 }
231
232 void print_state(std::ostream &s) const {
234
235 const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
236
237 uint32_t *const sb_state_array =
238 accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
239
240 if (!accessible) {
241 Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
242 sb_state_array, m_sb_state_array, alloc_size);
243 }
244
245 Impl::_print_memory_pool_state(s, sb_state_array, m_sb_count, m_sb_size_lg2,
246 m_sb_state_size, state_shift,
247 state_used_mask);
248
249 if (!accessible) {
250 host.deallocate(sb_state_array, alloc_size);
251 }
252 }
253
254 //--------------------------------------------------------------------------
255
256 KOKKOS_DEFAULTED_FUNCTION MemoryPool(MemoryPool &&) = default;
257 KOKKOS_DEFAULTED_FUNCTION MemoryPool(const MemoryPool &) = default;
258 KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(MemoryPool &&) = default;
259 KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(const MemoryPool &) = default;
260
261 KOKKOS_INLINE_FUNCTION MemoryPool()
262 : m_tracker(),
263 m_sb_state_array(nullptr),
264 m_sb_state_size(0),
265 m_sb_size_lg2(0),
266 m_max_block_size_lg2(0),
267 m_min_block_size_lg2(0),
268 m_sb_count(0),
269 m_hint_offset(0),
270 m_data_offset(0),
271 m_unused_padding(0) {}
272
287 MemoryPool(const base_memory_space &memspace,
288 const size_t min_total_alloc_size, size_t min_block_alloc_size = 0,
289 size_t max_block_alloc_size = 0, size_t min_superblock_size = 0)
290 : m_tracker(),
291 m_sb_state_array(nullptr),
292 m_sb_state_size(0),
293 m_sb_size_lg2(0),
294 m_max_block_size_lg2(0),
295 m_min_block_size_lg2(0),
296 m_sb_count(0),
297 m_hint_offset(0),
298 m_data_offset(0),
299 m_unused_padding(0) {
300 const uint32_t int_align_lg2 = 3; /* align as int[8] */
301 const uint32_t int_align_mask = (1u << int_align_lg2) - 1;
302 const uint32_t default_min_block_size = 1u << 6; /* 64 bytes */
303 const uint32_t default_max_block_size = 1u << 12; /* 4k bytes */
304 const uint32_t default_min_superblock_size = 1u << 20; /* 1M bytes */
305
306 //--------------------------------------------------
307 // Default block and superblock sizes:
308
309 if (0 == min_block_alloc_size) {
310 // Default all sizes:
311
312 min_superblock_size =
313 std::min(size_t(default_min_superblock_size), min_total_alloc_size);
314
315 min_block_alloc_size =
316 std::min(size_t(default_min_block_size), min_superblock_size);
317
318 max_block_alloc_size =
319 std::min(size_t(default_max_block_size), min_superblock_size);
320 } else if (0 == min_superblock_size) {
321 // Choose superblock size as minimum of:
322 // max_block_per_superblock * min_block_size
323 // max_superblock_size
324 // min_total_alloc_size
325
326 const size_t max_superblock =
327 min_block_alloc_size * max_block_per_superblock;
328
329 min_superblock_size =
330 std::min(max_superblock,
331 std::min(size_t(max_superblock_size), min_total_alloc_size));
332 }
333
334 if (0 == max_block_alloc_size) {
335 max_block_alloc_size = min_superblock_size;
336 }
337
338 //--------------------------------------------------
339
340 /* Enforce size constraints:
341 * min_block_alloc_size <= max_block_alloc_size
342 * max_block_alloc_size <= min_superblock_size
343 * min_superblock_size <= max_superblock_size
344 * min_superblock_size <= min_total_alloc_size
345 * min_superblock_size <= min_block_alloc_size *
346 * max_block_per_superblock
347 */
348
349 Kokkos::Impl::memory_pool_bounds_verification(
350 min_block_alloc_size, max_block_alloc_size, min_superblock_size,
351 max_superblock_size, max_block_per_superblock, min_total_alloc_size);
352
353 //--------------------------------------------------
354 // Block and superblock size is power of two:
355 // Maximum value is 'max_superblock_size'
356
357 m_min_block_size_lg2 =
358 Kokkos::Impl::integral_power_of_two_that_contains(min_block_alloc_size);
359
360 m_max_block_size_lg2 =
361 Kokkos::Impl::integral_power_of_two_that_contains(max_block_alloc_size);
362
363 m_sb_size_lg2 =
364 Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
365
366 {
367 // number of superblocks is multiple of superblock size that
368 // can hold min_total_alloc_size.
369
370 const uint64_t sb_size_mask = (1LU << m_sb_size_lg2) - 1;
371
372 m_sb_count = (min_total_alloc_size + sb_size_mask) >> m_sb_size_lg2;
373 }
374
375 {
376 // Any superblock can be assigned to the smallest size block
377 // Size the block bitset to maximum number of blocks
378
379 const uint32_t max_block_count_lg2 = m_sb_size_lg2 - m_min_block_size_lg2;
380
381 m_sb_state_size =
382 (CB::buffer_bound_lg2(max_block_count_lg2) + int_align_mask) &
383 ~int_align_mask;
384 }
385
386 // Array of all superblock states
387
388 const size_t all_sb_state_size =
389 (m_sb_count * m_sb_state_size + int_align_mask) & ~int_align_mask;
390
391 // Number of block sizes
392
393 const int32_t number_block_sizes =
394 1 + m_max_block_size_lg2 - m_min_block_size_lg2;
395
396 // Array length for possible block sizes
397 // Hint array is one uint32_t per block size
398
399 const int32_t block_size_array_size =
400 (number_block_sizes + int_align_mask) & ~int_align_mask;
401
402 m_hint_offset = all_sb_state_size;
403 m_data_offset = m_hint_offset + block_size_array_size * HINT_PER_BLOCK_SIZE;
404
405 // Allocation:
406
407 const size_t header_size = m_data_offset * sizeof(uint32_t);
408 const size_t alloc_size =
409 header_size + (size_t(m_sb_count) << m_sb_size_lg2);
410
411 Record *rec = Record::allocate(memspace, "Kokkos::MemoryPool", alloc_size);
412
413 m_tracker.assign_allocated_record_to_uninitialized(rec);
414
415 m_sb_state_array = (uint32_t *)rec->data();
416
418
419 uint32_t *const sb_state_array =
420 accessible ? m_sb_state_array : (uint32_t *)host.allocate(header_size);
421
422 for (int32_t i = 0; i < m_data_offset; ++i) sb_state_array[i] = 0;
423
424 // Initial assignment of empty superblocks to block sizes:
425
426 for (int32_t i = 0; i < number_block_sizes; ++i) {
427 const uint32_t block_size_lg2 = i + m_min_block_size_lg2;
428 const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
429 const uint32_t block_state = block_count_lg2 << state_shift;
430 const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE;
431
432 // for block size index 'i':
433 // sb_id_hint = sb_state_array[ hint_begin ];
434 // sb_id_begin = sb_state_array[ hint_begin + 1 ];
435
436 const int32_t jbeg = (i * m_sb_count) / number_block_sizes;
437 const int32_t jend = ((i + 1) * m_sb_count) / number_block_sizes;
438
439 sb_state_array[hint_begin] = uint32_t(jbeg);
440 sb_state_array[hint_begin + 1] = uint32_t(jbeg);
441
442 for (int32_t j = jbeg; j < jend; ++j) {
443 sb_state_array[j * m_sb_state_size] = block_state;
444 }
445 }
446
447 // Write out initialized state:
448
449 if (!accessible) {
450 Kokkos::Impl::DeepCopy<base_memory_space, Kokkos::HostSpace>(
451 m_sb_state_array, sb_state_array, header_size);
452
453 host.deallocate(sb_state_array, header_size);
454 } else {
455 Kokkos::memory_fence();
456 }
457 }
458
459 //--------------------------------------------------------------------------
460
461 private:
462 /* Given a size 'n' get the block size in which it can be allocated.
463 * Restrict lower bound to minimum block size.
464 */
465 KOKKOS_FORCEINLINE_FUNCTION
466 uint32_t get_block_size_lg2(uint32_t n) const noexcept {
467 const unsigned i = Kokkos::Impl::integral_power_of_two_that_contains(n);
468
469 return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i;
470 }
471
472 public:
473 /* Return 0 for invalid block size */
474 KOKKOS_INLINE_FUNCTION
475 uint32_t allocate_block_size(uint64_t alloc_size) const noexcept {
476 return alloc_size <= (1UL << m_max_block_size_lg2)
477 ? (1UL << get_block_size_lg2(uint32_t(alloc_size)))
478 : 0;
479 }
480
481 //--------------------------------------------------------------------------
491 KOKKOS_FUNCTION
492 void *allocate(size_t alloc_size, int32_t attempt_limit = 1) const noexcept {
493 if (size_t(1LU << m_max_block_size_lg2) < alloc_size) {
494 Kokkos::abort(
495 "Kokkos MemoryPool allocation request exceeded specified maximum "
496 "allocation size");
497 }
498
499 if (0 == alloc_size) return nullptr;
500
501 void *p = nullptr;
502
503 const uint32_t block_size_lg2 = get_block_size_lg2(alloc_size);
504
505 // Allocation will fit within a superblock
506 // that has block sizes ( 1 << block_size_lg2 )
507
508 const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
509 const uint32_t block_state = block_count_lg2 << state_shift;
510 const uint32_t block_count = 1u << block_count_lg2;
511
512 // Superblock hints for this block size:
513 // hint_sb_id_ptr[0] is the dynamically changing hint
514 // hint_sb_id_ptr[1] is the static start point
515
516 volatile uint32_t *const hint_sb_id_ptr =
517 m_sb_state_array /* memory pool state array */
518 + m_hint_offset /* offset to hint portion of array */
519 + HINT_PER_BLOCK_SIZE /* number of hints per block size */
520 * (block_size_lg2 - m_min_block_size_lg2); /* block size id */
521
522 const int32_t sb_id_begin = int32_t(hint_sb_id_ptr[1]);
523
524 // Fast query clock register 'tic' to pseudo-randomize
525 // the guess for which block within a superblock should
526 // be claimed. If not available then a search occurs.
527#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GEN)
528 const uint32_t block_id_hint = alloc_size;
529#else
530 const uint32_t block_id_hint =
531 (uint32_t)(Kokkos::Impl::clock_tic()
532#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
533 // Spread out potentially concurrent access
534 // by threads within a warp or thread block.
535 + (threadIdx.x + blockDim.x * threadIdx.y)
536#endif
537 );
538#endif
539
540 // expected state of superblock for allocation
541 uint32_t sb_state = block_state;
542
543 int32_t sb_id = -1;
544
545 volatile uint32_t *sb_state_array = nullptr;
546
547 while (attempt_limit) {
548 int32_t hint_sb_id = -1;
549
550 if (sb_id < 0) {
551 // No superblock specified, try the hint for this block size
552
553 sb_id = hint_sb_id = int32_t(*hint_sb_id_ptr);
554
555 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
556 }
557
558 // Require:
559 // 0 <= sb_id
560 // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id
561
562 if (sb_state == (state_header_mask & *sb_state_array)) {
563 // This superblock state is as expected, for the moment.
564 // Attempt to claim a bit. The attempt updates the state
565 // so have already made sure the state header is as expected.
566
567 const uint32_t count_lg2 = sb_state >> state_shift;
568 const uint32_t mask = (1u << count_lg2) - 1;
569
570 const Kokkos::pair<int, int> result = CB::acquire_bounded_lg2(
571 sb_state_array, count_lg2, block_id_hint & mask, sb_state);
572
573 // If result.first < 0 then failed to acquire
574 // due to either full or buffer was wrong state.
575 // Could be wrong state if a deallocation raced the
576 // superblock to empty before the acquire could succeed.
577
578 if (0 <= result.first) { // acquired a bit
579
580 const uint32_t size_lg2 = m_sb_size_lg2 - count_lg2;
581
582 // Set the allocated block pointer
583
584 p = ((char *)(m_sb_state_array + m_data_offset)) +
585 (uint64_t(sb_id) << m_sb_size_lg2) // superblock memory
586 + (uint64_t(result.first) << size_lg2); // block memory
587
588#if 0
589 printf( " MemoryPool(0x%lx) pointer(0x%lx) allocate(%lu) sb_id(%d) sb_state(0x%x) block_size(%d) block_capacity(%d) block_id(%d) block_claimed(%d)\n"
590 , (uintptr_t)m_sb_state_array
591 , (uintptr_t)p
592 , alloc_size
593 , sb_id
594 , sb_state
595 , (1u << size_lg2)
596 , (1u << count_lg2)
597 , result.first
598 , result.second );
599#endif
600
601 break; // Success
602 }
603 }
604 //------------------------------------------------------------------
605 // Arrive here if failed to acquire a block.
606 // Must find a new superblock.
607
608 // Start searching at designated index for this block size.
609 // Look for superblock that, in preferential order,
610 // 1) part-full superblock of this block size
611 // 2) empty superblock to claim for this block size
612 // 3) part-full superblock of the next larger block size
613
614 sb_state = block_state; // Expect to find the desired state
615 sb_id = -1;
616
617 bool update_hint = false;
618 int32_t sb_id_empty = -1;
619 int32_t sb_id_large = -1;
620 uint32_t sb_state_large = 0;
621
622 sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size;
623
624 for (int32_t i = 0, id = sb_id_begin; i < m_sb_count; ++i) {
625 // Query state of the candidate superblock.
626 // Note that the state may change at any moment
627 // as concurrent allocations and deallocations occur.
628
629 const uint32_t full_state = *sb_state_array;
630 const uint32_t used = full_state & state_used_mask;
631 const uint32_t state = full_state & state_header_mask;
632
633 if (state == block_state) {
634 // Superblock is assigned to this block size
635
636 if (used < block_count) {
637 // There is room to allocate one block
638
639 sb_id = id;
640
641 // Is there room to allocate more than one block?
642
643 update_hint = used + 1 < block_count;
644
645 break;
646 }
647 } else if (0 == used) {
648 // Superblock is empty
649
650 if (-1 == sb_id_empty) {
651 // Superblock is not assigned to this block size
652 // and is the first empty superblock encountered.
653 // Save this id to use if a partfull superblock is not found.
654
655 sb_id_empty = id;
656 }
657 } else if ((-1 == sb_id_empty /* have not found an empty */) &&
658 (-1 == sb_id_large /* have not found a larger */) &&
659 (state < block_state /* a larger block */) &&
660 // is not full:
661 (used < (1u << (state >> state_shift)))) {
662 // First superblock encountered that is
663 // larger than this block size and
664 // has room for an allocation.
665 // Save this id to use of partfull or empty superblock not found
666 sb_id_large = id;
667 sb_state_large = state;
668 }
669
670 // Iterate around the superblock array:
671
672 if (++id < m_sb_count) {
673 sb_state_array += m_sb_state_size;
674 } else {
675 id = 0;
676 sb_state_array = m_sb_state_array;
677 }
678 }
679
680 // printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)
681 // sb_id_large(%d)\n" , m_sb_count , sb_id , sb_id_empty , sb_id_large);
682
683 if (sb_id < 0) {
684 // Did not find a partfull superblock for this block size.
685
686 if (0 <= sb_id_empty) {
687 // Found first empty superblock following designated superblock
688 // Attempt to claim it for this block size.
689 // If the claim fails assume that another thread claimed it
690 // for this block size and try to use it anyway,
691 // but do not update hint.
692
693 sb_id = sb_id_empty;
694
695 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
696
697 // If successfully changed assignment of empty superblock 'sb_id'
698 // to this block_size then update the hint.
699
700 const uint32_t state_empty = state_header_mask & *sb_state_array;
701
702 // If this thread claims the empty block then update the hint
703 update_hint =
704 state_empty == Kokkos::atomic_compare_exchange(
705 sb_state_array, state_empty, block_state);
706 } else if (0 <= sb_id_large) {
707 // Found a larger superblock with space available
708
709 sb_id = sb_id_large;
710 sb_state = sb_state_large;
711
712 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
713 } else {
714 // Did not find a potentially usable superblock
715 --attempt_limit;
716 }
717 }
718
719 if (update_hint) {
720 Kokkos::atomic_compare_exchange(hint_sb_id_ptr, uint32_t(hint_sb_id),
721 uint32_t(sb_id));
722 }
723 } // end allocation attempt loop
724 //--------------------------------------------------------------------
725
726 return p;
727 }
728 // end allocate
729 //--------------------------------------------------------------------------
730
737 KOKKOS_INLINE_FUNCTION
738 void deallocate(void *p, size_t /* alloc_size */) const noexcept {
739 if (nullptr == p) return;
740
741 // Determine which superblock and block
742 const ptrdiff_t d =
743 ((char *)p) - ((char *)(m_sb_state_array + m_data_offset));
744
745 // Verify contained within the memory pool's superblocks:
746 const int ok_contains =
747 (0 <= d) && (size_t(d) < (size_t(m_sb_count) << m_sb_size_lg2));
748
749 int ok_block_aligned = 0;
750 int ok_dealloc_once = 0;
751
752 if (ok_contains) {
753 const int sb_id = d >> m_sb_size_lg2;
754
755 // State array for the superblock.
756 volatile uint32_t *const sb_state_array =
757 m_sb_state_array + (sb_id * m_sb_state_size);
758
759 const uint32_t block_state = (*sb_state_array) & state_header_mask;
760 const uint32_t block_size_lg2 =
761 m_sb_size_lg2 - (block_state >> state_shift);
762
763 ok_block_aligned = 0 == (d & ((1UL << block_size_lg2) - 1));
764
765 if (ok_block_aligned) {
766 // Map address to block's bit
767 // mask into superblock and then shift down for block index
768
769 const uint32_t bit =
770 (d & (ptrdiff_t(1LU << m_sb_size_lg2) - 1)) >> block_size_lg2;
771
772 const int result = CB::release(sb_state_array, bit, block_state);
773
774 ok_dealloc_once = 0 <= result;
775
776#if 0
777 printf( " MemoryPool(0x%lx) pointer(0x%lx) deallocate sb_id(%d) block_size(%d) block_capacity(%d) block_id(%d) block_claimed(%d)\n"
778 , (uintptr_t)m_sb_state_array
779 , (uintptr_t)p
780 , sb_id
781 , (1u << block_size_lg2)
782 , (1u << (m_sb_size_lg2 - block_size_lg2))
783 , bit
784 , result );
785#endif
786 }
787 }
788
789 if (!ok_contains || !ok_block_aligned || !ok_dealloc_once) {
790#if 0
791 printf( " MemoryPool(0x%lx) pointer(0x%lx) deallocate ok_contains(%d) ok_block_aligned(%d) ok_dealloc_once(%d)\n"
792 , (uintptr_t)m_sb_state_array
793 , (uintptr_t)p
794 , int(ok_contains)
795 , int(ok_block_aligned)
796 , int(ok_dealloc_once) );
797#endif
798 Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer");
799 }
800 }
801 // end deallocate
802 //--------------------------------------------------------------------------
803
804 KOKKOS_INLINE_FUNCTION
805 int number_of_superblocks() const noexcept { return m_sb_count; }
806
807 KOKKOS_INLINE_FUNCTION
808 void superblock_state(int sb_id, int &block_size, int &block_count_capacity,
809 int &block_count_used) const noexcept {
810 block_size = 0;
811 block_count_capacity = 0;
812 block_count_used = 0;
813
815 Kokkos::Impl::ActiveExecutionMemorySpace,
816 base_memory_space>::accessible) {
817 // Can access the state array
818
819 const uint32_t state =
820 ((uint32_t volatile *)m_sb_state_array)[sb_id * m_sb_state_size];
821
822 const uint32_t block_count_lg2 = state >> state_shift;
823 const uint32_t block_used = state & state_used_mask;
824
825 block_size = 1LU << (m_sb_size_lg2 - block_count_lg2);
826 block_count_capacity = 1LU << block_count_lg2;
827 block_count_used = block_used;
828 }
829 }
830};
831
832} // namespace Kokkos
833
834#endif /* #ifndef KOKKOS_MEMORYPOOL_HPP */
Atomic functions.
Declaration of parallel operators.
Memory management for host memory.
void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const
Deallocate untracked memory in the space.
void * allocate(const size_t arg_alloc_size) const
Allocate untracked memory in the space.
Access relationship between DstMemorySpace and SrcMemorySpace.
Replacement for std::pair that works on CUDA devices.
Definition: Kokkos_Pair.hpp:65
first_type first
The first element of the pair.
Definition: Kokkos_Pair.hpp:72
second_type second
The second element of the pair.
Definition: Kokkos_Pair.hpp:74