// Kokkos Core Kernels Package Version of the Day
// Kokkos_Tuners.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos v. 3.0
6// Copyright (2020) National Technology & Engineering
7// Solutions of Sandia, LLC (NTESS).
8//
9// Under the terms of Contract DE-NA0003525 with NTESS,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are
14// met:
15//
16// 1. Redistributions of source code must retain the above copyright
17// notice, this list of conditions and the following disclaimer.
18//
19// 2. Redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution.
22//
23// 3. Neither the name of the Corporation nor the names of the
24// contributors may be used to endorse or promote products derived from
25// this software without specific prior written permission.
26//
27// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38//
39// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40//
41// ************************************************************************
42//@HEADER
43*/
44
45#ifndef KOKKOS_KOKKOS_TUNERS_HPP
46#define KOKKOS_KOKKOS_TUNERS_HPP
47
48#include <Kokkos_Macros.hpp>
49#include <Kokkos_Core_fwd.hpp>
50#include <Kokkos_ExecPolicy.hpp>
51#include <KokkosExp_MDRangePolicy.hpp>
52#include <impl/Kokkos_Profiling_Interface.hpp>
53
54#include <array>
55#include <utility>
56#include <tuple>
57#include <string>
58#include <vector>
59#include <map>
60#include <cassert>
61
62namespace Kokkos {
63namespace Tools {
64
65namespace Experimental {
66
67// forward declarations
68SetOrRange make_candidate_set(size_t size, int64_t* data);
69bool have_tuning_tool();
70size_t declare_output_type(const std::string&,
71 Kokkos::Tools::Experimental::VariableInfo);
72void request_output_values(size_t, size_t,
73 Kokkos::Tools::Experimental::VariableValue*);
74VariableValue make_variable_value(size_t, int64_t);
75VariableValue make_variable_value(size_t, double);
76SetOrRange make_candidate_range(double lower, double upper, double step,
77 bool openLower, bool openUpper);
78size_t get_new_context_id();
79void begin_context(size_t context_id);
80void end_context(size_t context_id);
81namespace Impl {
82
// A node in a hierarchy of candidate values.
//   ValueType     - type of the values selectable at this level
//   ContainedType - type of the child nodes, or void for the lowest level
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

// Interior node: stores the values of this level plus one child container per
// value. ValueHierarchyConstructor appends to both vectors in lock-step, so
// root_values[i] and sub_values[i] belong together.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;

  // Appends a value for this level.
  // NOTE: declared noexcept although push_back may allocate; an allocation
  // failure therefore terminates the program instead of propagating.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }

  // Appends a child container.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }

  // Returns the value stored at `index`; no bounds checking is performed.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }

  // Returns the child stored at `index`; no bounds checking is performed.
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

// Lowest-level node: only values, no children.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;

  // Takes ownership of the initial set of values.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}

  // Appends a value for this level.
  // NOTE: declared noexcept although push_back may allocate; an allocation
  // failure therefore terminates the program instead of propagating.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }

  // Returns the value stored at `index`; no bounds checking is performed.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
119
125template <class NestedMap>
127
128// Vectors are our lowest-level, no nested values
129template <class T>
130struct MapTypeConverter<std::vector<T>> {
131 using type = ValueHierarchyNode<T, void>;
132};
133
134// Maps contain both the "root" types and sub-vectors
135template <class K, class V>
136struct MapTypeConverter<std::map<K, V>> {
138};
139
145template <class NestedMap>
147
148// Vectors are our lowest-level, no nested values. Just fill in the fundamental
149// values
150template <class T>
151struct ValueHierarchyConstructor<std::vector<T>> {
152 using return_type = typename MapTypeConverter<std::vector<T>>::type;
153 static return_type build(const std::vector<T>& in) { return return_type{in}; }
154};
155
156// For maps, we need to fill in the fundamental values, and construct child
157// nodes
158template <class K, class V>
159struct ValueHierarchyConstructor<std::map<K, V>> {
160 using return_type = typename MapTypeConverter<std::map<K, V>>::type;
161 static return_type build(const std::map<K, V>& in) {
162 return_type node_to_build;
163 for (auto& entry : in) {
164 node_to_build.add_root_value(entry.first);
165 node_to_build.add_sub_container(
166 ValueHierarchyConstructor<V>::build(entry.second));
167 }
168 return node_to_build;
169 }
170};
171
// Computes, at compile time, how many nesting levels a container type has.
// Exposed as the static member `value`.
template <class InspectForDepth>
struct get_space_dimensionality;

// The dimensionality of a vector is 1
template <class T>
struct get_space_dimensionality<std::vector<T>> {
  static constexpr int value = 1;
};

// The dimensionality of a map is 1 (the map) plus the dimensionality
// of the map's value type
template <class K, class V>
struct get_space_dimensionality<std::map<K, V>> {
  static constexpr int value = 1 + get_space_dimensionality<V>::value;
};
195
// Produces the nested container type describing an N-dimensional sparse
// space whose values are all of type T: N - 1 levels of std::map<T, ...>
// wrapped around a std::vector<T>.
template <class T, int N>
struct n_dimensional_sparse_structure;

// One dimension: a plain vector of values.
template <class T>
struct n_dimensional_sparse_structure<T, 1> {
  using type = std::vector<T>;
};

// More than one dimension: a map from a value to the structure with one
// dimension fewer.
template <class T, int N>
struct n_dimensional_sparse_structure {
  // N == 1 is handled by the specialization above; anything smaller would
  // otherwise recurse without bound.
  static_assert(N > 1,
                "n_dimensional_sparse_structure requires at least 1 dimension");
  using type =
      std::map<T, typename n_dimensional_sparse_structure<T, N - 1>::type>;
};
209
216// First, a helper to get the value in one dimension
217template <class Container>
219
220// At any given level, just return your value at that level
221template <class RootType, class Subtype>
222struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
223 static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
224 double fraction_to_traverse) {
225 size_t index = dimension.root_values.size() * fraction_to_traverse;
226 return dimension.get_root_value(index);
227 }
228};
229
235// At the bottom level, we have one double and a base-level ValueHierarchyNode
236
237template <class HierarchyNode, class... InterpolationIndices>
239
240template <class ValueType>
241struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
242 using node_type = ValueHierarchyNode<ValueType, void>;
243 using return_type = std::tuple<ValueType>;
244 static return_type build(const node_type& in, double index) {
245 return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
246 }
247};
248
249// At levels above the bottom, we tuple_cat the result of our child on the end
250// of our own tuple
251template <class ValueType, class Subtype, class... Indices>
252struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
253 Indices...> {
254 using node_type = ValueHierarchyNode<ValueType, Subtype>;
255 using sub_tuple =
256 typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
257 using return_type = decltype(std::tuple_cat(
258 std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
259 static return_type build(const node_type& in, double fraction_to_traverse,
260 Indices... indices) {
261 size_t index = in.sub_values.size() * fraction_to_traverse;
262 auto dimension_value = std::make_tuple(
263 DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
264 return std::tuple_cat(dimension_value,
265 GetMultidimensionalPoint<Subtype, Indices...>::build(
266 in.get_sub_value(index), indices...));
267 }
268};
269
270template <typename PointType, class ArrayType, size_t... Is>
271auto get_point_helper(const PointType& in, const ArrayType& indices,
272 std::index_sequence<Is...>) {
273 using helper = GetMultidimensionalPoint<
274 PointType,
275 decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
276 return helper::build(in, std::get<Is>(indices).value.double_value...);
277}
278
279template <typename PointType, typename ArrayType>
280struct GetPoint;
281
282template <typename PointType, size_t X>
283struct GetPoint<PointType,
284 std::array<Kokkos::Tools::Experimental::VariableValue, X>> {
285 using index_set_type =
286 std::array<Kokkos::Tools::Experimental::VariableValue, X>;
287 static auto build(const PointType& in, const index_set_type& indices) {
288 return get_point_helper(in, indices, std::make_index_sequence<X>{});
289 }
290};
291
292template <typename PointType, typename ArrayType>
293auto get_point(const PointType& point, const ArrayType& indices) {
294 return GetPoint<PointType, ArrayType>::build(point, indices);
295}
296
297} // namespace Impl
298
299template <template <class...> class Container, size_t MaxDimensionSize = 100,
300 class... TemplateArguments>
301class MultidimensionalSparseTuningProblem {
302 public:
303 using ProblemSpaceInput = Container<TemplateArguments...>;
304 static constexpr int space_dimensionality =
305 Impl::get_space_dimensionality<ProblemSpaceInput>::value;
306 static constexpr size_t max_space_dimension_size = MaxDimensionSize;
307 static constexpr double tuning_min = 0.0;
308 static constexpr double tuning_max = 0.999;
309 static constexpr double tuning_step = tuning_max / max_space_dimension_size;
310
311 using StoredProblemSpace =
312 typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
313 using HierarchyConstructor =
314 typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
315
316 using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
317 space_dimensionality>;
318
319 private:
320 StoredProblemSpace m_space;
321 std::array<size_t, space_dimensionality> variable_ids;
322 size_t context;
323
324 public:
325 MultidimensionalSparseTuningProblem() = default;
326 MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
327 const std::vector<std::string>& names)
328 : m_space(HierarchyConstructor::build(space)) {
329 assert(names.size() == space_dimensionality);
330 for (unsigned long x = 0; x < names.size(); ++x) {
331 VariableInfo info;
332 info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
333 info.category = Kokkos::Tools::Experimental::StatisticalCategory::
334 kokkos_value_interval;
335 info.valueQuantity =
336 Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
337 info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
338 tuning_min, tuning_max, tuning_step, true, true);
339 variable_ids[x] = declare_output_type(names[x], info);
340 }
341 }
342
343 auto begin() {
344 context = Kokkos::Tools::Experimental::get_new_context_id();
345 ValueArray values;
346 for (int x = 0; x < space_dimensionality; ++x) {
347 values[x] = Kokkos::Tools::Experimental::make_variable_value(
348 variable_ids[x], 0.0);
349 }
350 begin_context(context);
351 request_output_values(context, space_dimensionality, values.data());
352 return get_point(m_space, values);
353 }
354
355 auto end() { end_context(context); }
356};
357
358template <size_t MaxDimensionSize = 100, template <class...> class Container,
359 class... TemplateArguments>
360auto make_multidimensional_sparse_tuning_problem(
361 const Container<TemplateArguments...>& in, std::vector<std::string> names) {
362 return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
363 TemplateArguments...>(in, names);
364}
365class TeamSizeTuner {
366 private:
367 using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
368 using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
369 std::declval<SpaceDescription>(),
370 std::declval<std::vector<std::string>>()));
371 TunerType tuner;
372
373 public:
374 TeamSizeTuner() = default;
375 TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
376 TeamSizeTuner(const TeamSizeTuner& other) = default;
377 TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
378 TeamSizeTuner(TeamSizeTuner&& other) = default;
379 template <typename ViableConfigurationCalculator, typename Functor,
380 typename TagType, typename... Properties>
381 TeamSizeTuner(const std::string& name,
383 const Functor& functor, const TagType& tag,
384 ViableConfigurationCalculator calc) {
385 using PolicyType = Kokkos::TeamPolicy<Properties...>;
386 auto initial_vector_length = policy.impl_vector_length();
387 if (initial_vector_length < 1) {
388 policy.impl_set_vector_length(1);
389 }
415 SpaceDescription space_description;
416
417 auto max_vector_length = PolicyType::vector_length_max();
418 std::vector<int64_t> allowed_vector_lengths;
419
420 if (policy.impl_auto_vector_length()) { // case 1 or 2
421 for (int vector_length = max_vector_length; vector_length >= 1;
422 vector_length /= 2) {
423 policy.impl_set_vector_length(vector_length);
436 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
437 if ((policy.impl_auto_team_size()) ||
438 (policy.team_size() <= max_team_size)) {
439 allowed_vector_lengths.push_back(vector_length);
440 }
441 }
442 } else { // case 3, there's only one vector length to care about
443 allowed_vector_lengths.push_back(policy.impl_vector_length());
444 }
445
446 for (const auto vector_length : allowed_vector_lengths) {
447 std::vector<int64_t> allowed_team_sizes;
448 policy.impl_set_vector_length(vector_length);
449 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
450 if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
451 // sizes
452 for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
453 allowed_team_sizes.push_back(team_size);
454 }
455 } else { // case 2, just try the provided team size
456 allowed_team_sizes.push_back(policy.team_size());
457 }
458 space_description[vector_length] = allowed_team_sizes;
459 }
460 tuner = make_multidimensional_sparse_tuning_problem<20>(
461 space_description, {std::string(name + "_vector_length"),
462 std::string(name + "_team_size")});
463 policy.impl_set_vector_length(initial_vector_length);
464 }
465
466 template <typename... Properties>
467 void tune(Kokkos::TeamPolicy<Properties...>& policy) {
468 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
469 auto configuration = tuner.begin();
470 auto team_size = std::get<1>(configuration);
471 auto vector_length = std::get<0>(configuration);
472 if (vector_length > 0) {
473 policy.impl_set_team_size(team_size);
474 policy.impl_set_vector_length(vector_length);
475 }
476 }
477 }
478 void end() {
479 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
480 tuner.end();
481 }
482 }
483
484 private:
485};
486
namespace Impl {

// Appends every power of two strictly below `tile_size` (1, 2, 4, ...) to
// `cont`. Nothing is appended when tile_size <= 1.
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  for (int x = 1; x < tile_size; x *= 2) {
    cont.push_back(x);
    // Doubling would reach or exceed tile_size, so this was the last value.
    // Stopping here also keeps `x *= 2` from overflowing for tile sizes
    // above 2^30.
    if (x > tile_size / 2) break;
  }
}

// For every power of two `x` strictly below `tile_size`, creates the entry
// cont[x] and fills it recursively with the remaining budget tile_size / x.
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  for (int x = 1; x < tile_size; x *= 2) {
    fill_tile(cont[x], tile_size / x);
    // Same early exit as above: avoids signed overflow in `x *= 2`.
    if (x > tile_size / 2) break;
  }
}
}  // namespace Impl
502
503template <int MDRangeRank>
504struct MDRangeTuner {
505 private:
506 static constexpr int rank = MDRangeRank;
507 static constexpr int max_slices = 15;
508 using SpaceDescription =
509 typename Impl::n_dimensional_sparse_structure<int, rank>::type;
510 using TunerType =
511 decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
512 std::declval<SpaceDescription>(),
513 std::declval<std::vector<std::string>>()));
514 TunerType tuner;
515
516 public:
517 MDRangeTuner() = default;
518 template <typename Functor, typename TagType, typename Calculator,
519 typename... Properties>
520 MDRangeTuner(const std::string& name,
521 const Kokkos::MDRangePolicy<Properties...>& policy,
522 const Functor& functor, const TagType& tag, Calculator calc) {
523 SpaceDescription desc;
524 int max_tile_size =
525 calc.get_mdrange_max_tile_size_product(policy, functor, tag);
526 Impl::fill_tile(desc, max_tile_size);
527 std::vector<std::string> feature_names;
528 for (int x = 0; x < rank; ++x) {
529 feature_names.push_back(name + "_tile_size_" + std::to_string(x));
530 }
531 tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
532 desc, feature_names);
533 }
534 template <typename Policy, typename Tuple, size_t... Indices>
535 void set_policy_tile(Policy& policy, const Tuple& tuple,
536 const std::index_sequence<Indices...>&) {
537 policy.impl_change_tile_size({std::get<Indices>(tuple)...});
538 }
539 template <typename... Properties>
540 void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
541 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
542 auto configuration = tuner.begin();
543 set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
544 }
545 }
546 void end() {
547 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
548 tuner.end();
549 }
550 }
551};
552
553} // namespace Experimental
554} // namespace Tools
555} // namespace Kokkos
556
557#endif
// Execution policy for parallel work over a league of teams of threads.