48#ifndef KOKKOS_PARALLEL_HPP
49#define KOKKOS_PARALLEL_HPP
52#include <Kokkos_Core_fwd.hpp>
53#include <Kokkos_View.hpp>
54#include <Kokkos_ExecPolicy.hpp>
56#include <impl/Kokkos_Tools.hpp>
60#include <impl/Kokkos_Tags.hpp>
61#include <impl/Kokkos_Traits.hpp>
62#include <impl/Kokkos_FunctorAnalysis.hpp>
63#include <impl/Kokkos_FunctorAdapter.hpp>
65#ifdef KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
// Detection trait: does T expose a nested `execution_space` type?
//
// Primary template: derives from std::false_type and sets `type` to the
// placeholder `not_a_type`.
75template <
class T,
class =
void>
76struct is_detected_execution_space : std::false_type {
77 using type = not_a_type;
// Partial specialization keyed on void_t<typename T::execution_space>;
// here `type` is T's own nested execution_space.
// NOTE(review): the `template <...>` header and the base-class clause of this
// specialization are not visible in this span -- presumably std::true_type,
// since callers read `::value`; confirm.
81struct is_detected_execution_space<T, void_t<typename T::execution_space>>
83 using type =
typename T::execution_space;
// Convenience alias for the `type` member of is_detected_execution_space<T>.
87using detected_execution_space_t =
88 typename is_detected_execution_space<T>::type;
// Detection trait: does T expose a nested `device_type` type?
//
// Primary template: derives from std::false_type and sets `type` to the
// placeholder `not_a_type`.
90template <
class T,
class =
void>
91struct is_detected_device_type : std::false_type {
92 using type = not_a_type;
// Partial specialization keyed on void_t<typename T::device_type>;
// here `type` is T's own nested device_type.
// NOTE(review): the `template <...>` header and the base-class clause of this
// specialization are not visible in this span -- presumably std::true_type,
// since callers read `::value`; confirm.
96struct is_detected_device_type<T, void_t<typename T::device_type>>
98 using type =
typename T::device_type;
// Convenience alias for the `type` member of is_detected_device_type<T>.
102using detected_device_type_t =
typename is_detected_device_type<T>::type;
// Selects the `execution_space` to use for a (Functor, Policy) pair.
// Elsewhere in this file the result is read as
// Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space.
//
// The condition/type pairs below are listed in this order:
//   1. Policy has a nested execution_space   -> Policy's execution_space
//   2. Functor has a nested execution_space  -> Functor's execution_space
//   3. Functor has a nested device_type      -> execution_space detected on
//                                               that device_type
//   4. otherwise                             -> Kokkos::DefaultExecutionSpace
//
// NOTE(review): the struct header and the inner `std::conditional_t<` openers
// are not visible in this span; the trailing `>>>` suggests three nested
// conditionals evaluated in the order above -- confirm.
113template <
class Functor,
class Policy>
115 using execution_space = std::conditional_t<
116 is_detected_execution_space<Policy>::value,
117 detected_execution_space_t<Policy>,
119 is_detected_execution_space<Functor>::value,
120 detected_execution_space_t<Functor>,
122 is_detected_device_type<Functor>::value,
123 detected_execution_space_t<detected_device_type_t<Functor>>,
124 Kokkos::DefaultExecutionSpace>>>;
// Policy-based dispatch overload. The function-name line is not visible in
// this span; the begin/end_parallel_for hooks below indicate this is the
// parallel_for(policy, functor, str) overload.
//
// Parameters:
//   policy  - execution policy supplied by the caller; copied into a local.
//   functor - functor handed to the Tools hooks.
//   str     - label handed to the Tools hooks; defaults to "".
//   (unnamed pointer) - enable_if constraint: this overload only participates
//             when Kokkos::Impl::is_execution_policy<ExecPolicy>::value holds.
156template <
class ExecPolicy,
class FunctorType>
158 const ExecPolicy& policy,
const FunctorType& functor,
159 const std::string& str =
"",
160 typename std::enable_if<
161 Kokkos::Impl::is_execution_policy<ExecPolicy>::value>::type* =
// Work on a local copy of the caller's policy; the hooks receive this copy.
// NOTE(review): presumably so a tool can adjust the policy -- confirm.
165 ExecPolicy inner_policy = policy;
// Tools hook marking the start of the kernel. The declaration of kpID is not
// visible in this span.
166 Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
// Allocation tracking is switched off and then back on.
// NOTE(review): the statement bracketed by these two calls is not visible
// here; the work_count overload brackets construction of an
// Impl::ParallelFor closure -- confirm the same applies to this overload.
168 Kokkos::Impl::shared_allocation_tracking_disable();
170 Kokkos::Impl::shared_allocation_tracking_enable();
// Tools hook marking the end of the kernel, with the same arguments as the
// begin hook.
174 Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
// Convenience overload of parallel_for taking a work count instead of a
// policy.
//
// Parameters:
//   work_count - second bound passed to the RangePolicy constructor (the
//                first bound is 0).
//   functor    - functor used to build the Impl::ParallelFor closure.
//   str        - label handed to the Tools hooks; defaults to "".
177template <
class FunctorType>
178inline void parallel_for(
const size_t work_count,
const FunctorType& functor,
179 const std::string& str =
"") {
// The execution space is derived from the functor alone (Policy = void).
180 using execution_space =
181 typename Impl::FunctorPolicyExecutionSpace<FunctorType,
182 void>::execution_space;
183 using policy = RangePolicy<execution_space>;
187 policy execution_policy = policy(0, work_count);
// Tools hook marking the start of the kernel. The declaration of kpID is not
// visible in this span.
189 Kokkos::Tools::Impl::begin_parallel_for(execution_policy, functor, str, kpID);
// The closure is constructed while allocation tracking is disabled.
// NOTE(review): presumably so that copying the functor into the closure is
// not tracked -- confirm.
191 Kokkos::Impl::shared_allocation_tracking_disable();
192 Impl::ParallelFor<FunctorType, policy> closure(functor, execution_policy);
193 Kokkos::Impl::shared_allocation_tracking_enable();
// NOTE(review): no use of `closure` is visible between construction and the
// end hook in this span; an execute call is presumably on a missing line.
197 Kokkos::Tools::Impl::end_parallel_for(execution_policy, functor, str, kpID);
// Label-first overload of parallel_for: forwards to
// ::Kokkos::parallel_for(policy, functor, str).
//
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates true in `#if`, a
// start message and an end message containing `str` are written to std::cout
// around the forwarded call.
//
// NOTE(review): the include guard near the top of the file tests this macro
// with `#ifdef`, while this function uses `#if`; the two differ when the
// macro is defined as 0 or defined empty -- confirm which is intended.
// The matching `#endif` lines are not visible in this span.
200template <
class ExecPolicy,
class FunctorType>
201inline void parallel_for(
const std::string& str,
const ExecPolicy& policy,
202 const FunctorType& functor) {
203#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
205 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
// Delegate to the policy-first overload; `str` becomes the kernel label.
208 ::Kokkos::parallel_for(policy, functor, str);
210#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
212 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
219#include <Kokkos_Parallel_Reduce.hpp>
// Policy-based parallel_scan overload without a returned total.
//
// Parameters:
//   policy  - execution policy supplied by the caller; copied into a local.
//   functor - functor used to build the Impl::ParallelScan closure.
//   str     - label handed to the Tools hooks; defaults to "".
//   (unnamed pointer) - enable_if constraint: this overload only participates
//             when Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value
//             holds.
387template <
class ExecutionPolicy,
class FunctorType>
388inline void parallel_scan(
389 const ExecutionPolicy& policy,
const FunctorType& functor,
390 const std::string& str =
"",
391 typename std::enable_if<
392 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// Work on a local copy of the caller's policy; the hooks receive this copy.
395 ExecutionPolicy inner_policy = policy;
// Tools hook marking the start of the scan. The declaration of kpID is not
// visible in this span.
396 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// The closure is constructed while allocation tracking is disabled.
// NOTE(review): the remaining constructor argument(s) are on a line that is
// not visible here -- presumably inner_policy; confirm.
398 Kokkos::Impl::shared_allocation_tracking_disable();
399 Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
401 Kokkos::Impl::shared_allocation_tracking_enable();
// Tools hook marking the end of the scan.
405 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
// Convenience overload of parallel_scan taking a work count instead of a
// policy, without a returned total.
//
// Parameters:
//   work_count - second bound passed to the policy constructor (the first
//                bound is 0).
//   functor    - functor used to build the Impl::ParallelScan closure.
//   str        - label handed to the Tools hooks; defaults to "".
408template <
class FunctorType>
409inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
410 const std::string& str =
"") {
// NOTE(review): the middle of this alias and the definition of `policy` are
// not visible in this span; by analogy with the work_count parallel_for
// overload these are presumably FunctorPolicyExecutionSpace<FunctorType, void>
// and RangePolicy<execution_space> -- confirm.
411 using execution_space =
413 void>::execution_space;
418 policy execution_policy(0, work_count);
// Tools hook marking the start of the scan; the rest of the argument list is
// on a line that is not visible here.
419 Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
// The closure is constructed while allocation tracking is disabled.
421 Kokkos::Impl::shared_allocation_tracking_disable();
422 Impl::ParallelScan<FunctorType, policy> closure(functor, execution_policy);
423 Kokkos::Impl::shared_allocation_tracking_enable();
// Tools hook marking the end of the scan.
427 Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
// Label-first overload of parallel_scan: forwards to
// ::Kokkos::parallel_scan(policy, functor, str).
//
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates true in `#if`, a
// start message and an end message containing `str` are written to std::cout
// around the forwarded call. The matching `#endif` lines are not visible in
// this span.
430template <
class ExecutionPolicy,
class FunctorType>
431inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
432 const FunctorType& functor) {
433#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
435 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
// Delegate to the policy-first overload; `str` becomes the kernel label.
438 ::Kokkos::parallel_scan(policy, functor, str);
440#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
442 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// Policy-based parallel_scan overload that also takes a `return_value`
// reference, which is handed to the Impl::ParallelScanWithTotal closure.
//
// Parameters:
//   policy       - execution policy supplied by the caller; copied into a
//                  local, and its space() is fenced at the end.
//   functor      - functor used to build the closure.
//   return_value - reference passed to the closure constructor.
//   str          - label handed to the Tools hooks; defaults to "".
//   (unnamed pointer) - enable_if constraint: this overload only participates
//                  when Kokkos::Impl::is_execution_policy<ExecutionPolicy>
//                  ::value holds.
447template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
448inline void parallel_scan(
449 const ExecutionPolicy& policy,
const FunctorType& functor,
450 ReturnType& return_value,
const std::string& str =
"",
451 typename std::enable_if<
452 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// Work on a local copy of the caller's policy; the hooks and the closure
// receive this copy.
455 ExecutionPolicy inner_policy = policy;
// Tools hook marking the start of the scan. The declaration of kpID is not
// visible in this span.
456 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
// The closure is constructed while allocation tracking is disabled.
458 Kokkos::Impl::shared_allocation_tracking_disable();
459 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType> closure(
460 functor, inner_policy, return_value);
461 Kokkos::Impl::shared_allocation_tracking_enable();
// Tools hook marking the end of the scan.
465 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
// Fence the execution space instance taken from the caller's policy.
// NOTE(review): presumably so return_value is complete when this function
// returns -- confirm.
467 policy.space().fence();
// Convenience overload of parallel_scan taking a work count instead of a
// policy, with a `return_value` handed to the Impl::ParallelScanWithTotal
// closure.
//
// Parameters:
//   work_count - second bound passed to the policy constructor (the first
//                bound is 0).
//   functor    - functor used to build the closure.
//   str        - label handed to the Tools hooks; defaults to "".
// NOTE(review): the line declaring the `return_value` parameter is not
// visible in this span, although the body uses it; by analogy with the
// policy-based overload it is presumably `ReturnType& return_value` -- confirm.
470template <
class FunctorType,
class ReturnType>
471inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
473 const std::string& str =
"") {
// NOTE(review): the middle of this alias and the definition of `policy` are
// not visible in this span -- confirm against the full file.
474 using execution_space =
476 void>::execution_space;
480 policy execution_policy(0, work_count);
// Tools hook marking the start of the scan; the rest of the argument list is
// on a line that is not visible here.
482 Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
// The closure is constructed while allocation tracking is disabled.
485 Kokkos::Impl::shared_allocation_tracking_disable();
486 Impl::ParallelScanWithTotal<FunctorType, policy, ReturnType> closure(
487 functor, execution_policy, return_value);
488 Kokkos::Impl::shared_allocation_tracking_enable();
// Tools hook marking the end of the scan.
492 Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
// Fence a default-constructed execution_space instance. This differs from the
// policy-based overload, which fences policy.space().
494 execution_space().fence();
// Label-first overload of parallel_scan with a return value: forwards to
// ::Kokkos::parallel_scan(policy, functor, return_value, str).
//
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates true in `#if`, a
// start message and an end message containing `str` are written to std::cout
// around the forwarded call.
//
// NOTE(review): the line declaring the `return_value` parameter and the
// matching `#endif` lines are not visible in this span.
497template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
498inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
499 const FunctorType& functor,
501#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
503 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
// Delegate to the policy-first overload; `str` becomes the kernel label.
506 ::Kokkos::parallel_scan(policy, functor, return_value, str);
508#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
510 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// Primary template of FunctorTeamShmemSize. The two bool parameters default
// to has_member_team_shmem_size<FunctorType>::value and
// has_member_shmem_size<FunctorType>::value, so the specializations below are
// selected by which of those members the functor provides.
//
// value(functor, int) ignores both arguments (they are unnamed).
// NOTE(review): the body of value() is not visible in this span -- presumably
// it returns 0 for functors with neither member; confirm.
523template <
class FunctorType,
524 bool HasTeamShmemSize =
525 has_member_team_shmem_size<FunctorType>::value,
526 bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
527struct FunctorTeamShmemSize {
528 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType&,
int) {
// Specialization for HasTeamShmemSize = true, HasShmemSize = false.
// value(f, team_size) returns f.team_shmem_size(team_size).
533template <
class FunctorType>
534struct FunctorTeamShmemSize<FunctorType, true, false> {
535 static inline size_t value(
const FunctorType& f,
int team_size) {
536 return f.team_shmem_size(team_size);
// Specialization for HasTeamShmemSize = false, HasShmemSize = true.
// value(f, team_size) returns f.shmem_size(team_size).
540template <
class FunctorType>
541struct FunctorTeamShmemSize<FunctorType, false, true> {
542 static inline size_t value(
const FunctorType& f,
int team_size) {
543 return f.shmem_size(team_size);
// Specialization for HasTeamShmemSize = true, HasShmemSize = true, i.e. the
// functor defines both team_shmem_size and shmem_size.
// value() ignores both arguments (they are unnamed).
// NOTE(review): only a fragment of a string literal is visible in the body;
// the call that consumes it and the rest of the message are not visible in
// this span -- presumably this reports the combination as an error; confirm.
546template <
class FunctorType>
547struct FunctorTeamShmemSize<FunctorType, true, true> {
548 static inline size_t value(
const FunctorType& ,
int ) {
550 "Functor with both team_shmem_size and shmem_size defined is "
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
Implementation of the ParallelFor operator that has a partial specialization for the device.
Execution policy for work over a range of an integral type.
Given a Functor and Execution Policy query an execution space.