44 #ifndef KOKKOS_EXECPOLICY_HPP 45 #define KOKKOS_EXECPOLICY_HPP 47 #include <Kokkos_Core_fwd.hpp> 48 #include <impl/Kokkos_Traits.hpp> 49 #include <impl/Kokkos_StaticAssert.hpp> 50 #include <impl/Kokkos_Error.hpp> 51 #include <impl/Kokkos_Tags.hpp> 52 #include <impl/Kokkos_AnalyzePolicy.hpp> 53 #include <Kokkos_Concepts.hpp> 81 template<
class ... Properties>
83 :
public Impl::PolicyTraits<Properties ... >
86 typedef Impl::PolicyTraits<Properties ... > traits;
88 typename traits::execution_space m_space ;
89 typename traits::index_type m_begin ;
90 typename traits::index_type m_end ;
91 typename traits::index_type m_granularity ;
92 typename traits::index_type m_granularity_mask ;
97 typedef typename traits::index_type member_type ;
/** \brief  Accessor for the execution space instance stored in m_space; returned by const reference. */
99 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space & space()
const {
return m_space ; }
/** \brief  Returns the stored lower bound of the range (m_begin) by value. */
100 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
/** \brief  Returns the stored upper bound of the range (m_end) by value. */
101 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
/** \brief  Call operator taking a const int reference; the body is empty, so calling it has no effect.
 *
 *  NOTE(review): presumably present only so the policy type can be probed like a functor
 *  by template machinery -- confirm the intent before removing.
 */
106 void operator()(
const int&)
const {}
/** \brief  Copy constructor, explicitly defaulted (member-wise copy). */
108 RangePolicy(
const RangePolicy&) =
default;
/** \brief  Move constructor, explicitly defaulted (member-wise move). */
109 RangePolicy(RangePolicy&&) =
default;
/** \brief  Default constructor: empty range [0,0) on a default-constructed execution space.
 *
 *  The granularity members are zero-initialized as well, so that later reads of
 *  m_granularity / m_granularity_mask (for example the mask arithmetic that uses
 *  range.m_granularity_mask) never observe an indeterminate value.
 */
111 inline RangePolicy() : m_space(), m_begin(0), m_end(0), m_granularity(0), m_granularity_mask(0) {}
116 ,
const member_type work_begin
117 ,
const member_type work_end
119 : m_space( work_space )
120 , m_begin( work_begin < work_end ? work_begin : 0 )
121 , m_end( work_begin < work_end ? work_end : 0 )
123 , m_granularity_mask(0)
125 set_auto_chunk_size();
131 ,
const member_type work_end
134 , work_begin , work_end )
140 return m_granularity;
146 p.m_granularity = chunk_size_;
147 p.m_granularity_mask = p.m_granularity - 1;
153 inline void set_auto_chunk_size() {
155 typename traits::index_type concurrency = traits::execution_space::concurrency();
156 if( concurrency==0 ) concurrency=1;
158 if(m_granularity > 0) {
159 if(!Impl::is_integral_power_of_two( m_granularity ))
160 Kokkos::abort(
"RangePolicy blocking granularity must be power of two" );
163 member_type new_chunk_size = 1;
164 while(new_chunk_size*100*concurrency < m_end-m_begin)
166 if(new_chunk_size < 128) {
168 while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
171 m_granularity = new_chunk_size;
172 m_granularity_mask = m_granularity - 1;
181 typedef typename RangePolicy::work_tag work_tag ;
182 typedef typename RangePolicy::member_type member_type ;
184 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
185 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
191 KOKKOS_INLINE_FUNCTION
193 ,
const int part_rank
194 ,
const int part_size
196 : m_begin(0), m_end(0)
201 const member_type work_part =
202 ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
203 + range.m_granularity_mask ) & ~member_type(range.m_granularity_mask);
205 m_begin = range.begin() + work_part * part_rank ;
206 m_end = m_begin + work_part ;
208 if ( range.end() < m_begin ) m_begin = range.end() ;
209 if ( range.end() < m_end ) m_end = range.end() ;
214 member_type m_begin ;
230 template<
class ExecSpace,
class ... Properties>
231 class TeamPolicyInternal:
public Impl::PolicyTraits<Properties ... > {
233 typedef Impl::PolicyTraits<Properties ... > traits;
/** \brief  Static query taking a functor by const reference and returning an int.
 *
 *  NOTE(review): the name suggests the largest team size usable with the given functor --
 *  confirm against the backend implementation.
 */
248 template<
class FunctorType >
249 static int team_size_max(
const FunctorType & );
/** \brief  Static query taking a functor by const reference and returning an int.
 *
 *  NOTE(review): the name suggests a recommended team size for the given functor --
 *  confirm against the backend implementation.
 */
261 template<
class FunctorType >
262 static int team_size_recommended(
const FunctorType & );
/** \brief  Overload of team_size_recommended with an additional const int reference argument.
 *
 *  NOTE(review): the meaning of the unnamed int parameter is not visible here
 *  (possibly a vector length) -- confirm against the backend implementation.
 */
264 template<
class FunctorType >
265 static int team_size_recommended(
const FunctorType & ,
const int&);
/** \brief  Constructor taking an execution space, a league size request and an explicit team size request.
 *
 *  vector_length_request defaults to 1.
 */
268 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
/** \brief  Constructor taking an execution space and a league size request, with Kokkos::AUTO_t in place of a team size.
 *
 *  vector_length_request defaults to 1.
 *  NOTE(review): presumably AUTO_t lets the backend pick the team size -- confirm.
 */
270 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
/** \brief  Constructor without an execution space argument: league size request plus explicit team size request.
 *
 *  vector_length_request defaults to 1.
 */
273 TeamPolicyInternal(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
/** \brief  Constructor without an execution space argument: league size request with Kokkos::AUTO_t in place of a team size.
 *
 *  vector_length_request defaults to 1.
 */
275 TeamPolicyInternal(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
/** \brief  Number of teams in the league. */
286 KOKKOS_INLINE_FUNCTION
int league_size()
const ;
/** \brief  Team size query on the policy, returned as int.
 *
 *  NOTE(review): presumably the number of threads per team this policy was configured with -- confirm.
 */
293 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
/** \brief  Chunk size query, returned as traits::index_type. */
295 inline typename traits::index_type chunk_size()
const ;
/** \brief  Const member that takes a chunk size and returns a TeamPolicyInternal by value.
 *
 *  NOTE(review): being const and returning by value, it presumably yields a modified copy
 *  rather than mutating this policy -- confirm.
 */
297 inline TeamPolicyInternal set_chunk_size(
int chunk_size)
const ;
/** \brief  Handle to the currently executing team shared scratch memory. */
305 KOKKOS_INLINE_FUNCTION
306 typename traits::execution_space::scratch_memory_space
team_shmem()
const ;
/** \brief  Rank of this thread within this team. */
315 KOKKOS_INLINE_FUNCTION
int team_rank()
const ;
/** \brief  Number of threads in this team. */
318 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
/** \brief  Intra-team reduction. Returns join of all values of the team members.
 *
 *  Takes this member's contribution (JoinOp::value_type, by value) and the join operator (by const reference).
 */
324 template<
class JoinOp >
325 KOKKOS_INLINE_FUNCTION
326 typename JoinOp::value_type
team_reduce(
const typename JoinOp::value_type
327 ,
const JoinOp & )
const ;
/** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
 *
 *  \param value  This member's input to the scan.
 */
334 template<
typename Type >
335 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value )
const ;
/** \brief  Overload of team_scan that additionally takes a pointer to a global accumulator.
 *
 *  \param value         This member's input to the scan.
 *  \param global_accum  Pointer to a Type object.
 *                       NOTE(review): presumably accumulates the scan total across teams -- confirm.
 */
346 template<
typename Type >
347 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value , Type *
const global_accum )
const ;
351 struct PerTeamValue {
353 PerTeamValue(
int arg);
356 struct PerThreadValue {
358 PerThreadValue(
int arg);
/** \brief  Declaration of a helper that builds an Impl::PerTeamValue from an int argument. */
363 Impl::PerTeamValue PerTeam(
const int& arg);
/** \brief  Declaration of a helper that builds an Impl::PerThreadValue from an int argument. */
364 Impl::PerThreadValue PerThread(
const int& arg);
388 template<
class ... Properties>
390 Impl::TeamPolicyInternal<
391 typename Impl::PolicyTraits<Properties ... >::execution_space,
393 typedef Impl::TeamPolicyInternal<
394 typename Impl::PolicyTraits<Properties ... >::execution_space,
395 Properties ...> internal_policy;
397 typedef Impl::PolicyTraits<Properties ... > traits;
/** \brief  Construct policy from an execution space argument, a league size and an explicit team size.
 *
 *  All size arguments are forwarded unchanged to internal_policy; vector_length_request defaults to 1.
 *
 *  NOTE(review): the execution space parameter is unnamed and never read. A default-constructed
 *  traits::execution_space is forwarded to internal_policy instead of the caller's instance --
 *  confirm whether the supplied instance should be passed through.
 */
405 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
406 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {}
/** \brief  Construct policy from an execution space argument and a league size, with Kokkos::AUTO_t in place of a team size.
 *
 *  Forwards Kokkos::AUTO() to internal_policy; vector_length_request defaults to 1.
 *
 *  NOTE(review): the execution space parameter is unnamed and never read. A default-constructed
 *  traits::execution_space is forwarded to internal_policy instead of the caller's instance --
 *  confirm whether the supplied instance should be passed through.
 */
408 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
409 : internal_policy(typename traits::execution_space(),league_size_request,
Kokkos::AUTO(), vector_length_request) {}
/** \brief  Construct policy with the default instance of the execution space.
 *
 *  Forwards the league size, team size and vector length requests unchanged to internal_policy.
 */
412 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
413 : internal_policy(league_size_request,team_size_request, vector_length_request) {}
/** \brief  Construct policy without an execution space argument, with Kokkos::AUTO_t in place of a team size.
 *
 *  Forwards the league size, Kokkos::AUTO() and the vector length request to internal_policy.
 */
415 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
416 : internal_policy(league_size_request,
Kokkos::AUTO(), vector_length_request) {}
/** \brief  Wrap an existing internal_policy by copy-constructing the base class from it.
 *
 *  Not marked explicit, so an internal_policy converts implicitly to TeamPolicy.
 */
425 TeamPolicy(
const internal_policy& p):internal_policy(p) {}
428 inline TeamPolicy set_chunk_size(
int chunk)
const {
429 return TeamPolicy(internal_policy::set_chunk_size(chunk));
432 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team)
const {
433 return TeamPolicy(internal_policy::set_scratch_size(level,per_team));
435 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread)
const {
436 return TeamPolicy(internal_policy::set_scratch_size(level,per_thread));
438 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team,
const Impl::PerThreadValue& per_thread)
const {
439 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
441 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread,
const Impl::PerTeamValue& per_team)
const {
442 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
449 template<
typename iType,
class TeamMemberType>
450 struct TeamThreadRangeBoundariesStruct {
453 KOKKOS_INLINE_FUNCTION
static 454 iType ibegin(
const iType & arg_begin
455 ,
const iType & arg_end
456 ,
const iType & arg_rank
457 ,
const iType & arg_size
460 return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
463 KOKKOS_INLINE_FUNCTION
static 464 iType iend(
const iType & arg_begin
465 ,
const iType & arg_end
466 ,
const iType & arg_rank
467 ,
const iType & arg_size
470 const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
471 return end_ < arg_end ? end_ : arg_end ;
476 typedef iType index_type;
479 enum {increment = 1};
480 const TeamMemberType& thread;
482 KOKKOS_INLINE_FUNCTION
483 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
484 ,
const iType& arg_end
486 : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
487 , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
488 , thread( arg_thread )
491 KOKKOS_INLINE_FUNCTION
492 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
493 ,
const iType& arg_begin
494 ,
const iType& arg_end
496 : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
497 , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
498 , thread( arg_thread )
502 template<
typename iType,
class TeamMemberType>
503 struct ThreadVectorRangeBoundariesStruct {
504 typedef iType index_type;
507 enum {increment = 1};
/** \brief  Constructor: stores count in the end member.
 *
 *  The thread argument is accepted but not used by this constructor.
 */
509 KOKKOS_INLINE_FUNCTION
510 ThreadVectorRangeBoundariesStruct (
const TeamMemberType& thread,
const iType& count ) : end( count ) {}
513 template<
class TeamMemberType>
514 struct ThreadSingleStruct {
515 const TeamMemberType& team_member;
/** \brief  Constructor: binds the team_member reference to the supplied team member.
 *
 *  Only a reference is kept, so the referenced object must outlive this struct.
 */
516 KOKKOS_INLINE_FUNCTION
517 ThreadSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
520 template<
class TeamMemberType>
521 struct VectorSingleStruct {
522 const TeamMemberType& team_member;
/** \brief  Constructor: binds the team_member reference to the supplied team member.
 *
 *  Only a reference is kept, so the referenced object must outlive this struct.
 */
523 KOKKOS_INLINE_FUNCTION
524 VectorSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
535 template<
typename iType,
class TeamMemberType>
536 KOKKOS_INLINE_FUNCTION
537 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
/** \brief  TeamThreadRange overload taking separate begin and end values whose integral types may differ.
 *
 *  The returned boundaries struct uses std::common_type<iType1, iType2>::type as its index type.
 */
546 template<
typename iType1,
typename iType2,
class TeamMemberType>
547 KOKKOS_INLINE_FUNCTION
548 Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType>
549 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
const iType2& end );
557 template<
typename iType,
class TeamMemberType>
558 KOKKOS_INLINE_FUNCTION
559 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
member_type chunk_size() const
Return the chunk size.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy execution_policy
Tag this class as an execution policy.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
TeamPolicy(const typename traits::execution_space &, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
RangePolicy set_chunk_size(int chunk_size_) const
Set chunk_size to a discrete value.
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.