Kokkos Core Kernels Package  Version of the Day
Kokkos_Serial.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
46 
47 #ifndef KOKKOS_SERIAL_HPP
48 #define KOKKOS_SERIAL_HPP
49 
50 #include <cstddef>
51 #include <iosfwd>
52 #include <Kokkos_Parallel.hpp>
53 #include <Kokkos_TaskScheduler.hpp>
54 #include <Kokkos_Layout.hpp>
55 #include <Kokkos_HostSpace.hpp>
56 #include <Kokkos_ScratchSpace.hpp>
57 #include <Kokkos_MemoryTraits.hpp>
58 #include <impl/Kokkos_Tags.hpp>
59 #include <impl/Kokkos_FunctorAdapter.hpp>
60 #include <impl/Kokkos_Profiling_Interface.hpp>
61 
62 #include <KokkosExp_MDRangePolicy.hpp>
63 
64 #if defined( KOKKOS_HAVE_SERIAL )
65 
66 namespace Kokkos {
67 
80 class Serial {
81 public:
83 
84 
86  typedef Serial execution_space ;
88  typedef HostSpace::size_type size_type ;
90  typedef HostSpace memory_space ;
93 
95  typedef LayoutRight array_layout ;
96 
98  typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
99 
101 
108  inline static int in_parallel() { return false ; }
109 
121  static bool sleep();
122 
128  static bool wake();
129 
136  static void fence() {}
137 
138  static void initialize( unsigned threads_count = 1 ,
139  unsigned use_numa_count = 0 ,
140  unsigned use_cores_per_numa = 0 ,
141  bool allow_asynchronous_threadpool = false) {
142  (void) threads_count;
143  (void) use_numa_count;
144  (void) use_cores_per_numa;
145  (void) allow_asynchronous_threadpool;
146 
147  // Init the array of locks used for arbitrarily sized atomics
148  Impl::init_lock_array_host_space();
149  #if (KOKKOS_ENABLE_PROFILING)
150  Kokkos::Profiling::initialize();
151  #endif
152  }
153 
154  static int is_initialized() { return 1 ; }
155 
157  static int concurrency() {return 1;};
158 
160  static void finalize() {
161  #if (KOKKOS_ENABLE_PROFILING)
162  Kokkos::Profiling::finalize();
163  #endif
164  }
165 
167  static void print_configuration( std::ostream & , const bool /* detail */ = false ) {}
168 
169  //--------------------------------------------------------------------------
170 
171  inline static int thread_pool_size( int = 0 ) { return 1 ; }
172  KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
173 
174  //--------------------------------------------------------------------------
175 
176  KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
177  inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
178 
179  //--------------------------------------------------------------------------
180 
181  static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size );
182 
183  //--------------------------------------------------------------------------
184 };
185 
186 } // namespace Kokkos
187 
188 /*--------------------------------------------------------------------------*/
189 /*--------------------------------------------------------------------------*/
190 
191 namespace Kokkos {
192 namespace Impl {
193 
194 template<>
195 struct VerifyExecutionCanAccessMemorySpace
196  < Kokkos::Serial::memory_space
197  , Kokkos::Serial::scratch_memory_space
198  >
199 {
200  enum { value = true };
201  inline static void verify( void ) { }
202  inline static void verify( const void * ) { }
203 };
204 
namespace SerialImpl {

/// \brief Singleton record for the Serial scratch buffer.
///
/// Only the layout and the declarations are visible in this header; the
/// constructor, destructor and singleton() are defined elsewhere.
struct Sentinel {

  void *    m_scratch ;     ///< Scratch buffer pointer.
  unsigned  m_reduce_end ;  ///< Presumably the end offset of the reduction region -- TODO confirm against the definition.
  unsigned  m_shared_end ;  ///< Presumably the end offset of the shared region -- TODO confirm against the definition.

  Sentinel();
  ~Sentinel();

  /// Access the single Sentinel instance.
  static Sentinel & singleton();
};

/// Declared only; the alignment rule is defined out of line.
inline unsigned align( unsigned n );

}
221 } // namespace Impl
222 } // namespace Kokkos
223 
224 /*--------------------------------------------------------------------------*/
225 /*--------------------------------------------------------------------------*/
226 
227 namespace Kokkos {
228 namespace Impl {
229 
230 class SerialTeamMember {
231 private:
232  typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
233  const scratch_memory_space m_space ;
234  const int m_league_rank ;
235  const int m_league_size ;
236 
237  SerialTeamMember & operator = ( const SerialTeamMember & );
238 
239 public:
240 
241  KOKKOS_INLINE_FUNCTION
242  const scratch_memory_space & team_shmem() const { return m_space ; }
243 
244  KOKKOS_INLINE_FUNCTION
245  const scratch_memory_space & team_scratch(int) const
246  { return m_space ; }
247 
248  KOKKOS_INLINE_FUNCTION
249  const scratch_memory_space & thread_scratch(int) const
250  { return m_space ; }
251 
252  KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
253  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
254  KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; }
255  KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; }
256 
257  KOKKOS_INLINE_FUNCTION void team_barrier() const {}
258 
259  template<class ValueType>
260  KOKKOS_INLINE_FUNCTION
261  void team_broadcast(const ValueType& , const int& ) const {}
262 
263  template< class ValueType, class JoinOp >
264  KOKKOS_INLINE_FUNCTION
265  ValueType team_reduce( const ValueType & value , const JoinOp & ) const
266  {
267  return value ;
268  }
269 
279  template< typename Type >
280  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const
281  {
282  const Type tmp = global_accum ? *global_accum : Type(0) ;
283  if ( global_accum ) { *global_accum += value ; }
284  return tmp ;
285  }
286 
292  template< typename Type >
293  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const
294  { return Type(0); }
295 
296  //----------------------------------------
297  // Execution space specific:
298 
299  SerialTeamMember( int arg_league_rank
300  , int arg_league_size
301  , int arg_shared_size
302  );
303 };
304 
305 } // namespace Impl
306 
307 /*
308  * < Kokkos::Serial , WorkArgTag >
309  * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
310  *
311  */
312 namespace Impl {
313 template< class ... Properties >
314 class TeamPolicyInternal< Kokkos::Serial , Properties ... >:public PolicyTraits<Properties...>
315 {
316 private:
317 
318  size_t m_team_scratch_size[2] ;
319  size_t m_thread_scratch_size[2] ;
320  int m_league_size ;
321  int m_chunk_size;
322 
323 public:
324 
326  typedef TeamPolicyInternal execution_policy ;
327 
328  typedef PolicyTraits<Properties ... > traits;
329 
331  typedef Kokkos::Serial execution_space ;
332 
333  TeamPolicyInternal& operator = (const TeamPolicyInternal& p) {
334  m_league_size = p.m_league_size;
335  m_team_scratch_size[0] = p.m_team_scratch_size[0];
336  m_thread_scratch_size[0] = p.m_thread_scratch_size[0];
337  m_team_scratch_size[1] = p.m_team_scratch_size[1];
338  m_thread_scratch_size[1] = p.m_thread_scratch_size[1];
339  m_chunk_size = p.m_chunk_size;
340  return *this;
341  }
342 
343  //----------------------------------------
344 
345  template< class FunctorType >
346  static
347  int team_size_max( const FunctorType & ) { return 1 ; }
348 
349  template< class FunctorType >
350  static
351  int team_size_recommended( const FunctorType & ) { return 1 ; }
352 
353  template< class FunctorType >
354  static
355  int team_size_recommended( const FunctorType & , const int& ) { return 1 ; }
356 
357  //----------------------------------------
358 
359  inline int team_size() const { return 1 ; }
360  inline int league_size() const { return m_league_size ; }
361  inline size_t scratch_size(const int& level, int = 0) const { return m_team_scratch_size[level] + m_thread_scratch_size[level]; }
362 
364  TeamPolicyInternal( execution_space &
365  , int league_size_request
366  , int /* team_size_request */
367  , int /* vector_length_request */ = 1 )
368  : m_team_scratch_size { 0 , 0 }
369  , m_thread_scratch_size { 0 , 0 }
370  , m_league_size( league_size_request )
371  , m_chunk_size ( 32 )
372  {}
373 
374  TeamPolicyInternal( execution_space &
375  , int league_size_request
376  , const Kokkos::AUTO_t & /* team_size_request */
377  , int /* vector_length_request */ = 1 )
378  : m_team_scratch_size { 0 , 0 }
379  , m_thread_scratch_size { 0 , 0 }
380  , m_league_size( league_size_request )
381  , m_chunk_size ( 32 )
382  {}
383 
384  TeamPolicyInternal( int league_size_request
385  , int /* team_size_request */
386  , int /* vector_length_request */ = 1 )
387  : m_team_scratch_size { 0 , 0 }
388  , m_thread_scratch_size { 0 , 0 }
389  , m_league_size( league_size_request )
390  , m_chunk_size ( 32 )
391  {}
392 
393  TeamPolicyInternal( int league_size_request
394  , const Kokkos::AUTO_t & /* team_size_request */
395  , int /* vector_length_request */ = 1 )
396  : m_team_scratch_size { 0 , 0 }
397  , m_thread_scratch_size { 0 , 0 }
398  , m_league_size( league_size_request )
399  , m_chunk_size ( 32 )
400  {}
401 
402  inline int chunk_size() const { return m_chunk_size ; }
403 
405  inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const {
406  TeamPolicyInternal p = *this;
407  p.m_chunk_size = chunk_size_;
408  return p;
409  }
410 
412  inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const {
413  TeamPolicyInternal p = *this;
414  p.m_team_scratch_size[level] = per_team.value;
415  return p;
416  };
417 
419  inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const {
420  TeamPolicyInternal p = *this;
421  p.m_thread_scratch_size[level] = per_thread.value;
422  return p;
423  };
424 
426  inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const {
427  TeamPolicyInternal p = *this;
428  p.m_team_scratch_size[level] = per_team.value;
429  p.m_thread_scratch_size[level] = per_thread.value;
430  return p;
431  };
432 
433  typedef Impl::SerialTeamMember member_type ;
434 };
435 } /* namespace Impl */
436 } /* namespace Kokkos */
437 
438 /*--------------------------------------------------------------------------*/
439 /*--------------------------------------------------------------------------*/
440 
441 /*--------------------------------------------------------------------------*/
442 /*--------------------------------------------------------------------------*/
443 /* Parallel patterns for Kokkos::Serial with RangePolicy */
444 
445 namespace Kokkos {
446 namespace Impl {
447 
448 template< class FunctorType , class ... Traits >
449 class ParallelFor< FunctorType ,
450  Kokkos::RangePolicy< Traits ... > ,
451  Kokkos::Serial
452  >
453 {
454 private:
455 
456  typedef Kokkos::RangePolicy< Traits ... > Policy ;
457 
458  const FunctorType m_functor ;
459  const Policy m_policy ;
460 
461  template< class TagType >
462  typename std::enable_if< std::is_same< TagType , void >::value >::type
463  exec() const
464  {
465  const typename Policy::member_type e = m_policy.end();
466  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
467  m_functor( i );
468  }
469  }
470 
471  template< class TagType >
472  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
473  exec() const
474  {
475  const TagType t{} ;
476  const typename Policy::member_type e = m_policy.end();
477  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
478  m_functor( t , i );
479  }
480  }
481 
482 public:
483 
484  inline
485  void execute() const
486  { this-> template exec< typename Policy::work_tag >(); }
487 
488  inline
489  ParallelFor( const FunctorType & arg_functor
490  , const Policy & arg_policy )
491  : m_functor( arg_functor )
492  , m_policy( arg_policy )
493  {}
494 };
495 
496 /*--------------------------------------------------------------------------*/
497 
498 template< class FunctorType , class ReducerType , class ... Traits >
499 class ParallelReduce< FunctorType
500  , Kokkos::RangePolicy< Traits ... >
501  , ReducerType
502  , Kokkos::Serial
503  >
504 {
505 private:
506 
507  typedef Kokkos::RangePolicy< Traits ... > Policy ;
508  typedef typename Policy::work_tag WorkTag ;
509 
510  typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
511  typedef typename ReducerConditional::type ReducerTypeFwd;
512 
513  typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
514  typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
515 
516  typedef typename ValueTraits::pointer_type pointer_type ;
517  typedef typename ValueTraits::reference_type reference_type ;
518 
519  const FunctorType m_functor ;
520  const Policy m_policy ;
521  const ReducerType m_reducer ;
522  const pointer_type m_result_ptr ;
523 
524  template< class TagType >
525  inline
526  typename std::enable_if< std::is_same< TagType , void >::value >::type
527  exec( pointer_type ptr ) const
528  {
529  reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
530 
531  const typename Policy::member_type e = m_policy.end();
532  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
533  m_functor( i , update );
534  }
535 
536  Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
537  final( ReducerConditional::select(m_functor , m_reducer) , ptr );
538  }
539 
540  template< class TagType >
541  inline
542  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
543  exec( pointer_type ptr ) const
544  {
545  const TagType t{} ;
546  reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
547 
548  const typename Policy::member_type e = m_policy.end();
549  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
550  m_functor( t , i , update );
551  }
552 
553  Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
554  final( ReducerConditional::select(m_functor , m_reducer) , ptr );
555  }
556 
557 public:
558 
559  inline
560  void execute() const
561  {
562  pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
563  ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 );
564 
565  this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
566  }
567 
568  template< class HostViewType >
569  ParallelReduce( const FunctorType & arg_functor ,
570  const Policy & arg_policy ,
571  const HostViewType & arg_result_view ,
572  typename std::enable_if<
573  Kokkos::is_view< HostViewType >::value &&
574  !Kokkos::is_reducer_type<ReducerType>::value
575  ,void*>::type = NULL)
576  : m_functor( arg_functor )
577  , m_policy( arg_policy )
578  , m_reducer( InvalidType() )
579  , m_result_ptr( arg_result_view.ptr_on_device() )
580  {
581  static_assert( Kokkos::is_view< HostViewType >::value
582  , "Kokkos::Serial reduce result must be a View" );
583 
584  static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value
585  , "Kokkos::Serial reduce result must be a View in HostSpace" );
586  }
587 
588  inline
589  ParallelReduce( const FunctorType & arg_functor
590  , Policy arg_policy
591  , const ReducerType& reducer )
592  : m_functor( arg_functor )
593  , m_policy( arg_policy )
594  , m_reducer( reducer )
595  , m_result_ptr( reducer.result_view().data() )
596  {
597  /*static_assert( std::is_same< typename ViewType::memory_space
598  , Kokkos::HostSpace >::value
599  , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/
600  }
601 };
602 
603 /*--------------------------------------------------------------------------*/
604 
605 template< class FunctorType , class ... Traits >
606 class ParallelScan< FunctorType
607  , Kokkos::RangePolicy< Traits ... >
608  , Kokkos::Serial
609  >
610 {
611 private:
612 
613  typedef Kokkos::RangePolicy< Traits ... > Policy ;
614  typedef typename Policy::work_tag WorkTag ;
615  typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
616  typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
617 
618  typedef typename ValueTraits::pointer_type pointer_type ;
619  typedef typename ValueTraits::reference_type reference_type ;
620 
621  const FunctorType m_functor ;
622  const Policy m_policy ;
623 
624  template< class TagType >
625  inline
626  typename std::enable_if< std::is_same< TagType , void >::value >::type
627  exec( pointer_type ptr ) const
628  {
629  reference_type update = ValueInit::init( m_functor , ptr );
630 
631  const typename Policy::member_type e = m_policy.end();
632  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
633  m_functor( i , update , true );
634  }
635  }
636 
637  template< class TagType >
638  inline
639  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
640  exec( pointer_type ptr ) const
641  {
642  const TagType t{} ;
643  reference_type update = ValueInit::init( m_functor , ptr );
644 
645  const typename Policy::member_type e = m_policy.end();
646  for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
647  m_functor( t , i , update , true );
648  }
649  }
650 
651 public:
652 
653  inline
654  void execute() const
655  {
656  pointer_type ptr = (pointer_type)
657  Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 );
658  this-> template exec< WorkTag >( ptr );
659  }
660 
661  inline
662  ParallelScan( const FunctorType & arg_functor
663  , const Policy & arg_policy
664  )
665  : m_functor( arg_functor )
666  , m_policy( arg_policy )
667  {}
668 };
669 
670 } // namespace Impl
671 } // namespace Kokkos
672 
673 /*--------------------------------------------------------------------------*/
674 /*--------------------------------------------------------------------------*/
675 /* Parallel patterns for Kokkos::Serial with TeamPolicy */
676 
677 namespace Kokkos {
678 namespace Impl {
679 
680 template< class FunctorType , class ... Properties >
681 class ParallelFor< FunctorType
682  , Kokkos::TeamPolicy< Properties ... >
683  , Kokkos::Serial
684  >
685 {
686 private:
687 
688  typedef TeamPolicyInternal< Kokkos::Serial , Properties ...> Policy ;
689  typedef typename Policy::member_type Member ;
690 
691  const FunctorType m_functor ;
692  const int m_league ;
693  const int m_shared ;
694 
695  template< class TagType >
696  inline
697  typename std::enable_if< std::is_same< TagType , void >::value >::type
698  exec() const
699  {
700  for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
701  m_functor( Member(ileague,m_league,m_shared) );
702  }
703  }
704 
705  template< class TagType >
706  inline
707  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
708  exec() const
709  {
710  const TagType t{} ;
711  for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
712  m_functor( t , Member(ileague,m_league,m_shared) );
713  }
714  }
715 
716 public:
717 
718  inline
719  void execute() const
720  {
721  Kokkos::Serial::scratch_memory_resize( 0 , m_shared );
722  this-> template exec< typename Policy::work_tag >();
723  }
724 
725  ParallelFor( const FunctorType & arg_functor
726  , const Policy & arg_policy )
727  : m_functor( arg_functor )
728  , m_league( arg_policy.league_size() )
729  , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) )
730  { }
731 };
732 
733 /*--------------------------------------------------------------------------*/
734 
735 template< class FunctorType , class ReducerType , class ... Properties >
736 class ParallelReduce< FunctorType
737  , Kokkos::TeamPolicy< Properties ... >
738  , ReducerType
739  , Kokkos::Serial
740  >
741 {
742 private:
743 
744  typedef TeamPolicyInternal< Kokkos::Serial, Properties ... > Policy ;
745  typedef typename Policy::member_type Member ;
746  typedef typename Policy::work_tag WorkTag ;
747 
748  typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
749  typedef typename ReducerConditional::type ReducerTypeFwd;
750 
751  typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
752  typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
753 
754  typedef typename ValueTraits::pointer_type pointer_type ;
755  typedef typename ValueTraits::reference_type reference_type ;
756 
757  const FunctorType m_functor ;
758  const int m_league ;
759  const ReducerType m_reducer ;
760  pointer_type m_result_ptr ;
761  const int m_shared ;
762 
763  template< class TagType >
764  inline
765  typename std::enable_if< std::is_same< TagType , void >::value >::type
766  exec( pointer_type ptr ) const
767  {
768  reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
769 
770  for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
771  m_functor( Member(ileague,m_league,m_shared) , update );
772  }
773 
774  Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
775  final( ReducerConditional::select(m_functor , m_reducer) , ptr );
776  }
777 
778  template< class TagType >
779  inline
780  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
781  exec( pointer_type ptr ) const
782  {
783  const TagType t{} ;
784 
785  reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr );
786 
787  for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
788  m_functor( t , Member(ileague,m_league,m_shared) , update );
789  }
790 
791  Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >::
792  final( ReducerConditional::select(m_functor , m_reducer) , ptr );
793  }
794 
795 public:
796 
797  inline
798  void execute() const
799  {
800  pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
801  ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , m_shared );
802 
803  this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
804  }
805 
806  template< class ViewType >
807  ParallelReduce( const FunctorType & arg_functor
808  , const Policy & arg_policy
809  , const ViewType & arg_result ,
810  typename std::enable_if<
811  Kokkos::is_view< ViewType >::value &&
812  !Kokkos::is_reducer_type<ReducerType>::value
813  ,void*>::type = NULL)
814  : m_functor( arg_functor )
815  , m_league( arg_policy.league_size() )
816  , m_reducer( InvalidType() )
817  , m_result_ptr( arg_result.ptr_on_device() )
818  , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) )
819  {
820  static_assert( Kokkos::is_view< ViewType >::value
821  , "Reduction result on Kokkos::Serial must be a Kokkos::View" );
822 
823  static_assert( std::is_same< typename ViewType::memory_space
824  , Kokkos::HostSpace >::value
825  , "Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" );
826  }
827 
828  inline
829  ParallelReduce( const FunctorType & arg_functor
830  , Policy arg_policy
831  , const ReducerType& reducer )
832  : m_functor( arg_functor )
833  , m_league( arg_policy.league_size() )
834  , m_reducer( reducer )
835  , m_result_ptr( reducer.result_view().data() )
836  , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) )
837  {
838  /*static_assert( std::is_same< typename ViewType::memory_space
839  , Kokkos::HostSpace >::value
840  , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/
841  }
842 
843 };
844 
845 } // namespace Impl
846 } // namespace Kokkos
847 
848 /*--------------------------------------------------------------------------*/
849 /*--------------------------------------------------------------------------*/
850 /* Nested parallel patterns for Kokkos::Serial with TeamPolicy */
851 
852 namespace Kokkos {
853 namespace Impl {
854 
855 template<typename iType>
856 struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
857  typedef iType index_type;
858  const iType begin ;
859  const iType end ;
860  enum {increment = 1};
861  const SerialTeamMember& thread;
862 
863  KOKKOS_INLINE_FUNCTION
864  TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count)
865  : begin(0)
866  , end(arg_count)
867  , thread(arg_thread)
868  {}
869 
870  KOKKOS_INLINE_FUNCTION
871  TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end )
872  : begin( arg_begin )
873  , end( arg_end)
874  , thread( arg_thread )
875  {}
876 };
877 
878  template<typename iType>
879  struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {
880  typedef iType index_type;
881  enum {start = 0};
882  const iType end;
883  enum {increment = 1};
884 
885  KOKKOS_INLINE_FUNCTION
886  ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count):
887  end( count )
888  {}
889  };
890 
891 } // namespace Impl
892 
893 template< typename iType >
894 KOKKOS_INLINE_FUNCTION
895 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
896 TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count )
897 {
898  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, count );
899 }
900 
901 template< typename iType1, typename iType2 >
902 KOKKOS_INLINE_FUNCTION
903 Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type,
904  Impl::SerialTeamMember >
905 TeamThreadRange( const Impl::SerialTeamMember& thread, const iType1 & begin, const iType2 & end )
906 {
907  typedef typename std::common_type< iType1, iType2 >::type iType;
908  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, iType(begin), iType(end) );
909 }
910 
911 template<typename iType>
912 KOKKOS_INLINE_FUNCTION
913 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
914  ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) {
915  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count);
916 }
917 
918 KOKKOS_INLINE_FUNCTION
919 Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(const Impl::SerialTeamMember& thread) {
920  return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread);
921 }
922 
923 KOKKOS_INLINE_FUNCTION
924 Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(const Impl::SerialTeamMember& thread) {
925  return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread);
926 }
927 
928 } // namespace Kokkos
929 
930 namespace Kokkos {
931 
936 template<typename iType, class Lambda>
937 KOKKOS_INLINE_FUNCTION
938 void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda) {
939  for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment)
940  lambda(i);
941 }
942 
947 template< typename iType, class Lambda, typename ValueType >
948 KOKKOS_INLINE_FUNCTION
949 void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
950  const Lambda & lambda, ValueType& result) {
951 
952  result = ValueType();
953 
954  for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
955  ValueType tmp = ValueType();
956  lambda(i,tmp);
957  result+=tmp;
958  }
959 
960  result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
961 }
962 
970 template< typename iType, class Lambda, typename ValueType, class JoinType >
971 KOKKOS_INLINE_FUNCTION
972 void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
973  const Lambda & lambda, const JoinType& join, ValueType& init_result) {
974 
975  ValueType result = init_result;
976 
977  for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
978  ValueType tmp = ValueType();
979  lambda(i,tmp);
980  join(result,tmp);
981  }
982 
983  init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
984 }
985 
986 } //namespace Kokkos
987 
988 namespace Kokkos {
993 template<typename iType, class Lambda>
994 KOKKOS_INLINE_FUNCTION
995 void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
996  loop_boundaries, const Lambda& lambda) {
997  #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
998  #pragma ivdep
999  #endif
1000  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
1001  lambda(i);
1002 }
1003 
1008 template< typename iType, class Lambda, typename ValueType >
1009 KOKKOS_INLINE_FUNCTION
1010 void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
1011  loop_boundaries, const Lambda & lambda, ValueType& result) {
1012  result = ValueType();
1013 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1014 #pragma ivdep
1015 #endif
1016  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
1017  ValueType tmp = ValueType();
1018  lambda(i,tmp);
1019  result+=tmp;
1020  }
1021 }
1022 
1030 template< typename iType, class Lambda, typename ValueType, class JoinType >
1031 KOKKOS_INLINE_FUNCTION
1032 void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
1033  loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
1034 
1035  ValueType result = init_result;
1036 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1037 #pragma ivdep
1038 #endif
1039  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
1040  ValueType tmp = ValueType();
1041  lambda(i,tmp);
1042  join(result,tmp);
1043  }
1044  init_result = result;
1045 }
1046 
1057 template< typename iType, class FunctorType >
1058 KOKKOS_INLINE_FUNCTION
1059 void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
1060  loop_boundaries, const FunctorType & lambda) {
1061 
1062  typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
1063  typedef typename ValueTraits::value_type value_type ;
1064 
1065  value_type scan_val = value_type();
1066 
1067 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1068 #pragma ivdep
1069 #endif
1070  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
1071  lambda(i,scan_val,true);
1072  }
1073 }
1074 
1075 } // namespace Kokkos
1076 
1077 namespace Kokkos {
1078 
1079 template<class FunctorType>
1080 KOKKOS_INLINE_FUNCTION
1081 void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
1082  lambda();
1083 }
1084 
1085 template<class FunctorType>
1086 KOKKOS_INLINE_FUNCTION
1087 void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
1088  lambda();
1089 }
1090 
1091 template<class FunctorType, class ValueType>
1092 KOKKOS_INLINE_FUNCTION
1093 void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
1094  lambda(val);
1095 }
1096 
1097 template<class FunctorType, class ValueType>
1098 KOKKOS_INLINE_FUNCTION
1099 void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
1100  lambda(val);
1101 }
1102 }
1103 
1104 //----------------------------------------------------------------------------
1105 
1106 #include <impl/Kokkos_Serial_Task.hpp>
1107 
1108 #endif // defined( KOKKOS_HAVE_SERIAL )
1109 #endif /* #define KOKKOS_SERIAL_HPP */
1110 
1111 //----------------------------------------------------------------------------
1112 //----------------------------------------------------------------------------
Scratch memory space associated with an execution space.
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Parallel reduction.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
Memory space for main process and CPU execution spaces.
Memory management for host memory.
Declaration of various MemoryLayout options.
Declaration of parallel operators.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
void finalize()
Finalize the spaces that were initialized via Kokkos::initialize.
void update(double alpha, const BlockedMultiVector &x, double beta, BlockedMultiVector &y)
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
Execution policy for work over a range of an integral type.