Kokkos Core Kernels Package  Version of the Day
Kokkos_Atomic.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
66 
67 #ifndef KOKKOS_ATOMIC_HPP
68 #define KOKKOS_ATOMIC_HPP
69 
70 #include <Kokkos_Macros.hpp>
71 #include <Kokkos_HostSpace.hpp>
72 #include <impl/Kokkos_Traits.hpp>
73 
74 //----------------------------------------------------------------------------
75 #if defined(_WIN32)
76 #define KOKKOS_ATOMICS_USE_WINDOWS
77 #else
78 #if defined( KOKKOS_HAVE_CUDA )
79 
80 // Compiling NVIDIA device code, must use Cuda atomics:
81 
82 #define KOKKOS_ATOMICS_USE_CUDA
83 #endif
84 
85 #if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
86  ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
87  ! defined( KOKKOS_ATOMICS_USE_OMP31 )
88 
89 // Compiling for non-Cuda atomic implementation has not been pre-selected.
90 // Choose the best implementation for the detected compiler.
91 // Preference: GCC, INTEL, OMP31
92 
93 #if defined( KOKKOS_COMPILER_GNU ) || \
94  defined( KOKKOS_COMPILER_CLANG ) || \
95  ( defined ( KOKKOS_COMPILER_NVCC ) )
96 
97 #define KOKKOS_ATOMICS_USE_GCC
98 
99 #elif defined( KOKKOS_COMPILER_INTEL ) || \
100  defined( KOKKOS_COMPILER_CRAYC )
101 
102 #define KOKKOS_ATOMICS_USE_INTEL
103 
104 #elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
105 
106 #define KOKKOS_ATOMICS_USE_OMP31
107 
108 #else
109 
110 #error "KOKKOS_ATOMICS_USE : Unsupported compiler"
111 
112 #endif
113 
114 #endif /* Not pre-selected atomic implementation */
115 #endif
116 
117 //----------------------------------------------------------------------------
118 
119 // Forward decalaration of functions supporting arbitrary sized atomics
120 // This is necessary since Kokkos_Atomic.hpp is internally included very early
121 // through Kokkos_HostSpace.hpp as well as the allocation tracker.
122 #ifdef KOKKOS_HAVE_CUDA
123 namespace Kokkos {
124 namespace Impl {
130 #ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
131 extern
132 #endif
133 __device__ inline
134 bool lock_address_cuda_space(void* ptr);
135 
142 #ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
143 extern
144 #endif
145 __device__ inline
146 void unlock_address_cuda_space(void* ptr);
147 }
148 }
149 #endif
150 
151 
152 namespace Kokkos {
153 template <typename T>
154 KOKKOS_INLINE_FUNCTION
155 void atomic_add(volatile T * const dest, const T src);
156 
157 // Atomic increment
158 template<typename T>
159 KOKKOS_INLINE_FUNCTION
160 void atomic_increment(volatile T* a);
161 
162 template<typename T>
163 KOKKOS_INLINE_FUNCTION
164 void atomic_decrement(volatile T* a);
165 }
166 
167 namespace Kokkos {
168 
169 
170 inline
171 const char * atomic_query_version()
172 {
173 #if defined( KOKKOS_ATOMICS_USE_CUDA )
174  return "KOKKOS_ATOMICS_USE_CUDA" ;
175 #elif defined( KOKKOS_ATOMICS_USE_GCC )
176  return "KOKKOS_ATOMICS_USE_GCC" ;
177 #elif defined( KOKKOS_ATOMICS_USE_INTEL )
178  return "KOKKOS_ATOMICS_USE_INTEL" ;
179 #elif defined( KOKKOS_ATOMICS_USE_OMP31 )
180  return "KOKKOS_ATOMICS_USE_OMP31" ;
181 #elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
182  return "KOKKOS_ATOMICS_USE_WINDOWS";
183 #endif
184 }
185 
186 } // namespace Kokkos
187 
188 #ifdef _WIN32
189 #include "impl/Kokkos_Atomic_Windows.hpp"
190 #else
191 
192 //----------------------------------------------------------------------------
193 // Atomic Assembly
194 //
195 // Implements CAS128-bit in assembly
196 
197 #include "impl/Kokkos_Atomic_Assembly.hpp"
198 
199 //----------------------------------------------------------------------------
200 // Atomic exchange
201 //
202 // template< typename T >
203 // T atomic_exchange( volatile T* const dest , const T val )
204 // { T tmp = *dest ; *dest = val ; return tmp ; }
205 
206 #include "impl/Kokkos_Atomic_Exchange.hpp"
207 
208 //----------------------------------------------------------------------------
209 // Atomic compare-and-exchange
210 //
211 // template<class T>
212 // bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
213 // { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; }
214 
215 #include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp"
216 
217 //----------------------------------------------------------------------------
218 // Atomic fetch and add
219 //
220 // template<class T>
221 // T atomic_fetch_add(volatile T* const dest, const T val)
222 // { T tmp = *dest ; *dest += val ; return tmp ; }
223 
224 #include "impl/Kokkos_Atomic_Fetch_Add.hpp"
225 
226 //----------------------------------------------------------------------------
227 // Atomic increment
228 //
229 // template<class T>
230 // T atomic_increment(volatile T* const dest)
231 // { dest++; }
232 
233 #include "impl/Kokkos_Atomic_Increment.hpp"
234 
235 //----------------------------------------------------------------------------
236 // Atomic Decrement
237 //
238 // template<class T>
239 // T atomic_decrement(volatile T* const dest)
240 // { dest--; }
241 
242 #include "impl/Kokkos_Atomic_Decrement.hpp"
243 
244 //----------------------------------------------------------------------------
245 // Atomic fetch and sub
246 //
247 // template<class T>
248 // T atomic_fetch_sub(volatile T* const dest, const T val)
249 // { T tmp = *dest ; *dest -= val ; return tmp ; }
250 
251 #include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
252 
253 //----------------------------------------------------------------------------
254 // Atomic fetch and or
255 //
256 // template<class T>
257 // T atomic_fetch_or(volatile T* const dest, const T val)
258 // { T tmp = *dest ; *dest = tmp | val ; return tmp ; }
259 
260 #include "impl/Kokkos_Atomic_Fetch_Or.hpp"
261 
262 //----------------------------------------------------------------------------
263 // Atomic fetch and and
264 //
265 // template<class T>
266 // T atomic_fetch_and(volatile T* const dest, const T val)
267 // { T tmp = *dest ; *dest = tmp & val ; return tmp ; }
268 
269 #include "impl/Kokkos_Atomic_Fetch_And.hpp"
270 #endif /*Not _WIN32*/
271 
272 //----------------------------------------------------------------------------
273 // Memory fence
274 //
275 // All loads and stores from this thread will be globally consistent before continuing
276 //
277 // void memory_fence() {...};
278 #include "impl/Kokkos_Memory_Fence.hpp"
279 
280 //----------------------------------------------------------------------------
281 // Provide volatile_load and safe_load
282 //
283 // T volatile_load(T const volatile * const ptr);
284 //
285 // T const& safe_load(T const * const ptr);
286 // XEON PHI
287 // T safe_load(T const * const ptr
288 
289 #include "impl/Kokkos_Volatile_Load.hpp"
290 
291 #ifndef _WIN32
292 #include "impl/Kokkos_Atomic_Generic.hpp"
293 #endif
294 //----------------------------------------------------------------------------
295 // This atomic-style macro should be an inlined function, not a macro
296 
297 #if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__) && !defined(__CUDA_ARCH__)
298 
299  #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
300  #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
301 
302 #else
303 
304  #define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) ((void)0)
305  #define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) ((void)0)
306 
307 #endif
308 
309 //----------------------------------------------------------------------------
310 
311 #endif /* KOKKOS_ATOMIC_HPP */
312