diff options
Diffstat (limited to 'include/qemu/atomic.h')
-rw-r--r-- | include/qemu/atomic.h | 141 |
1 files changed, 61 insertions, 80 deletions
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index c4f6950fcb..c09fce704f 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -72,16 +72,16 @@ * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). */ -#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) -#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) -#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) +#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) +#define smp_mb_release() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) +#define smp_mb_acquire() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) /* Most compilers currently treat consume and acquire the same, but really * no processors except Alpha need a barrier here. Leave it in if * using Thread Sanitizer to avoid warnings, otherwise optimize it away. */ #if defined(__SANITIZE_THREAD__) -#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) +#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) #elif defined(__alpha__) #define smp_read_barrier_depends() asm volatile("mb":::"memory") #else @@ -135,44 +135,18 @@ __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) -/* atomic_mb_read/set semantics map Java volatile variables. They are - * less expensive on some platforms (notably POWER & ARMv7) than fully - * sequentially consistent operations. - * - * As long as they are used as paired operations they are safe to - * use. See docs/atomic.txt for more discussion. - */ - -#if defined(_ARCH_PPC) -#define atomic_mb_read(ptr) \ +#define atomic_load_acquire(ptr) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_RELAXED); \ - smp_rmb(); \ + __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ _val; \ }) -#define atomic_mb_set(ptr, i) do { \ +#define atomic_store_release(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - smp_wmb(); \ - __atomic_store_n(ptr, i, __ATOMIC_RELAXED); \ - smp_mb(); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) -#else -#define atomic_mb_read(ptr) \ - ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_SEQ_CST); \ - _val; \ - }) - -#define atomic_mb_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - __atomic_store_n(ptr, i, __ATOMIC_SEQ_CST); \ -} while(0) -#endif /* All the remaining operations are fully sequentially consistent */ @@ -238,8 +212,8 @@ * here (a compiler barrier only). QEMU doesn't do accesses to write-combining * qemu memory or non-temporal load/stores from C code. */ -#define smp_wmb() barrier() -#define smp_rmb() barrier() +#define smp_mb_release() barrier() +#define smp_mb_acquire() barrier() /* * __sync_lock_test_and_set() is documented to be an acquire barrier only, @@ -248,11 +222,6 @@ */ #define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) -/* - * Load/store with Java volatile semantics. - */ -#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i)) - #elif defined(_ARCH_PPC) /* @@ -263,13 +232,15 @@ * smp_mb has the same problem as on x86 for not-very-new GCC * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). */ -#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) +#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) #if defined(__powerpc64__) -#define smp_rmb() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) #else -#define smp_rmb() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) #endif -#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) #endif /* _ARCH_PPC */ @@ -277,18 +248,18 @@ * For (host) platforms we don't have explicit barrier definitions * for, we use the gcc __sync_synchronize() primitive to generate a * full barrier. This should be safe on all platforms, though it may - * be overkill for smp_wmb() and smp_rmb(). + * be overkill for smp_mb_acquire() and smp_mb_release(). */ #ifndef smp_mb -#define smp_mb() __sync_synchronize() +#define smp_mb() __sync_synchronize() #endif -#ifndef smp_wmb -#define smp_wmb() __sync_synchronize() +#ifndef smp_mb_acquire +#define smp_mb_acquire() __sync_synchronize() #endif -#ifndef smp_rmb -#define smp_rmb() __sync_synchronize() +#ifndef smp_mb_release +#define smp_mb_release() __sync_synchronize() #endif #ifndef smp_read_barrier_depends @@ -341,41 +312,16 @@ atomic_set(ptr, i); \ } while (0) -/* These have the same semantics as Java volatile variables. - * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html: - * "1. Issue a StoreStore barrier (wmb) before each volatile store." - * 2. Issue a StoreLoad barrier after each volatile store. - * Note that you could instead issue one before each volatile load, but - * this would be slower for typical programs using volatiles in which - * reads greatly outnumber writes. Alternatively, if available, you - * can implement volatile store as an atomic instruction (for example - * XCHG on x86) and omit the barrier. This may be more efficient if - * atomic instructions are cheaper than StoreLoad barriers. - * 3. Issue LoadLoad and LoadStore barriers after each volatile load." - * - * If you prefer to think in terms of "pairing" of memory barriers, - * an atomic_mb_read pairs with an atomic_mb_set. - * - * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq, - * while an atomic_mb_set is a st.rel followed by a memory barrier. - * - * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST - * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough. - * Just always use the barriers manually by the rules above. - */ -#define atomic_mb_read(ptr) ({ \ +#define atomic_load_acquire(ptr) ({ \ typeof(*ptr) _val = atomic_read(ptr); \ - smp_rmb(); \ + smp_mb_acquire(); \ _val; \ }) -#ifndef atomic_mb_set -#define atomic_mb_set(ptr, i) do { \ - smp_wmb(); \ +#define atomic_store_release(ptr, i) do { \ + smp_mb_release(); \ atomic_set(ptr, i); \ - smp_mb(); \ } while (0) -#endif #ifndef atomic_xchg #if defined(__clang__) @@ -404,4 +350,39 @@ #define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) #endif /* __ATOMIC_RELAXED */ + +#ifndef smp_wmb +#define smp_wmb() smp_mb_release() +#endif +#ifndef smp_rmb +#define smp_rmb() smp_mb_acquire() +#endif + +/* This is more efficient than a store plus a fence. */ +#if !defined(__SANITIZE_THREAD__) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i)) +#endif +#endif + +/* atomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. + * + * As long as they are used as paired operations they are safe to + * use. See docs/atomic.txt for more discussion. + */ + +#ifndef atomic_mb_read +#define atomic_mb_read(ptr) \ + atomic_load_acquire(ptr) +#endif + +#ifndef atomic_mb_set +#define atomic_mb_set(ptr, i) do { \ + atomic_store_release(ptr, i); \ + smp_mb(); \ +} while(0) +#endif + #endif /* QEMU_ATOMIC_H */ |