atomic/atomic_arm.h
 /* 
  * Copyright (C) 2006 iptelorg GmbH
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /**
  * @file
  * @brief Atomic ops and memory barriers for ARM (>= v3)
  * 
  * Atomic ops and memory barriers for the ARM architecture (starting from
  * version 3); see atomic_ops.h for more info.
  * 
  * Config defines:
  * - NOSMP
  * - __CPU_arm
  * - __CPU_arm6    - armv6 support (supports atomic ops via ldrex/strex)
  * @ingroup atomic
  */
 
0db44da7
 /* 
  * History:
  * --------
  *  2006-03-31  created by andrei
  *  2007-05-10  added atomic_add and atomic_cmpxchg (andrei)
  *  2007-05-29  added membar_depends(), membar_*_atomic_op and
  *                membar_*_atomic_setget (andrei)
  */
 
 
 #ifndef _atomic_arm_h
 #define _atomic_arm_h
 
 
 
 
 #ifdef NOSMP
 #define HAVE_ASM_INLINE_MEMBAR
 #define membar() asm volatile ("" : : : "memory") /* gcc compiler barrier */
 #define membar_read()  membar()
 #define membar_write() membar()
 #define membar_depends()   do {} while(0) /* really empty, not even a cc bar.*/
 /* lock barriers: empty, not needed for NOSMP; the lock/unlock should already
  * contain gcc barriers*/
 #define membar_enter_lock() do {} while(0)
 #define membar_leave_lock() do {} while(0)
 /* membars after or before atomic_ops or atomic_setget -> use these or
  *  mb_<atomic_op_name>() if you need a memory barrier in one of these
  *  situations (on some archs where the atomic operations imply memory
  *  barriers it is better to use atomic_op_x(); membar_atomic_op() than
  *  atomic_op_x(); membar()); see also the usage sketch after this block */
 #define membar_atomic_op()				membar()
 #define membar_atomic_setget()			membar()
 #define membar_write_atomic_op()		membar_write()
 #define membar_write_atomic_setget()	membar_write()
 #define membar_read_atomic_op()			membar_read()
 #define membar_read_atomic_setget()		membar_read()
 #else /* SMP */
 #warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
 /* fall back to default lock based barriers (don't define HAVE_ASM...) */
 #endif /* NOSMP */
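
 /* Usage sketch (illustrative comment only; "counter" and "flag" are
  *  hypothetical atomic int variables, not part of this header): pairing
  *  atomic ops / set-get with the barrier macros above:
  *
  *    atomic_inc_int(&counter);       // atomic op (armv6 version below) ...
  *    membar_atomic_op();             // ... followed by the matching barrier
  *
  *    membar_write_atomic_setget();   // barrier before an atomic set
  *    atomic_set_int(&flag, 1);       // atomic_set_int() assumed from the
  *                                    //  common atomic headers
  */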
 
 
 #ifdef __CPU_arm6
 
 
 #define HAVE_ASM_INLINE_ATOMIC_OPS
 
 /* hack to get some membars */
 #ifndef NOSMP
 #include "atomic_unknown.h"
 #endif
 
 /* main asm block:
  *  %0 holds the loaded value (the op input) and the strex status,
  *  the op result is written in %1 */
 #define ATOMIC_ASM_OP(op) \
 			"1:   ldrex %0, [%3] \n\t" \
 			"     " op "\n\t" \
 			"     strex %0, %1, [%3] \n\t" \
 			"     cmp %0, #0 \n\t" \
 			"     bne 1b \n\t"
 
 /* same as above but writes %4 instead of %1, and %0 will contain 
  * the prev. val*/
 #define ATOMIC_ASM_OP2(op) \
 			"1:   ldrex %0, [%3] \n\t" \
 			"     " op "\n\t" \
 			"     strex %1, %4, [%3] \n\t" \
 			"     cmp %1, #0 \n\t" \
 			"     bne 1b \n\t"
 
 /* no extra param, %0 contains *var, %1 should contain the result */
 #define ATOMIC_FUNC_DECL(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
 	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var) \
 	{ \
 		P_TYPE ret, tmp; \
 		asm volatile( \
 			ATOMIC_ASM_OP(OP) \
 			: "=&r"(tmp), "=&r"(ret), "=m"(*var) : "r"(var)  : "cc" \
 			); \
 		return RET_EXPR; \
 	}
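
 /* For illustration, a slightly simplified sketch of what
  *  ATOMIC_FUNC_DECL(inc, "add  %1, %0, #1", int, void, ) expands to:
  *
  *    inline static void atomic_inc_int(volatile int *var)
  *    {
  *        int ret, tmp;
  *        asm volatile(
  *            "1:   ldrex %0, [%3] \n\t"
  *            "     add  %1, %0, #1 \n\t"
  *            "     strex %0, %1, [%3] \n\t"
  *            "     cmp %0, #0 \n\t"
  *            "     bne 1b \n\t"
  *            : "=&r"(tmp), "=&r"(ret), "=m"(*var) : "r"(var) : "cc");
  *    }
  */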
 
 /* one extra param in %4 */
 #define ATOMIC_FUNC_DECL1(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
 	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
 														P_TYPE v) \
 	{ \
 		P_TYPE ret, tmp; \
 		asm volatile( \
 			ATOMIC_ASM_OP(OP) \
 			: "=&r"(tmp), "=&r"(ret), "=m"(*var) : "r"(var), "r"(v) : "cc" \
 			); \
 		return RET_EXPR; \
 	}
 
 
 /* as above, but the value to store is taken from %4 and %0 (the previous
  * value) is returned */
 #define ATOMIC_FUNC_DECL2(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
 	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
 														P_TYPE v) \
 	{ \
 		P_TYPE ret, tmp; \
 		asm volatile( \
 			ATOMIC_ASM_OP2(OP) \
 			: "=&r"(ret), "=&r"(tmp), "=m"(*var) : "r"(var), "r"(v) : "cc" \
 			); \
 		return RET_EXPR; \
 	}
 
 
 #define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
 	inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
 														P_TYPE v ) \
 	{ \
 		P_TYPE ret; \
 		asm volatile( \
 			"     swp %0, %2, [%3] \n\t" \
 			: "=&r"(ret),  "=m"(*var) :\
 				"r"(v), "r"(var) \
 			); \
 		return ret; \
 	}
 
 
 /* cmpxchg: %5=old, %4=new_v, %3=var
  * if (*var==old) *var=new_v
  * returns the original *var (can be used to check if it succeeded:
  *  if old==cmpxchg(var, old, new_v) -> success)
  */
 #define ATOMIC_CMPXCHG_DECL(NAME, P_TYPE) \
 	inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
 														P_TYPE old, \
 														P_TYPE new_v) \
 	{ \
 		P_TYPE ret, tmp; \
 		asm volatile( \
 			"1:   ldrex %0, [%3] \n\t" \
 			"     cmp %0, %5 \n\t" \
 			"     strexeq %1, %4, [%3] \n\t" \
 			"     cmp %1, #0 \n\t" \
 			"     bne 1b \n\t" \
 			/* strexeq is exec. only if cmp was successful \
 			 * => if not successful %1 is not changed and remains 0 */ \
 			: "=&r"(ret), "=&r"(tmp), "=m"(*var) :\
 				"r"(var), "r"(new_v), "r"(old), "1"(0) : "cc" \
 			); \
 		return ret; \
 	}
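
 /* Usage sketch for the success check described above (illustrative comment
  *  only; "val" is a hypothetical variable, atomic_cmpxchg_int() is declared
  *  further below):
  *
  *    volatile int val;
  *    int old;
  *
  *    old = val;
  *    if (atomic_cmpxchg_int(&val, old, old + 1) == old) {
  *        // the swap succeeded: val went from old to old+1 atomically
  *    } else {
  *        // val was changed in between; re-read and retry if needed
  *    }
  */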
 
 
 
 ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", int, void, /* no return */ )
 ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", int, void, /* no return */ )
 ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", int, void, /* no return */ )
 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", int, void, /* no return */ )
 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", int, int, ret==0 )
 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", int, int, ret==0 )
 //ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , int, int,  ret)
 ATOMIC_XCHG_DECL(get_and_set, int)
 ATOMIC_CMPXCHG_DECL(cmpxchg, int)
 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", int, int, ret )
 
 ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", long, void, /* no return */ )
 ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", long, void, /* no return */ )
 ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", long, void, /* no return */ )
 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", long, void, /* no return */ )
 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", long, long, ret==0 )
 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", long, long, ret==0 )
 //ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , long, long,  ret)
 ATOMIC_XCHG_DECL(get_and_set, long)
 ATOMIC_CMPXCHG_DECL(cmpxchg, long)
 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", long, long, ret )
 
 #define atomic_inc(var) atomic_inc_int(&(var)->val)
 #define atomic_dec(var) atomic_dec_int(&(var)->val)
 #define atomic_and(var, mask) atomic_and_int(&(var)->val, (mask))
 #define atomic_or(var, mask)  atomic_or_int(&(var)->val, (mask))
 #define atomic_dec_and_test(var) atomic_dec_and_test_int(&(var)->val)
 #define atomic_inc_and_test(var) atomic_inc_and_test_int(&(var)->val)
 #define atomic_get_and_set(var, i) atomic_get_and_set_int(&(var)->val, i)
 #define atomic_cmpxchg(var, old, new_v) \
 	atomic_cmpxchg_int(&(var)->val, old, new_v)
 #define atomic_add(var, v) atomic_add_int(&(var)->val, (v))
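
 /* Usage sketch for the wrappers above (illustrative comment only; assumes
  *  the atomic_t type with a .val member from atomic_ops.h and a
  *  hypothetical "refcnt" variable):
  *
  *    atomic_t refcnt;
  *
  *    atomic_set_int(&refcnt.val, 1);   // init (atomic_set_int() assumed
  *                                      //  from the common atomic headers)
  *    atomic_inc(&refcnt);
  *    if (atomic_dec_and_test(&refcnt)) {
  *        // last reference gone
  *    }
  */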
 
 
 /* with integrated membar */
 
 #define mb_atomic_set_int(v, i) \
 	do{ \
 		membar(); \
 		atomic_set_int(v, i); \
 	}while(0)
 
 
 
 inline static int mb_atomic_get_int(volatile int* v)
 {
 	membar();
 	return atomic_get_int(v);
 }
 
 
 #define mb_atomic_inc_int(v) \
 	do{ \
 		membar(); \
 		atomic_inc_int(v); \
 	}while(0)
 
 #define mb_atomic_dec_int(v) \
 	do{ \
 		membar(); \
 		atomic_dec_int(v); \
 	}while(0)
 
 #define mb_atomic_or_int(v, m) \
 	do{ \
 		membar(); \
 		atomic_or_int(v, m); \
 	}while(0)
 
 #define mb_atomic_and_int(v, m) \
 	do{ \
 		membar(); \
 		atomic_and_int(v, m); \
 	}while(0)
 
 inline static int mb_atomic_inc_and_test_int(volatile int* v)
 {
 	membar();
 	return atomic_inc_and_test_int(v);
 }
 
 inline static int mb_atomic_dec_and_test_int(volatile int* v)
 {
 	membar();
 	return atomic_dec_and_test_int(v);
 }
 
 
 inline static int mb_atomic_get_and_set_int(volatile int* v, int i)
 {
 	membar();
 	return atomic_get_and_set_int(v, i);
 }
 
 inline static int mb_atomic_cmpxchg_int(volatile int* v, int o, int n)
 {
 	membar();
 	return atomic_cmpxchg_int(v, o, n);
 }
 
 inline static int mb_atomic_add_int(volatile int* v, int i)
 {
 	membar();
 	return atomic_add_int(v, i);
 }
 
 
 
 #define mb_atomic_set_long(v, i) \
 	do{ \
 		membar(); \
 		atomic_set_long(v, i); \
 	}while(0)
 
 
 
 inline static long mb_atomic_get_long(volatile long* v)
 {
 	membar();
 	return atomic_get_long(v);
 }
 
 
 #define mb_atomic_inc_long(v) \
 	do{ \
 		membar(); \
 		atomic_inc_long(v); \
 	}while(0)
 
 
 #define mb_atomic_dec_long(v) \
 	do{ \
 		membar(); \
 		atomic_dec_long(v); \
 	}while(0)
 
 #define mb_atomic_or_long(v, m) \
 	do{ \
 		membar(); \
 		atomic_or_long(v, m); \
 	}while(0)
 
 #define mb_atomic_and_long(v, m) \
 	do{ \
 		membar(); \
 		atomic_and_long(v, m); \
 	}while(0)
 
 inline static long mb_atomic_inc_and_test_long(volatile long* v)
 {
 	membar();
 	return atomic_inc_and_test_long(v);
 }
 
 inline static long mb_atomic_dec_and_test_long(volatile long* v)
 {
 	membar();
 	return atomic_dec_and_test_long(v);
 }
 
 
 inline static long mb_atomic_get_and_set_long(volatile long* v, long l)
 {
 	membar();
 	return atomic_get_and_set_long(v, l);
 }
 
 inline static long mb_atomic_cmpxchg_long(volatile long* v, long o, long n)
 {
 	membar();
 	return atomic_cmpxchg_long(v, o, n);
 }
 
 inline static long mb_atomic_add_long(volatile long* v, long i)
 {
 	membar();
 	return atomic_add_long(v, i);
 }
 
 #define mb_atomic_inc(var) mb_atomic_inc_int(&(var)->val)
 #define mb_atomic_dec(var) mb_atomic_dec_int(&(var)->val)
 #define mb_atomic_and(var, mask) mb_atomic_and_int(&(var)->val, (mask))
 #define mb_atomic_or(var, mask)  mb_atomic_or_int(&(var)->val, (mask))
 #define mb_atomic_dec_and_test(var) mb_atomic_dec_and_test_int(&(var)->val)
 #define mb_atomic_inc_and_test(var) mb_atomic_inc_and_test_int(&(var)->val)
 #define mb_atomic_get(var)	mb_atomic_get_int(&(var)->val)
 #define mb_atomic_set(var, i)	mb_atomic_set_int(&(var)->val, i)
 #define mb_atomic_get_and_set(var, i) mb_atomic_get_and_set_int(&(var)->val, i)
 #define mb_atomic_cmpxchg(var, o, n) mb_atomic_cmpxchg_int(&(var)->val, o, n)
 #define mb_atomic_add(var, i) mb_atomic_add_int(&(var)->val, i)
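
 /* Usage sketch for the mb_ wrappers above (illustrative comment only;
  *  "shared" is a hypothetical atomic_t visible to several CPUs/processes).
  *  The mb_ variants issue the memory barrier themselves, replacing explicit
  *  membar(); atomic_op() sequences:
  *
  *    mb_atomic_set(&shared, 1);
  *    if (mb_atomic_dec_and_test(&shared)) {
  *        // counter reached 0, barrier already taken care of
  *    }
  */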
 
 
 
 #else /* ! __CPU_arm6 => __CPU_arm */
 
 /* no atomic ops for v < 6, only SWP is supported
  * Atomic ops could be implemented if one bit is sacrificed and used like
  *  a spinlock, e.g.:
  *          mov %r0, #0x1
  *       1: swp %r1, %r0, [&atomic_val]
  *          if (%r1 & 0x1) goto 1 # wait while the first bit is 1
  *          %r1>>=1  # restore the value (only 31 bits can be used)
  *          %r1=op(%r1, ...)
  *          %r1<<=1  # shift the value back, so that the first bit is 0
  *          str %r1, [&atomic_val]  # write the value
  *
  * However, only 31 bits could be used (=> atomic_*_int and atomic_*_long
  *  would still have to be lock based, since in these cases we guarantee all
  *  the bits) and I'm not sure there would be a significant performance
  *  benefit compared with the fallback lock based version:
  *    lock(atomic_lock);
  *    atomic_val=op(*atomic_val, ...)
  *    unlock(atomic_lock);
  *
  *  -- andrei
  */
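
 /* A minimal sketch of the fallback lock based version mentioned above
  *  (illustrative comment only; lock_get()/lock_release() and the
  *  "atomic_lock" variable are assumed to come from the generic locking
  *  code used by the non-asm atomic ops):
  *
  *    extern gen_lock_t* atomic_lock;   // assumed initialized elsewhere
  *
  *    inline static void atomic_inc_int_fallback(volatile int* var)
  *    {
  *        lock_get(atomic_lock);
  *        (*var)++;
  *        lock_release(atomic_lock);
  *    }
  */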
 
 #endif /* __CPU_arm6 */
 
 
 #endif /* _atomic_arm_h */