/*
 * fast architecture specific locking
 *
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*!
 * \file
 * \brief Kamailio core :: fast architecture specific locking
 * \author andrei
 * \ingroup core
 * Module: \ref core
 *
 * WARNING: the code was not tested on the following architectures:
 *           - arm6  (cross-compiles ok, no test)
 *           - alpha (cross-compiles ok, no test)
 *           - mips64 (cross-compiles ok)
 *           - ppc64 (compiles ok)
 *           - sparc32 (tested on a sparc64)
 */
 
#ifndef fastlock_h
#define fastlock_h

#include "sched_yield.h"

#define SPIN_OPTIMIZE /* if defined, optimize spinning on the lock:
                         try first the lock with non-atomic/non-memory-locking
                         operations, and only if the lock appears to be free
                         switch to the more expensive version */
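
/* Illustrative sketch only (not part of this header): the SPIN_OPTIMIZE idea
 * expressed with portable C11 atomics -- peek at the lock with a cheap,
 * non-locking load first and attempt the expensive atomic exchange only when
 * the lock looks free ("test-and-test-and-set"). The names demo_lock_t and
 * demo_tsl are hypothetical and used only for illustration.
 *
 *   #include <stdatomic.h>
 *
 *   typedef atomic_int demo_lock_t;
 *
 *   static inline int demo_tsl(demo_lock_t* lock)
 *   {
 *       // cheap read: no bus lock, no store
 *       if (atomic_load_explicit(lock, memory_order_relaxed) != 0)
 *           return 1;                // looks busy, report failure right away
 *       // lock looks free: try the expensive atomic swap;
 *       // returns the previous value: 0 => we got it, 1 => someone beat us
 *       return atomic_exchange_explicit(lock, 1, memory_order_acquire);
 *   }
 */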
 
typedef volatile int fl_lock_t;
 
 
 
 #define init_lock( l ) (l)=0
 
 
/* what membar to use (if any) after taking a lock. This
 *  was separated from the lock code to allow better optimizations,
 *  e.g.: use membar_getlock only after getting the lock and don't use
 *  it if lock_get fails / when spinning on tsl.
 *  There is no corresponding membar_release_lock (because lock_release
 *  must always include the needed memory barrier).
 *  WARNING: this is intended only for internal fastlock use */
 #if defined(__CPU_i386) || defined(__CPU_x86_64)
 #define membar_getlock()   /* not needed on x86 */

#elif defined(__CPU_sparc64)
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
 	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
 	 * since ldstub acts both as a store and as a load */
 #else
 /* no need for a compiler barrier, that is already included in lock_get/tsl*/
 #define membar_getlock() /* not needed if no smp*/
 #endif /* NOSMP */

#elif defined(__CPU_sparc)
#define membar_getlock() /* no need for a compiler barrier, already included */

#elif defined __CPU_arm || defined __CPU_arm6
 #ifndef NOSMP
 #warning smp not supported on arm* (no membars), try compiling with -DNOSMP
 #endif /* NOSMP */
#define membar_getlock()

 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("lwsync \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */

#elif defined __CPU_mips2 || defined __CPU_mips64
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("sync \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
 
 #elif defined __CPU_mips
 #ifndef NOSMP
 #warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
 #endif
 #define membar_getlock() 
 
 #elif defined __CPU_alpha
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("mb \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */

#else /* __CPU_xxx */
 #error "unknown architecture"
 #endif
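
/* Note (illustration of the rule above): membar_getlock() is meant to be
 * issued only after tsl() has actually obtained the lock (returned 0),
 * exactly as get_lock() and try_lock() below do:
 *
 *   if (tsl(lock) == 0) {
 *       membar_getlock();  // order the critical section after lock acquisition
 *       // ... critical section ...
 *   }
 */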
 
/* test and set lock, ret !=0 if lock held by someone else, 0 otherwise
 * WARNING: no memory barriers included; if you use this function directly
 *          (not recommended) and it gets the lock (ret==0), you should call
 *          membar_getlock() after it */
inline static int tsl(fl_lock_t* lock)
{
	int val;

#if defined(__CPU_i386) || defined(__CPU_x86_64)

#ifdef NOSMP
	asm volatile(
		" xor %0, %0 \n\t"
		" btsl $0, %2 \n\t"
		" setc %b0 \n\t"
		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
	);
#else
 	asm volatile(
 #ifdef SPIN_OPTIMIZE
 		" cmpb $0, %2 \n\t"
 		" mov $1, %0 \n\t"
 		" jnz 1f \n\t"
 #else
 		" mov $1, %0 \n\t"
 #endif
 		" xchgb %2, %b0 \n\t"
 		"1: \n\t"
		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
#ifdef SPIN_OPTIMIZE
				, "cc"
#endif
	);
#endif /*NOSMP*/
 #elif defined(__CPU_sparc64)
 	asm volatile(
 #ifdef SPIN_OPTIMIZE
 			"   ldub [%2], %0 \n\t"
 			"   brnz,a,pn %0, 1f \n\t"
 			"   nop \n\t"
 #endif
 			"   ldstub [%2], %0 \n\t"
 			"1: \n\t"
			/* membar_getlock must be called outside this function */
			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
	);
#elif defined(__CPU_sparc)
	asm volatile(
 #ifdef SPIN_OPTIMIZE
 			"   ldub [%2], %0 \n\t"
 			"   tst %0 \n\t"
 			"   bne,a  1f \n\t"
 			"   nop \n\t"
 #endif
 			"   ldstub [%2], %0 \n\t"
 			"1: \n\t"
			/* membar_getlock must be called outside this function */
			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
#ifdef SPIN_OPTIMIZE
				, "cc"
#endif
	);
#elif defined __CPU_arm
 	asm volatile(
			"swp %0, %2, [%3] \n\t"
			: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
 	);
 #elif defined __CPU_arm6
 	asm volatile(
 			"   ldrex %0, [%2] \n\t" 
 			"   cmp %0, #0 \n\t"
 			"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
 			/* if %0!=0 => either it was 1 initially or was 0
 			 * and somebody changed it just before the strexeq (so the 
 			 * lock is taken) => it's safe to return %0 */
			: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
	);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile(
 			"1: \n\t"
 #ifdef SPIN_OPTIMIZE
			"   lwzx %0, 0, %2 \n\t"
 			"   cmpwi %0, 0 \n\t"
 			"   bne- 2f \n\t" /* predict: not taken */
 #endif
 			"   lwarx  %0, 0, %2\n\t"
			"   cmpwi  %0, 0\n\t"
			"   bne-    2f\n\t"
			"   stwcx. %3, 0, %2\n\t"
			"   bne-   1b\n\t"
			/* membar_getlock must be called outside this function */
			"2:\n\t"
			: "=&r" (val), "=m"(*lock) :  "r"(lock), "r"(1) : "memory", "cc"
	);
#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
	|| defined __CPU_mips64
 	long tmp;
 	
 	asm volatile(
		".set push \n\t"
		".set noreorder\n\t"
		".set mips2 \n\t"
#ifdef SPIN_OPTIMIZE
		"    lw %1, %2 \n\t"
		"    bne %1, $0, 2f \n\t"
		"    nop \n\t"
#endif
		"1:  ll %1, %2   \n\t"
		"    bne %1, $0, 2f \n\t"
		"    li %0, 1 \n\t"  /* delay slot */
		"    sc %0, %2  \n\t"
		"    beqz %0, 1b \n\t"
		"    nop \n\t"
		"2: \n\t"
		/* membar_getlock must be called outside this function */
		".set pop\n\t"
		: "=&r" (tmp), "=&r" (val), "=m" (*lock)
		: "m" (*lock)
		: "memory"
	);
 #elif defined __CPU_alpha
 	long tmp;
 	tmp=0;
	/* lock low bit set to 1 when the lock is held and to 0 otherwise */
 	asm volatile(
 		"1:  ldl %0, %1   \n\t"
 		"    blbs %0, 2f  \n\t"  /* optimization if locked */
 		"    ldl_l %0, %1 \n\t"
 		"    blbs %0, 2f  \n\t" 
 		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
 		"    stl_c %2, %1 \n\t"
		"    beq %2, 3f   \n\t" /* back cond. jumps are always predicted to be
								   taken => make forward jump */
		/* membar_getlock must be called outside this function */
		"2:               \n\t"
		".subsection 2 \n\t"
		"3:  br 1b \n\t"
		".previous \n\t"
		:"=&r" (val), "=m"(*lock), "=&r"(tmp)
		:"m"(*lock)
		: "memory"
	);
#else
#error "unknown architecture"
#endif
	return val;
 }
 
 
 
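/* acquires the lock, spinning until it is obtained. The waiting strategy is
 * chosen at compile time: BUSY_WAIT spins without yielding, ADAPTIVE_WAIT
 * spins ADAPTIVE_WAIT_LOOPS times before starting to call sched_yield(), and
 * the default calls sched_yield() after every failed attempt */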
inline static void get_lock(fl_lock_t* lock)
{
#ifdef ADAPTIVE_WAIT
	int i=ADAPTIVE_WAIT_LOOPS;
#endif

	while(tsl(lock)){
#ifdef BUSY_WAIT
#elif defined ADAPTIVE_WAIT
		if (i>0) i--;
		else sched_yield();
#else
		sched_yield();
#endif
	}
 	membar_getlock();
 }
 
 
 
/* like get_lock, but it doesn't wait. Returns 0 if it gets the lock,
 * -1 (<0) otherwise */
 inline static int try_lock(fl_lock_t* lock)
 {
 	if (tsl(lock)){
 		return -1;
 	}
 	membar_getlock();
 	return 0;
 }
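
/* Usage sketch (illustration only; counter_lock, counter and the two helper
 * functions are hypothetical and not part of this header):
 *
 *   static fl_lock_t counter_lock;    // zero-initialized == unlocked
 *   static volatile int counter;
 *
 *   void counter_inc(void)
 *   {
 *       get_lock(&counter_lock);      // spins/yields until acquired
 *       counter++;                    // critical section
 *       release_lock(&counter_lock);  // includes the needed release barrier
 *   }
 *
 *   int counter_try_inc(void)
 *   {
 *       if (try_lock(&counter_lock) < 0)
 *           return -1;                // lock busy, don't block
 *       counter++;
 *       release_lock(&counter_lock);
 *       return 0;
 *   }
 */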
 
 
 
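/* releases the lock. Always includes the memory barrier needed on release,
 * so the caller does not have to add any membar */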
inline static void release_lock(fl_lock_t* lock)
{
#if defined(__CPU_i386)
#ifdef NOSMP
	asm volatile(
		" movb $0, %0 \n\t"
		: "=m"(*lock) : : "memory"
	);
 #else /* ! NOSMP */
 	int val;
 	/* a simple mov $0, (lock) does not force StoreStore ordering on all
 	   x86 versions and it doesn't seem to force LoadStore either */
 	asm volatile(
 		" xchgb %b0, %1 \n\t"
 		: "=q" (val), "=m" (*lock) : "0" (0) : "memory"
 	);
 #endif /* NOSMP */
 #elif defined(__CPU_x86_64)
 	asm volatile(
 		" movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is 
 							   implicit (at least on the same mem. type) */
 		: "=m"(*lock) : : "memory"
 	);
#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
	asm volatile(
#ifndef NOSMP
#ifdef __CPU_sparc64
			"membar #LoadStore | #StoreStore \n\t"
#else /* __CPU_sparc */
			"stbar \n\t"
#endif /* __CPU_sparc64 */
#endif
			"stb %%g0, [%1] \n\t"
			: "=m"(*lock) : "r" (lock) : "memory"
	);
#elif defined __CPU_arm || defined __CPU_arm6
 #ifndef NOSMP
 #warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
 #endif
	asm volatile(
		" str %1, [%2] \n\t"
		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
	);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile(
 			/* "sync\n\t"  lwsync is faster and will work
 			 *             here too
 			 *             [IBM Prgramming Environments Manual, D.4.2.2]
 			 */
 			"lwsync\n\t"
ec495407
 			"stwx %1, 0, %2\n\t"
 			: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
871a6a07
 	);
0db44da7
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
871a6a07
 	asm volatile(
		".set push \n\t"
		".set noreorder \n\t"
		".set mips2 \n\t"
#ifndef NOSMP
#ifdef __CPU_mips
#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
#else
		"    sync \n\t"
#endif
#endif
		"    sw $0, %0 \n\t"
		".set pop \n\t"
		: "=m" (*lock)  : /* no input */ : "memory"
	);
#elif defined __CPU_alpha
	asm volatile(
#ifndef NOSMP
		"    mb          \n\t"
#endif
 		"    stl $31, %0 \n\t"
 		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
 	);  
#else
#error "unknown architecture"
#endif
 }
 
 
 
 #endif