
- fastlock: optimizations (in general a "nicer" spin on the lock for the other CPUs) and cleanups for sparc, sparc64, armv6 (nosmp for now), ppc*, mips*
- fastlock: alpha: replace the conditional jump backward with a conditional jump forward that then jumps backward (a conditional jump with a negative relative offset is always predicted as taken, and we want it predicted as not taken)
- fastlock: sparc (32) smp support
- lock_ops.h: introduced lock_try and lock_set_try (non-blocking versions of lock_*_get; they return -1 if they fail to get the lock and 0 on success) for all the supported locking methods (fast_lock, pthread_mutex, posix_sem, sysv_sems); a usage sketch follows below
- updated the locking doc
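The try-then-fall-back pattern enabled by the new calls can be shown with a minimal sketch; it assumes the lock_ops.h interface visible in the diff below (gen_lock_t, lock_try(), lock_get(), lock_release()), while the helper name and the shared counter are purely illustrative:

#include "lock_ops.h" /* gen_lock_t, lock_try(), lock_get(), lock_release() */

/* hypothetical helper: take the non-blocking path when the lock is free and
 * fall back to the blocking lock_get() only when it is contended */
static void update_counter(gen_lock_t* lock, volatile int* counter)
{
	if (lock_try(lock)==0){     /* 0 => we own the lock, nothing blocked */
		(*counter)++;
		lock_release(lock);
	}else{                      /* failed => somebody else holds the lock */
		lock_get(lock);         /* blocking fall-back */
		(*counter)++;
		lock_release(lock);
	}
}

Testing only for 0 keeps the sketch portable: the pthread backend maps lock_try() to pthread_mutex_trylock(), which reports failure with an error code rather than -1.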

Andrei Pelinescu-Onciul authored on 04/04/2006 18:04:01
Showing 4 changed files
@@ -66,7 +66,7 @@ MAIN_NAME=ser
 VERSION = 0
 PATCHLEVEL = 10
 SUBLEVEL =   99
-EXTRAVERSION = -dev35
+EXTRAVERSION = -dev36
 
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
 			$(SUBLEVEL) )
@@ -3,6 +3,7 @@
 # History:
 #---------
 #  2003-03-11  created by andrei
+#  2006-04-04  minor archs updates, added lock_try(..)  (andrei)
 
 
 SER locking interface
@@ -12,9 +13,9 @@ SER locking interface
    ----------
 
 The main reason in creating it was to have a single transparent interface to various locking methods. For example right now ser uses the following locking methods, depending on their availability on the target system:
- FAST_LOCK - fast inline assembly locks, defined in fast_lock.h. They are currently available for x86, sparc64, strongarm (amv4l) and ppc (external untested contributed code). In general if the assembly code exists for a given arhitecture and the compiler knows inline assembly (for example sun cc does not) FAST_LOCK is prefered. The main advantage of using FAST_LOCK is very low memory overhead and extremely fast lock/unlock operations (like 20 times faster then SYSV semaphores on linux & 40 times on solaris). The only thing that comes close to them are pthread mutexes (which are about 3-4 times slower).
+ FAST_LOCK - fast inline assembly locks, defined in fast_lock.h. They are currently available for x86, x86_64, sparc, sparc64, arm , armv6 (no smp mode supported yet), ppc, ppc64, mips, mips64 and alpha . In general if the assembly code exists for a given arhitecture and the compiler knows inline assembly (for example sun cc does not) FAST_LOCK is prefered. The main advantage of using FAST_LOCK is very low memory overhead and extremely fast lock/unlock operations (like 20 times faster then SYSV semaphores on linux & 40 times on solaris). The only thing that comes close to them are pthread mutexes (which are about 3-4 times slower).
 PTHREAD_MUTEX - uses pthread_mutex_lock/unlock. They are quite fast but they work between processes only on some systems (they do not work on linux).
- POSIX_SEM  - uses posix semaphores (sem_wait/sem_post). They are slower then the previous methods but still way faster then SYSV sempahores. Unfortunately they also do not work  on all the systems (e.g. linux).
+ POSIX_SEM  - uses posix semaphores (sem_wait/sem_post). They are slower then the previous methods but still way faster then SYSV sempahores. Unfortunately they also do not work on all the systems (e.g. linux).
 SYSV_SEM - this is the most portable but also the slowest locking method. Another problem is that the number of semaphores that can be alocated by a process is limited. One also has to free them before exiting.
 
 
@@ -93,6 +94,9 @@ Locking & unlocking:
 
 void    lock_get(gen_lock_t* lock);      - lock (mutex down)
 void    lock_release(gen_lock_t* lock);  - unlock (mutex up)
+int     lock_try(gen_lock_t* lock);      - tries to lock and returns 0 
+                                           if succesfull, -1 if not (this is
+                                           a non-blocking lock_get())
 
 
 
@@ -143,6 +147,9 @@ Locking & unlocking:
 
 void lock_set_get(lock_set_t* s, int i);
 void lock_set_release(lock_set_t* s, int i);
+int  lock_set_try(lock_set_t* s, int i);      - tries to lock the i-th lock
+                                                from the set. If succesfull
+                                                returns 0, if not -1.
 
 Example:
 
@@ -49,9 +49,20 @@
  *               (membar_getlock()) (andrei)
  *              added try_lock(); more x86 optimizations, x86  release_lock
  *               fix (andrei)
+ * 2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp support,
+ *              ppc, mips*, alpha optimizations (andrei)
  *
  */
 
+/*
+ * WARNING: the code was not tested on the following architectures:
+ *           - arm6  (cross-compiles ok, no test)
+ *           - alpha (cross-compiles ok, no test)
+ *           - mips64 (cross-compiles ok)
+ *           - ppc64 (compiles ok)
+ *           - sparc32 (tested on a sparc64)
+ */
+
 
 #ifndef fastlock_h
 #define fastlock_h
@@ -87,17 +98,27 @@ typedef  volatile int fl_lock_t;
  *  WARNING: this is intended only for internal fastlock use*/
 #if defined(__CPU_i386) || defined(__CPU_x86_64)
 #define membar_getlock()   /* not needed on x86 */
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+
+#elif defined(__CPU_sparc64)
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
+	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
+	 * since ldstub acts both as a store and as a load */
 #else
 /* no need for a compiler barrier, that is already included in lock_get/tsl*/
 #define membar_getlock() /* not needed if no smp*/
 #endif /* NOSMP */
+
+#elif  defined(__CPU_sparc)
+#define membar_getlock()/* no need for a compiler barrier, already included */
+
 #elif defined __CPU_arm || defined __CPU_arm6
-#error "FIXME: check arm6 membar"
+#ifndef NOSMP
+#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
+#endif /* NOSMP */
 #define membar_getlock() 
+
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 #ifndef NOSMP
 #define membar_getlock() \
@@ -105,14 +126,21 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
-	|| defined __CPU_mips64
+
+#elif defined __CPU_mips2 || defined __CPU_mips64
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("sync \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
+
+#elif defined __CPU_mips
+#ifndef NOSMP
+#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
+#endif
+#define membar_getlock() 
+
 #elif defined __CPU_alpha
 #ifndef NOSMP
 #define membar_getlock() \
@@ -120,13 +148,14 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#else
+
+#else /* __CPU_xxx */
 #error "unknown architecture"
 #endif
 
 
 
-/*test and set lock, ret 1 if lock held by someone else, 0 otherwise
+/*test and set lock, ret !=0 if lock held by someone else, 0 otherwise
 * WARNING: no memory barriers included, if you use this function directly
 *          (not recommended) and it gets the lock (ret==0), you should call 
 *          membar_getlock() after it */
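The WARNING above spells out the contract for calling tsl() directly: spin while it returns non-zero, then issue membar_getlock() once it returns 0. Purely as an illustration (the project's own get_lock() in fastlock.h, not shown in this diff, is the authoritative implementation; the helper name below is hypothetical):

/* sketch of a direct tsl() user: spin until the lock is obtained (tsl()==0),
 * then issue the acquire barrier exactly as the comment above requires */
inline static void spin_get_sketch(fl_lock_t* lock)
{
	while(tsl(lock)!=0){
		/* lock held by someone else: keep spinning (back-off omitted) */
	}
	membar_getlock(); /* needed only on the successful (ret==0) path */
}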
@@ -155,35 +184,70 @@ inline static int tsl(fl_lock_t* lock)
 		" xchgb %2, %b0 \n\t"
 		"1: \n\t"
 		: "=q" (val), "=m" (*lock) : "m"(*lock) : "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
 #endif /*NOSMP*/
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+#elif defined(__CPU_sparc64)
+	asm volatile(
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   brnz,a,pn %0, 1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
+			/* membar_getlock must be  called outside this function */
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+	);
+#elif defined(__CPU_sparc)
 	asm volatile(
-			"ldstub [%1], %0 \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   tst %0 \n\t"
+			"   bne,a  1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
 			/* membar_getlock must be  called outside this function */
-			: "=r"(val) : "r"(lock):"memory"
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
-	
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm 
 	asm volatile(
-			"# here \n\t"
-			"swpb %0, %1, [%2] \n\t"
-			: "=r" (val)
-			: "r"(1), "r" (lock) : "memory"
+			"swp %0, %2, [%3] \n\t"
+			: "=r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
+	);
+#elif defined __CPU_arm6
+	asm volatile(
+			"   ldrex %0, [%2] \n\t" 
+			"   cmp %0, #0 \n\t"
+			"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
+			/* if %0!=0 => either it was 1 initially or was 0
+			 * and somebody changed it just before the strexeq (so the 
+			 * lock is taken) => it's safe to return %0 */
+			: "=r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
 	);
-	
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
-			"1: lwarx  %0, 0, %2\n\t"
+			"1: \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   lwz %0, 0, (%2) \n\t"
+			"   cmpwi %0, 0 \n\t"
+			"   bne- 2f \n\t" /* predict: not taken */
+#endif
+			"   lwarx  %0, 0, %2\n\t"
 			"   cmpwi  %0, 0\n\t"
-			"   bne    0f\n\t"
-			"   stwcx. %1, 0, %2\n\t"
+			"   bne-    2f\n\t"
+			"   stwcx. %3, 0, %2\n\t"
 			"   bne-   1b\n\t"
 			/* membar_getlock must be  called outside this function */
-			"0:\n\t"
-			: "=r" (val)
-			: "r"(1), "b" (lock) :
-			"memory", "cc"
+			"2:\n\t"
+			: "=r" (val), "=m"(*lock) :  "r" (lock), "r"(1) : "memory", "cc"
         );
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -193,11 +257,18 @@ inline static int tsl(fl_lock_t* lock)
 		".set push \n\t"
 		".set noreorder\n\t"
 		".set mips2 \n\t"
+#ifdef SPIN_OPTIMIZE
+		"    lw %1, %2 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    nop \n\t"
+#endif
 		"1:  ll %1, %2   \n\t"
-		"    li %0, 1 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    li %0, 1 \n\t"  /* delay slot */
 		"    sc %0, %2  \n\t"
 		"    beqz %0, 1b \n\t"
 		"    nop \n\t"
+		"2: \n\t"
 		/* membar_getlock must be called outside this function */
 		".set pop\n\t"
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
@@ -215,9 +286,13 @@ inline static int tsl(fl_lock_t* lock)
 		"    blbs %0, 2f  \n\t" 
 		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
 		"    stl_c %2, %1 \n\t"
-		"    beq %2, 1b   \n\t"
+		"    beq %2, 3f   \n\t" /* back cond. jumps are always predicted to be 
+								   taken => make forward jump */
 		/* membar_getlock must be called outside this function */
 		"2:               \n\t"
+		".subsection 2 \n\t"
+		"3:  br 1b \n\t"
+		".previous \n\t"
 		:"=&r" (val), "=m"(*lock), "=r"(tmp)
 		:"m"(*lock) 
 		: "memory"
@@ -289,19 +364,22 @@ inline static void release_lock(fl_lock_t* lock)
 #elif defined(__CPU_sparc64) || defined(__CPU_sparc)
 	asm volatile(
 #ifndef NOSMP
-			"membar #LoadStore | #StoreStore \n\t" /*is this really needed?*/
+#ifdef __CPU_sparc64
+			"membar #LoadStore | #StoreStore \n\t"
+#else /* __CPU_sparc */
+			"stbar \n\t"
+#endif /* __CPU_sparc64 */
 #endif
-			"stb %%g0, [%0] \n\t"
-			: /*no output*/
-			: "r" (lock)
-			: "memory"
+			"stb %%g0, [%1] \n\t"
+			: "=m"(*lock) : "r" (lock) : "memory"
 	);
 #elif defined __CPU_arm || defined __CPU_arm6
+#ifndef NOSMP
+#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
+#endif
 	asm volatile(
-		" str %0, [%1] \n\r" 
-		: /*no outputs*/ 
-		: "r"(0), "r"(lock)
-		: "memory"
+		" str %1, [%2] \n\r" 
+		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
 	);
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
@@ -310,10 +388,8 @@ inline static void release_lock(fl_lock_t* lock)
 			 *             [IBM Prgramming Environments Manual, D.4.2.2]
 			 */
 			"lwsync\n\t"
-			"stw %0, 0(%1)\n\t"
-			: /* no output */
-			: "r"(0), "b" (lock)
-			: "memory"
+			"stw %1, 0(%2)\n\t"
+			: "=m"(*lock) : "r"(0), "r" (lock) : "memory"
 	);
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -322,15 +398,21 @@ inline static void release_lock(fl_lock_t* lock)
 		".set noreorder \n\t"
 		".set mips2 \n\t"
 #ifndef NOSMP
+#ifdef __CPU_mips
+#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
+#else
 		"    sync \n\t"
 #endif
+#endif
 		"    sw $0, %0 \n\t"
 		".set pop \n\t"
 		: "=m" (*lock)  : /* no input */ : "memory"
 	);
 #elif defined __CPU_alpha
 	asm volatile(
+#ifndef  NOSMP
 		"    mb          \n\t"
+#endif
 		"    stl $31, %0 \n\t"
 		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
 	);  
@@ -42,6 +42,7 @@
  *  2003-03-17  possible signal interruptions treated for sysv (andrei)
  *  2004-07-28  s/lock_set_t/gen_lock_set_t/ because of a type conflict
  *              on darwin (andrei)
+ *  2006-04-04  added lock_try(lock) and lock_set_try(s,i) (andrei)
  *
 Implements:
 
@@ -51,13 +52,19 @@ Implements:
 	void    lock_destroy(gen_lock_t* lock);  - removes the lock (e.g sysv rmid)
 	void    lock_get(gen_lock_t* lock);      - lock (mutex down)
 	void    lock_release(gen_lock_t* lock);  - unlock (mutex up)
+	int     lock_try(gen_lock_t* lock);      - tries to get the lock, returns
+	                                            0 on success and -1 on failure
 	
-	lock sets: [implemented only for FL & SYSV so far]
+	lock sets: 
 	----------
 	gen_lock_set_t* lock_set_init(gen_lock_set_t* set);  - inits the lock set
 	void lock_set_destroy(gen_lock_set_t* s);        - removes the lock set
 	void lock_set_get(gen_lock_set_t* s, int i);     - locks sem i from the set
-	void lock_set_release(gen_lock_set_t* s, int i)  - unlocks sem i from the set
+	void lock_set_release(gen_lock_set_t* s, int i)  - unlocks sem i from the
+	                                                   set
+	int  lock_set_try(gen_lock_set_t* s, int i);    - tries to lock the sem i,
+	                                                  returns 0 on success and
+	                                                  -1 on failure
 
 WARNING: - lock_set_init may fail for large number of sems (e.g. sysv). 
          - signals are not treated! (some locks are "awakened" by the signals)
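As with lock_try(), the lock-set variant documented above lends itself to a short sketch; it assumes a gen_lock_set_t that has already been allocated and passed through lock_set_init() (not shown), and the helper name is hypothetical:

/* work on slot i only if its lock can be taken without blocking */
inline static int try_update_slot(gen_lock_set_t* s, int i)
{
	if (lock_set_try(s, i)!=0)
		return -1;          /* slot i is busy, the caller may retry later */
	/* ... critical section protected by the i-th lock of the set ... */
	lock_set_release(s, i);
	return 0;
}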
@@ -81,9 +88,11 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	return lock;
 }
 
+#define lock_try(lock) try_lock(lock)
 #define lock_get(lock) get_lock(lock)
 #define lock_release(lock) release_lock(lock)
 
+
 #elif defined USE_PTHREAD_MUTEX
 #include <pthread.h>
 
@@ -97,6 +106,7 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	else return 0;
 }
 
+#define lock_try(lock) pthread_mutex_trylock(lock)
 #define lock_get(lock) pthread_mutex_lock(lock)
 #define lock_release(lock) pthread_mutex_unlock(lock)
 
@@ -115,6 +125,7 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	return lock;
 }
 
+#define lock_try(lock) sem_trywait(lock)
 #define lock_get(lock) sem_wait(lock)
 #define lock_release(lock) sem_post(lock)
 
@@ -174,6 +185,30 @@ inline static void lock_destroy(gen_lock_t* lock)
 }
 
 
+/* returns 0 if it got the lock, -1 otherwise */
+inline static int lock_try(gen_lock_t* lock)
+{
+	struct sembuf sop;
+
+	sop.sem_num=0;
+	sop.sem_op=-1; /* down */
+	sop.sem_flg=IPC_NOWAIT; 
+tryagain:
+	if (semop(*lock, &sop, 1)==-1){
+		if (errno==EAGAIN){
+			return -1;
+		}else if (errno==EINTR){
+			DBG("lock_get: signal received while waiting for on a mutex\n");
+			goto tryagain;
+		}else{
+			LOG(L_CRIT, "ERROR: lock_get sysv: %s (%d)\n", strerror(errno),
+						errno);
+			return -1;
+		}
+	}
+	return 0;
+}
+
 inline static void lock_get(gen_lock_t* lock)
 {
 	struct sembuf sop;
@@ -241,6 +276,7 @@ inline static gen_lock_set_t* lock_set_init(gen_lock_set_t* s)
 }
 
 /* WARNING: no boundary checks!*/
+#define lock_set_try(set, i) lock_try(&set->locks[i])
 #define lock_set_get(set, i) lock_get(&set->locks[i])
 #define lock_set_release(set, i) lock_release(&set->locks[i])
 
@@ -289,6 +325,32 @@ inline static void lock_set_destroy(gen_lock_set_t* s)
 	semctl(s->semid, 0, IPC_RMID, (union semun)(int)0);
 }
 
+
+/* returns 0 if it "gets" the lock, -1 otherwise */
+inline static int lock_set_try(gen_lock_set_t* s, int n)
+{
+	struct sembuf sop;
+	
+	sop.sem_num=n;
+	sop.sem_op=-1; /* down */
+	sop.sem_flg=IPC_NOWAIT; 
+tryagain:
+	if (semop(s->semid, &sop, 1)==-1){
+		if (errno==EAGAIN){
+			return -1;
+		}else if (errno==EINTR){
+			DBG("lock_get: signal received while waiting for on a mutex\n");
+			goto tryagain;
+		}else{
+			LOG(L_CRIT, "ERROR: lock_get sysv: %s (%d)\n", strerror(errno),
+						errno);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+
 inline static void lock_set_get(gen_lock_set_t* s, int n)
 {
 	struct sembuf sop;