
- fastlock: optimizations (in general a "nicer" spin on the lock for the other cpus) and cleanups for sparc, sparc64, armv6 (nosmp for now), ppc*, mips*
- fastlock: alpha: replace the cond. jump backward with a cond. jump forward followed by a jump backward (a cond. jump with a negative relative offset is always predicted as taken, and we want it predicted as not taken)
- fastlock: sparc (32) smp support
- lock_ops.h: introduced lock_try and lock_set_try (non-blocking versions of lock_*_get; they return -1 if the lock could not be taken and 0 on success), for all the supported locking methods (fast_lock, pthread_mutex, posix_sem, sysv_sems)
- updated locking doc

Andrei Pelinescu-Onciul authored on 04/04/2006 18:04:01
4 changed files
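The "nicer" spin mentioned in the commit message is the classic test-and-test-and-set pattern: waiting CPUs spin on a plain read (served from their own cache) and only attempt the atomic operation once the lock looks free, instead of hammering the bus with locked writes. Below is a minimal C sketch of the idea, using the GCC __sync builtins rather than the per-architecture assembly this commit actually adds (spin_get/spin_release are illustrative names, not part of the code base):

    /* illustrative sketch only; the real implementation is the hand-written
     * asm in fastlock.h, with the read-ahead guarded by SPIN_OPTIMIZE */
    typedef volatile int demo_lock_t;          /* mirrors fl_lock_t from fastlock.h */

    static inline void spin_get(demo_lock_t *lock)
    {
        for (;;) {
            while (*lock)                      /* cheap read-only spin */
                ;
            if (__sync_lock_test_and_set(lock, 1) == 0)
                return;                        /* previous value was 0 => we got it */
        }
    }

    static inline void spin_release(demo_lock_t *lock)
    {
        __sync_lock_release(lock);             /* release barrier + store 0 */
    }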
@@ -66,7 +66,7 @@ MAIN_NAME=ser
 VERSION = 0
 PATCHLEVEL = 10
 SUBLEVEL =   99
-EXTRAVERSION = -dev35
+EXTRAVERSION = -dev36
 
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
 			$(SUBLEVEL) )
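With the values shown above, the computed numeric version works out to SER_VER = 0 * 1000000 + 10 * 1000 + 99 = 10099; EXTRAVERSION (-dev36) is only a suffix and does not enter this number.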
@@ -3,6 +3,7 @@
 # History:
 #---------
 #  2003-03-11  created by andrei
+#  2006-04-04  minor archs updates, added lock_try(..)  (andrei)
 
 
 SER locking interface
@@ -12,9 +13,9 @@ SER locking interface
    ----------
 
 The main reason in creating it was to have a single transparent interface to various locking methods. For example right now ser uses the following locking methods, depending on their availability on the target system:
- FAST_LOCK - fast inline assembly locks, defined in fast_lock.h. They are currently available for x86, sparc64, strongarm (amv4l) and ppc (external untested contributed code). In general if the assembly code exists for a given arhitecture and the compiler knows inline assembly (for example sun cc does not) FAST_LOCK is prefered. The main advantage of using FAST_LOCK is very low memory overhead and extremely fast lock/unlock operations (like 20 times faster then SYSV semaphores on linux & 40 times on solaris). The only thing that comes close to them are pthread mutexes (which are about 3-4 times slower).
+ FAST_LOCK - fast inline assembly locks, defined in fast_lock.h. They are currently available for x86, x86_64, sparc, sparc64, arm , armv6 (no smp mode supported yet), ppc, ppc64, mips, mips64 and alpha . In general if the assembly code exists for a given arhitecture and the compiler knows inline assembly (for example sun cc does not) FAST_LOCK is prefered. The main advantage of using FAST_LOCK is very low memory overhead and extremely fast lock/unlock operations (like 20 times faster then SYSV semaphores on linux & 40 times on solaris). The only thing that comes close to them are pthread mutexes (which are about 3-4 times slower).
 PTHREAD_MUTEX - uses pthread_mutex_lock/unlock. They are quite fast but they work between processes only on some systems (they do not work on linux).
- POSIX_SEM  - uses posix semaphores (sem_wait/sem_post). They are slower then the previous methods but still way faster then SYSV sempahores. Unfortunately they also do not work  on all the systems (e.g. linux).
+ POSIX_SEM  - uses posix semaphores (sem_wait/sem_post). They are slower then the previous methods but still way faster then SYSV sempahores. Unfortunately they also do not work on all the systems (e.g. linux).
 SYSV_SEM - this is the most portable but also the slowest locking method. Another problem is that the number of semaphores that can be alocated by a process is limited. One also has to free them before exiting.
 
 
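Which of these methods a build ends up with is decided at compile time via preprocessor defines. A hedged sketch of reporting the selected backend follows; apart from USE_PTHREAD_MUTEX, which appears later in this diff, the macro names (FAST_LOCK, USE_POSIX_SEM, USE_SYSV_SEM) are assumptions about the build flags:

    /* sketch: record which locking backend was compiled in (macro names partly assumed) */
    #ifdef FAST_LOCK
    #    define LOCK_METHOD "fast inline-asm locks (fast_lock.h)"
    #elif defined USE_PTHREAD_MUTEX
    #    define LOCK_METHOD "pthread mutexes"
    #elif defined USE_POSIX_SEM
    #    define LOCK_METHOD "POSIX semaphores"
    #else
    #    define LOCK_METHOD "SysV semaphores"
    #endif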
@@ -93,6 +94,9 @@ Locking & unlocking:
 
 void    lock_get(gen_lock_t* lock);      - lock (mutex down)
 void    lock_release(gen_lock_t* lock);  - unlock (mutex up)
+int     lock_try(gen_lock_t* lock);      - tries to lock and returns 0 
+                                           if succesfull, -1 if not (this is
+                                           a non-blocking lock_get())
 
 
 
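A hedged usage sketch of the calls documented above; the lock must live in shared memory, and both the header name "locking.h" and the shm_malloc() allocator are assumptions, not something this diff shows:

    #include "locking.h"                 /* assumed: header exposing gen_lock_t & friends */

    static gen_lock_t* mylock;

    static int my_lock_setup(void)
    {
        mylock = shm_malloc(sizeof(gen_lock_t));   /* assumed shared-memory allocator */
        if (mylock == 0 || lock_init(mylock) == 0)
            return -1;                             /* allocation or init failed */
        return 0;
    }

    static void my_critical_section(void)
    {
        if (lock_try(mylock) == 0) {       /* non-blocking attempt, 0 == got it */
            /* ... short critical section ... */
            lock_release(mylock);
        } else {
            lock_get(mylock);              /* fall back to the blocking variant */
            /* ... */
            lock_release(mylock);
        }
    }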
@@ -143,6 +147,9 @@ Locking & unlocking:
 
 void lock_set_get(lock_set_t* s, int i);
 void lock_set_release(lock_set_t* s, int i);
+int  lock_set_try(lock_set_t* s, int i);      - tries to lock the i-th lock
+                                                from the set. If succesfull
+                                                returns 0, if not -1.
 
 Example:
 
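The file's own Example section continues past this hunk. Separately, a short hedged sketch of the lock-set calls just documented; lock_set_alloc() is an assumed allocation helper (not shown in this diff), and the concrete type name is gen_lock_set_t in lock_ops.h:

    static gen_lock_set_t* watchers;

    static int watchers_init(void)
    {
        watchers = lock_set_alloc(16);             /* assumed helper: a set of 16 locks */
        if (watchers == 0 || lock_set_init(watchers) == 0)
            return -1;                             /* init can fail (see the warnings) */
        return 0;
    }

    static void touch_entry(int i)
    {
        if (lock_set_try(watchers, i) == 0) {      /* try the i-th lock, 0 == got it */
            /* ... update entry i ... */
            lock_set_release(watchers, i);
        } else {
            lock_set_get(watchers, i);             /* blocking variant */
            /* ... */
            lock_set_release(watchers, i);
        }
    }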
@@ -49,9 +49,20 @@
 *               (membar_getlock()) (andrei)
 *              added try_lock(); more x86 optimizations, x86  release_lock
 *               fix (andrei)
+ * 2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp support,
+ *              ppc, mips*, alpha optimizations (andrei)
 *
 */
 
+/*
+ * WARNING: the code was not tested on the following architectures:
+ *           - arm6  (cross-compiles ok, no test)
+ *           - alpha (cross-compiles ok, no test)
+ *           - mips64 (cross-compiles ok)
+ *           - ppc64 (compiles ok)
+ *           - sparc32 (tested on a sparc64)
+ */
+
 
 #ifndef fastlock_h
 #define fastlock_h
@@ -87,17 +98,27 @@ typedef  volatile int fl_lock_t;
 *  WARNING: this is intended only for internal fastlock use*/
 #if defined(__CPU_i386) || defined(__CPU_x86_64)
 #define membar_getlock()   /* not needed on x86 */
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+
+#elif defined(__CPU_sparc64)
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
+	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
+	 * since ldstub acts both as a store and as a load */
 #else
 /* no need for a compiler barrier, that is already included in lock_get/tsl*/
 #define membar_getlock() /* not needed if no smp*/
 #endif /* NOSMP */
+
+#elif  defined(__CPU_sparc)
+#define membar_getlock()/* no need for a compiler barrier, already included */
+
 #elif defined __CPU_arm || defined __CPU_arm6
-#error "FIXME: check arm6 membar"
+#ifndef NOSMP
+#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
+#endif /* NOSMP */
 #define membar_getlock() 
+
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 #ifndef NOSMP
 #define membar_getlock() \
@@ -105,14 +126,21 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
-	|| defined __CPU_mips64
+
+#elif defined __CPU_mips2 || defined __CPU_mips64
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("sync \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
+
+#elif defined __CPU_mips
+#ifndef NOSMP
+#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
+#endif
+#define membar_getlock() 
+
 #elif defined __CPU_alpha
 #ifndef NOSMP
 #define membar_getlock() \
@@ -120,13 +148,14 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#else
+
+#else /* __CPU_xxx */
 #error "unknown architecture"
 #endif
 
 
 
-/*test and set lock, ret 1 if lock held by someone else, 0 otherwise
+/*test and set lock, ret !=0 if lock held by someone else, 0 otherwise
 * WARNING: no memory barriers included, if you use this function directly
 *          (not recommended) and it gets the lock (ret==0), you should call 
 *          membar_getlock() after it */
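To make the contract in the comment above concrete, here is a hedged sketch of how a caller is expected to pair tsl() with membar_getlock(); the real get_lock()/try_lock() wrappers in fastlock.h already do this, so the function name below is purely illustrative:

    /* illustrative only: fastlock.h's own try_lock()/get_lock() are the real thing */
    inline static int example_try_lock(fl_lock_t* lock)
    {
        if (tsl(lock) == 0) {     /* 0 => the test-and-set got the lock */
            membar_getlock();     /* on SMP: order the critical section after the
                                     lock acquisition (a no-op where not needed) */
            return 0;
        }
        return -1;                /* someone else holds the lock */
    }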
@@ -155,35 +184,70 @@ inline static int tsl(fl_lock_t* lock)
 		" xchgb %2, %b0 \n\t"
 		"1: \n\t"
 		: "=q" (val), "=m" (*lock) : "m"(*lock) : "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
 #endif /*NOSMP*/
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+#elif defined(__CPU_sparc64)
+	asm volatile(
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   brnz,a,pn %0, 1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
+			/* membar_getlock must be  called outside this function */
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+	);
+#elif defined(__CPU_sparc)
 	asm volatile(
-			"ldstub [%1], %0 \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   tst %0 \n\t"
+			"   bne,a  1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
 			/* membar_getlock must be  called outside this function */
-			: "=r"(val) : "r"(lock):"memory"
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
-	
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm 
 	asm volatile(
-			"# here \n\t"
-			"swpb %0, %1, [%2] \n\t"
-			: "=r" (val)
-			: "r"(1), "r" (lock) : "memory"
+			"swp %0, %2, [%3] \n\t"
+			: "=r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
+	);
+#elif defined __CPU_arm6
+	asm volatile(
+			"   ldrex %0, [%2] \n\t" 
+			"   cmp %0, #0 \n\t"
+			"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
+			/* if %0!=0 => either it was 1 initially or was 0
+			 * and somebody changed it just before the strexeq (so the 
+			 * lock is taken) => it's safe to return %0 */
+			: "=r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
 	);
-	
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
-			"1: lwarx  %0, 0, %2\n\t"
+			"1: \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   lwz %0, 0, (%2) \n\t"
+			"   cmpwi %0, 0 \n\t"
+			"   bne- 2f \n\t" /* predict: not taken */
+#endif
+			"   lwarx  %0, 0, %2\n\t"
 			"   cmpwi  %0, 0\n\t"
-			"   bne    0f\n\t"
-			"   stwcx. %1, 0, %2\n\t"
+			"   bne-    2f\n\t"
+			"   stwcx. %3, 0, %2\n\t"
 			"   bne-   1b\n\t"
 			/* membar_getlock must be  called outside this function */
-			"0:\n\t"
-			: "=r" (val)
-			: "r"(1), "b" (lock) :
-			"memory", "cc"
+			"2:\n\t"
+			: "=r" (val), "=m"(*lock) :  "r" (lock), "r"(1) : "memory", "cc"
         );
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -193,11 +257,18 @@ inline static int tsl(fl_lock_t* lock)
 		".set push \n\t"
 		".set noreorder\n\t"
 		".set mips2 \n\t"
+#ifdef SPIN_OPTIMIZE
+		"    lw %1, %2 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    nop \n\t"
+#endif
 		"1:  ll %1, %2   \n\t"
-		"    li %0, 1 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    li %0, 1 \n\t"  /* delay slot */
 		"    sc %0, %2  \n\t"
 		"    beqz %0, 1b \n\t"
 		"    nop \n\t"
+		"2: \n\t"
 		/* membar_getlock must be called outside this function */
 		".set pop\n\t"
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
@@ -215,9 +286,13 @@ inline static int tsl(fl_lock_t* lock)
 		"    blbs %0, 2f  \n\t" 
 		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
 		"    stl_c %2, %1 \n\t"
-		"    beq %2, 1b   \n\t"
+		"    beq %2, 3f   \n\t" /* back cond. jumps are always predicted to be 
+								   taken => make forward jump */
 		/* membar_getlock must be called outside this function */
 		"2:               \n\t"
+		".subsection 2 \n\t"
+		"3:  br 1b \n\t"
+		".previous \n\t"
 		:"=&r" (val), "=m"(*lock), "=r"(tmp)
 		:"m"(*lock) 
 		: "memory"
@@ -289,19 +364,22 @@ inline static void release_lock(fl_lock_t* lock)
 #elif defined(__CPU_sparc64) || defined(__CPU_sparc)
 	asm volatile(
 #ifndef NOSMP
-			"membar #LoadStore | #StoreStore \n\t" /*is this really needed?*/
+#ifdef __CPU_sparc64
+			"membar #LoadStore | #StoreStore \n\t"
+#else /* __CPU_sparc */
+			"stbar \n\t"
+#endif /* __CPU_sparc64 */
 #endif
-			"stb %%g0, [%0] \n\t"
-			: /*no output*/
-			: "r" (lock)
-			: "memory"
+			"stb %%g0, [%1] \n\t"
+			: "=m"(*lock) : "r" (lock) : "memory"
 	);
 #elif defined __CPU_arm || defined __CPU_arm6
+#ifndef NOSMP
+#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
+#endif
 	asm volatile(
-		" str %0, [%1] \n\r" 
-		: /*no outputs*/ 
-		: "r"(0), "r"(lock)
-		: "memory"
+		" str %1, [%2] \n\r" 
+		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
 	);
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
@@ -310,10 +388,8 @@ inline static void release_lock(fl_lock_t* lock)
 			 *             [IBM Prgramming Environments Manual, D.4.2.2]
 			 */
 			"lwsync\n\t"
-			"stw %0, 0(%1)\n\t"
-			: /* no output */
-			: "r"(0), "b" (lock)
-			: "memory"
+			"stw %1, 0(%2)\n\t"
+			: "=m"(*lock) : "r"(0), "r" (lock) : "memory"
 	);
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -322,7 +398,11 @@ inline static void release_lock(fl_lock_t* lock)
 		".set noreorder \n\t"
 		".set mips2 \n\t"
 #ifndef NOSMP
+#ifdef __CPU_mips
+#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
+#else
 		"    sync \n\t"
+#endif
 #endif
 		"    sw $0, %0 \n\t"
 		".set pop \n\t"
@@ -330,7 +410,9 @@ inline static void release_lock(fl_lock_t* lock)
 	);
 #elif defined __CPU_alpha
 	asm volatile(
+#ifndef  NOSMP
 		"    mb          \n\t"
+#endif
 		"    stl $31, %0 \n\t"
 		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
 	);  
@@ -42,6 +42,7 @@
 *  2003-03-17  possible signal interruptions treated for sysv (andrei)
 *  2004-07-28  s/lock_set_t/gen_lock_set_t/ because of a type conflict
 *              on darwin (andrei)
+ *  2006-04-04  added lock_try(lock) and lock_set_try(s,i) (andrei)
 *
 Implements:
 
@@ -51,13 +52,19 @@ Implements:
 	void    lock_destroy(gen_lock_t* lock);  - removes the lock (e.g sysv rmid)
 	void    lock_get(gen_lock_t* lock);      - lock (mutex down)
 	void    lock_release(gen_lock_t* lock);  - unlock (mutex up)
+	int     lock_try(gen_lock_t* lock);      - tries to get the lock, returns
+	                                            0 on success and -1 on failure
 	
-	lock sets: [implemented only for FL & SYSV so far]
+	lock sets: 
 	----------
 	gen_lock_set_t* lock_set_init(gen_lock_set_t* set);  - inits the lock set
 	void lock_set_destroy(gen_lock_set_t* s);        - removes the lock set
 	void lock_set_get(gen_lock_set_t* s, int i);     - locks sem i from the set
-	void lock_set_release(gen_lock_set_t* s, int i)  - unlocks sem i from the set
+	void lock_set_release(gen_lock_set_t* s, int i)  - unlocks sem i from the
+	                                                   set
+	int  lock_set_try(gen_lock_set_t* s, int i);    - tries to lock the sem i,
+	                                                  returns 0 on success and
+	                                                  -1 on failure
 
 WARNING: - lock_set_init may fail for large number of sems (e.g. sysv). 
          - signals are not treated! (some locks are "awakened" by the signals)
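Since the SysV backend allocates kernel-side semaphores, the destroy calls listed above really do have to run before the process exits. A small hedged sketch of a shutdown path (the function name and the deallocation helpers in the comments are illustrative, not part of this diff):

    static void my_locks_shutdown(gen_lock_t* lock, gen_lock_set_t* set)
    {
        if (lock) {
            lock_destroy(lock);        /* e.g. removes the sysv semaphore (rmid) */
            /* shm_free(lock);            allocator-specific, name assumed */
        }
        if (set) {
            lock_set_destroy(set);     /* removes the whole set */
            /* lock_set_dealloc(set);     assumed helper */
        }
    }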
@@ -81,9 +88,11 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	return lock;
 }
 
+#define lock_try(lock) try_lock(lock)
 #define lock_get(lock) get_lock(lock)
 #define lock_release(lock) release_lock(lock)
 
+
 #elif defined USE_PTHREAD_MUTEX
 #include <pthread.h>
 
@@ -97,6 +106,7 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	else return 0;
 }
 
+#define lock_try(lock) pthread_mutex_trylock(lock)
 #define lock_get(lock) pthread_mutex_lock(lock)
 #define lock_release(lock) pthread_mutex_unlock(lock)
 
@@ -115,6 +125,7 @@ inline static gen_lock_t* lock_init(gen_lock_t* lock)
 	return lock;
 }
 
+#define lock_try(lock) sem_trywait(lock)
 #define lock_get(lock) sem_wait(lock)
 #define lock_release(lock) sem_post(lock)
 
@@ -174,6 +185,30 @@ inline static void lock_destroy(gen_lock_t* lock)
 }
 
 
+/* returns 0 if it got the lock, -1 otherwise */
+inline static int lock_try(gen_lock_t* lock)
+{
+	struct sembuf sop;
+
+	sop.sem_num=0;
+	sop.sem_op=-1; /* down */
+	sop.sem_flg=IPC_NOWAIT; 
+tryagain:
+	if (semop(*lock, &sop, 1)==-1){
+		if (errno==EAGAIN){
+			return -1;
+		}else if (errno==EINTR){
+			DBG("lock_get: signal received while waiting for on a mutex\n");
+			goto tryagain;
+		}else{
+			LOG(L_CRIT, "ERROR: lock_get sysv: %s (%d)\n", strerror(errno),
+						errno);
+			return -1;
+		}
+	}
+	return 0;
+}
+
 inline static void lock_get(gen_lock_t* lock)
 {
 	struct sembuf sop;
@@ -241,6 +276,7 @@ inline static gen_lock_set_t* lock_set_init(gen_lock_set_t* s)
 }
 
 /* WARNING: no boundary checks!*/
+#define lock_set_try(set, i) lock_try(&set->locks[i])
 #define lock_set_get(set, i) lock_get(&set->locks[i])
 #define lock_set_release(set, i) lock_release(&set->locks[i])
 
@@ -289,6 +325,32 @@ inline static void lock_set_destroy(gen_lock_set_t* s)
 	semctl(s->semid, 0, IPC_RMID, (union semun)(int)0);
 }
 
+
+/* returns 0 if it "gets" the lock, -1 otherwise */
+inline static int lock_set_try(gen_lock_set_t* s, int n)
+{
+	struct sembuf sop;
+	
+	sop.sem_num=n;
+	sop.sem_op=-1; /* down */
+	sop.sem_flg=IPC_NOWAIT; 
+tryagain:
+	if (semop(s->semid, &sop, 1)==-1){
+		if (errno==EAGAIN){
+			return -1;
+		}else if (errno==EINTR){
+			DBG("lock_get: signal received while waiting for on a mutex\n");
+			goto tryagain;
+		}else{
+			LOG(L_CRIT, "ERROR: lock_get sysv: %s (%d)\n", strerror(errno),
+						errno);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+
 inline static void lock_set_get(gen_lock_set_t* s, int n)
 {
 	struct sembuf sop;