
core, lib, modules: restructured source code tree

- new folder src/ to hold the source code for main project applications
- main.c is in src/
- all core files and subfolders are in src/core/
- modules are in src/modules/
- libs are in src/lib/
- application Makefiles are in src/
- application binary is built in src/ (src/kamailio)

Daniel-Constantin Mierla authored on 07/12/2016 11:03:51
deleted file mode 100644
@@ -1,385 +0,0 @@
-/*
- * fast architecture specific locking
- *
- * Copyright (C) 2001-2003 FhG Fokus
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-/*!
-* \file
-* \brief Kamailio core :: fast architecture specific locking
-* \author andrei
-* \ingroup core
-* Module: \ref core
- * WARNING: the code was not tested on the following architectures:
- *           - arm6  (cross-compiles ok, no test)
- *           - alpha (cross-compiles ok, no test)
- *           - mips64 (cross-compiles ok)
- *           - ppc64 (compiles ok)
- *           - sparc32 (tested on a sparc64)
- */
-
-
-#ifndef fastlock_h
-#define fastlock_h
-
-#include "sched_yield.h"
-
-
-#define SPIN_OPTIMIZE /* if defined optimize spining on the lock:
-                         try first the lock with non-atomic/non memory locking
-                         operations, and only if the lock appears to be free
-                         switch to the more expensive version */
-
-typedef  volatile int fl_lock_t;
-
-
-
-#define init_lock( l ) (l)=0
-
-
-/* what membar to use (if any) after taking a lock. This
- *  was separated from the lock code to allow better optimizations.
- *  e.g.: use the membar_getlock only after getting the lock and don't use 
- *  it if lock_get fails / when spinning on tsl.
- *  There is no corresponding membar_release_lock (because lock_release
- *  must always include the needed memory barrier).
- *  WARNING: this is intended only for internal fastlock use*/
-#if defined(__CPU_i386) || defined(__CPU_x86_64)
-#define membar_getlock()   /* not needed on x86 */
-
-#elif defined(__CPU_sparc64)
-#ifndef NOSMP
-#define membar_getlock() \
-	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
-	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
-	 * since ldstub acts both as a store and as a load */
-#else
-/* no need for a compiler barrier, that is already included in lock_get/tsl*/
-#define membar_getlock() /* not needed if no smp*/
-#endif /* NOSMP */
-
-#elif  defined(__CPU_sparc)
-#define membar_getlock()/* no need for a compiler barrier, already included */
-
-#elif defined __CPU_arm || defined __CPU_arm6
-#ifndef NOSMP
-#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
-#endif /* NOSMP */
-#define membar_getlock() 
-
-#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
-#ifndef NOSMP
-#define membar_getlock() \
-	asm volatile("lwsync \n\t" : : : "memory");
-#else
-#define membar_getlock() 
-#endif /* NOSMP */
-
-#elif defined __CPU_mips2 || defined __CPU_mips64
-#ifndef NOSMP
-#define membar_getlock() \
-	asm volatile("sync \n\t" : : : "memory");
-#else
-#define membar_getlock() 
-#endif /* NOSMP */
-
-#elif defined __CPU_mips
-#ifndef NOSMP
-#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
-#endif
-#define membar_getlock() 
-
-#elif defined __CPU_alpha
-#ifndef NOSMP
-#define membar_getlock() \
-	asm volatile("mb \n\t" : : : "memory");
-#else
-#define membar_getlock() 
-#endif /* NOSMP */
-
-#else /* __CPU_xxx */
-#error "unknown architecture"
-#endif
-
-
-
-/*test and set lock, ret !=0 if lock held by someone else, 0 otherwise
- * WARNING: no memory barriers included, if you use this function directly
- *          (not recommended) and it gets the lock (ret==0), you should call 
- *          membar_getlock() after it */
-inline static int tsl(fl_lock_t* lock)
-{
-	int val;
-
-#if defined(__CPU_i386) || defined(__CPU_x86_64)
-
-#ifdef NOSMP
-	asm volatile(
-		" xor %0, %0 \n\t"
-		" btsl $0, %2 \n\t"
-		" setc %b0 \n\t"
-		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
-	);
-#else
-	asm volatile(
-#ifdef SPIN_OPTIMIZE
-		" cmpb $0, %2 \n\t"
-		" mov $1, %0 \n\t"
-		" jnz 1f \n\t"
-#else
-		" mov $1, %0 \n\t"
-#endif
-		" xchgb %2, %b0 \n\t"
-		"1: \n\t"
-		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
-#ifdef SPIN_OPTIMIZE
-				, "cc"
-#endif
-	);
-#endif /*NOSMP*/
-#elif defined(__CPU_sparc64)
-	asm volatile(
-#ifdef SPIN_OPTIMIZE
-			"   ldub [%2], %0 \n\t"
-			"   brnz,a,pn %0, 1f \n\t"
-			"   nop \n\t"
-#endif
-			"   ldstub [%2], %0 \n\t"
-			"1: \n\t"
-			/* membar_getlock must be  called outside this function */
-			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
-	);
-#elif defined(__CPU_sparc)
-	asm volatile(
-#ifdef SPIN_OPTIMIZE
-			"   ldub [%2], %0 \n\t"
-			"   tst %0 \n\t"
-			"   bne,a  1f \n\t"
-			"   nop \n\t"
-#endif
-			"   ldstub [%2], %0 \n\t"
-			"1: \n\t"
-			/* membar_getlock must be  called outside this function */
-			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
-#ifdef SPIN_OPTIMIZE
-				, "cc"
-#endif
-	);
-#elif defined __CPU_arm 
-	asm volatile(
-			"swp %0, %2, [%3] \n\t"
-			: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
-	);
-#elif defined __CPU_arm6
-	asm volatile(
-			"   ldrex %0, [%2] \n\t" 
-			"   cmp %0, #0 \n\t"
-			"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
-			/* if %0!=0 => either it was 1 initially or was 0
-			 * and somebody changed it just before the strexeq (so the 
-			 * lock is taken) => it's safe to return %0 */
-			: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
-	);
-#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
-	asm volatile(
-			"1: \n\t"
-#ifdef SPIN_OPTIMIZE
-			"   lwzx %0, 0, %2 \n\t"
-			"   cmpwi %0, 0 \n\t"
-			"   bne- 2f \n\t" /* predict: not taken */
-#endif
-			"   lwarx  %0, 0, %2\n\t"
-			"   cmpwi  %0, 0\n\t"
-			"   bne-    2f\n\t"
-			"   stwcx. %3, 0, %2\n\t"
-			"   bne-   1b\n\t"
-			/* membar_getlock must be  called outside this function */
-			"2:\n\t"
-			: "=&r" (val), "=m"(*lock) :  "r"(lock), "r"(1) : "memory", "cc"
-        );
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
-	|| defined __CPU_mips64
-	long tmp;
-	
-	asm volatile(
-		".set push \n\t"
-		".set noreorder\n\t"
-		".set mips2 \n\t"
-#ifdef SPIN_OPTIMIZE
-		"    lw %1, %2 \n\t"
-		"    bne %1, $0, 2f \n\t"
-		"    nop \n\t"
-#endif
-		"1:  ll %1, %2   \n\t"
-		"    bne %1, $0, 2f \n\t"
-		"    li %0, 1 \n\t"  /* delay slot */
-		"    sc %0, %2  \n\t"
-		"    beqz %0, 1b \n\t"
-		"    nop \n\t"
-		"2: \n\t"
-		/* membar_getlock must be called outside this function */
-		".set pop\n\t"
-		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
-		: "m" (*lock) 
-		: "memory"
-	);
-#elif defined __CPU_alpha
-	long tmp;
-	tmp=0;
-	/* lock low bit set to 1 when the lock is hold and to 0 otherwise */
-	asm volatile(
-		"1:  ldl %0, %1   \n\t"
-		"    blbs %0, 2f  \n\t"  /* optimization if locked */
-		"    ldl_l %0, %1 \n\t"
-		"    blbs %0, 2f  \n\t" 
-		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
-		"    stl_c %2, %1 \n\t"
-		"    beq %2, 3f   \n\t" /* back cond. jumps are always predicted to be 
-								   taken => make forward jump */
-		/* membar_getlock must be called outside this function */
-		"2:               \n\t"
-		".subsection 2 \n\t"
-		"3:  br 1b \n\t"
-		".previous \n\t"
-		:"=&r" (val), "=m"(*lock), "=&r"(tmp)
-		:"m"(*lock) 
-		: "memory"
-	);
-#else
-#error "unknown architecture"
-#endif
-	return val;
-}
-
-
-
-inline static void get_lock(fl_lock_t* lock)
-{
-#ifdef ADAPTIVE_WAIT
-	int i=ADAPTIVE_WAIT_LOOPS;
-#endif
-	
-	while(tsl(lock)){
-#ifdef BUSY_WAIT
-#elif defined ADAPTIVE_WAIT
-		if (i>0) i--;
-		else sched_yield();
-#else
-		sched_yield();
-#endif
-	}
-	membar_getlock();
-}
-
-
-
-/* like get_lock, but it doesn't wait. If it gets the lock returns 0,
- *  <0  otherwise (-1) */
-inline static int try_lock(fl_lock_t* lock)
-{
-	if (tsl(lock)){
-		return -1;
-	}
-	membar_getlock();
-	return 0;
-}
-
-
-
-inline static void release_lock(fl_lock_t* lock)
-{
-#if defined(__CPU_i386) 
-#ifdef NOSMP
-	asm volatile(
-		" movb $0, %0 \n\t" 
-		: "=m"(*lock) : : "memory"
-	); 
-#else /* ! NOSMP */
-	int val;
-	/* a simple mov $0, (lock) does not force StoreStore ordering on all
-	   x86 versions and it doesn't seem to force LoadStore either */
-	asm volatile(
-		" xchgb %b0, %1 \n\t"
-		: "=q" (val), "=m" (*lock) : "0" (0) : "memory"
-	);
-#endif /* NOSMP */
-#elif defined(__CPU_x86_64)
-	asm volatile(
-		" movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is 
-							   implicit (at least on the same mem. type) */
-		: "=m"(*lock) : : "memory"
-	);
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
-	asm volatile(
-#ifndef NOSMP
-#ifdef __CPU_sparc64
-			"membar #LoadStore | #StoreStore \n\t"
-#else /* __CPU_sparc */
-			"stbar \n\t"
-#endif /* __CPU_sparc64 */
-#endif
-			"stb %%g0, [%1] \n\t"
-			: "=m"(*lock) : "r" (lock) : "memory"
-	);
-#elif defined __CPU_arm || defined __CPU_arm6
-#ifndef NOSMP
-#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
-#endif
-	asm volatile(
-		" str %1, [%2] \n\r" 
-		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
-	);
-#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
-	asm volatile(
-			/* "sync\n\t"  lwsync is faster and will work
			 *             here too
			 *             [IBM Prgramming Environments Manual, D.4.2.2]
			 */
-			"lwsync\n\t"
-			"stwx %1, 0, %2\n\t"
-			: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
-	);
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
-	|| defined __CPU_mips64
-	asm volatile(
-		".set push \n\t"
-		".set noreorder \n\t"
-		".set mips2 \n\t"
-#ifndef NOSMP
-#ifdef __CPU_mips
-#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
-#else
-		"    sync \n\t"
-#endif
-#endif
-		"    sw $0, %0 \n\t"
-		".set pop \n\t"
-		: "=m" (*lock)  : /* no input */ : "memory"
-	);
-#elif defined __CPU_alpha
-	asm volatile(
-#ifndef  NOSMP
-		"    mb          \n\t"
-#endif
-		"    stl $31, %0 \n\t"
-		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
-	);  
-#else
-#error "unknown architecture"
-#endif
-}
-
-
-
-#endif
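
For orientation, here is a minimal usage sketch of the API defined in the file above (init_lock, get_lock, try_lock, release_lock and fl_lock_t come from fastlock.h; the counter and its placement are hypothetical -- in Kamailio the lock would normally live in shared memory):

#include "fastlock.h"

static fl_lock_t counter_lock;
static int counter;

static void counter_setup(void)
{
	init_lock(counter_lock);      /* the macro takes the lvalue, not a pointer */
}

static void counter_inc(void)
{
	get_lock(&counter_lock);      /* spins on tsl(), then issues membar_getlock() */
	counter++;
	release_lock(&counter_lock);  /* the release includes the needed memory barrier */
}

static int counter_try_inc(void)
{
	if (try_lock(&counter_lock) < 0)
		return -1;                /* lock held by someone else, don't wait */
	counter++;
	release_lock(&counter_lock);
	return 0;
}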

core : Update include files - delete IDs, update doxygen, delete history

Olle E. Johansson authored on 03/01/2015 10:55:48
@@ -1,10 +1,6 @@
 /*
  * fast architecture specific locking
  *
- * $Id$
- *
- * 
- *
  * Copyright (C) 2001-2003 FhG Fokus
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -19,38 +15,12 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-/*
- *
- *History:
- *--------
- *  2002-02-05  created by andrei
- *  2003-01-16  added PPC locking code contributed by Dinos Dorkofikis
- *               <kdor@intranet.gr>
- *  2004-09-12  added MIPS locking for ISA>=2 (>r3000)  (andrei)
- *  2004-12-16  for now use the same locking code for sparc32 as for sparc64
- *               (it will work only if NOSMP is defined) (andrei)
- *  2005-04-27  added alpha locking code (andrei)
- *  2005-05-25  PPC locking code enabled for PPC64; added a lwsync to
- *               the tsl part and replaced the sync with a lwsync for the
- *               unlock part (andrei)
- *  2006-03-08  mips2 NOSMP (skip sync), optimized x86 & mips clobbers and
- *               input/output constraints (andrei)
- *  2006-04-03  optimization: call lock_get memory barrier outside tsl,in the 
- *               calling function, only if the lock operation succeeded
- *               (membar_getlock()) (andrei)
- *              added try_lock(); more x86 optimizations, x86  release_lock
- *               fix (andrei)
- * 2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp support,
- *              ppc, mips*, alpha optimizations (andrei)
- * 2006-04-05  ppc fixes (s/stw/stwx/, s/lwz/lwzx), early clobber added
- *             where needed (andrei)
- * 2006-11-22  arm early clobber added: according to the swp instruction 
- *              specification the address register must be != from the other 2
- *              (Julien Blache <jblache@debian.org>)
- *
- */
-
-/*
+/*!
+* \file
+* \brief Kamailio core :: fast architecture specific locking
+* \author andrei
+* \ingroup core
+* Module: \ref core
  * WARNING: the code was not tested on the following architectures:
  *           - arm6  (cross-compiles ok, no test)
  *           - alpha (cross-compiles ok, no test)

- moved sched_yield() wrapper into sched_yield.h at Miklos's request.

Andrei Pelinescu-Onciul authored on 13/07/2007 15:12:46
@@ -63,14 +63,7 @@
 #ifndef fastlock_h
 #define fastlock_h
 
-#ifdef HAVE_SCHED_YIELD
-#include <sched.h>
-#else
-#include <unistd.h>
-	/* fake sched_yield */
-	#define sched_yield()	sleep(0)
-#endif
-
+#include "sched_yield.h"
 
 
 #define SPIN_OPTIMIZE /* if defined optimize spining on the lock:
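
The new header itself is not shown in this history; presumably it carries the same fallback that was removed here, roughly along these lines (a sketch of sched_yield.h reconstructed from the deleted lines above, guard name assumed, not the actual file):

/* sched_yield.h -- hypothetical reconstruction */
#ifndef _sched_yield_h
#define _sched_yield_h

#ifdef HAVE_SCHED_YIELD
#include <sched.h>
#else
#include <unistd.h>
	/* fake sched_yield */
	#define sched_yield()	sleep(0)
#endif

#endif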

- atomic_add & atomic_cmpxchg added to ppc
- atomic_unknown (used when the processor does not support atomic ops or is not among the supported ones) now tries to use a "hash" of locks if GEN_LOCK_SET_T_UNLIMITED is defined => less contention on multi-CPUs
- atomic_ops.h defines *_UNLIMITED macros when the number of locks or the set size is limited only by the available memory (everything except SYSV sems)
- license changes: all the atomic* stuff and the locks are now under a BSD (OpenBSD) style license

Andrei Pelinescu-Onciul authored on 11/05/2007 20:44:15
@@ -7,26 +7,17 @@
  *
  * Copyright (C) 2001-2003 FhG Fokus
  *
- * This file is part of ser, a free SIP server.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
  *
- * ser is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version
- *
- * For a license to use the ser software under conditions
- * other than those described here, or to purchase support for this
- * software, please contact iptel.org by e-mail at the following addresses:
- *    info@iptel.org
- *
- * ser is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License 
- * along with this program; if not, write to the Free Software 
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 /*
  *
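
The "hash" of locks mentioned in the commit message is the classic trick of spreading contention over a small array of spinlocks indexed by the address of the protected variable. The following is an illustrative sketch of that general idea only, with made-up names; the real implementation lives in atomic_ops.h and the atomic/ sources, which this history does not show:

#include "fastlock.h"

#define ATOMIC_HASH_SIZE 256    /* illustrative size, power of two */

static fl_lock_t _atomic_hash[ATOMIC_HASH_SIZE];

/* pick a lock based on the address of the variable being updated */
#define atomic_hash_lock(v) \
	(&_atomic_hash[(((unsigned long)(void*)(v)) >> 4) & (ATOMIC_HASH_SIZE - 1)])

/* fallback atomic add for CPUs without native atomic ops */
static inline int atomic_add_fallback(volatile int *var, int v)
{
	int r;
	fl_lock_t *l = atomic_hash_lock(var);

	get_lock(l);            /* different variables usually map to different locks */
	r = (*var += v);
	release_lock(l);
	return r;
}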

- fastlock arm fix: arm early clobber added: according to the swp instruction specification the address register must be != from the other 2. Fix from openser, originally sent by Julien Blache <jblache@debian.org>.

Andrei Pelinescu-Onciul authored on 22/11/2006 18:46:04
@@ -53,6 +53,9 @@
  *              ppc, mips*, alpha optimizations (andrei)
  * 2006-04-05  ppc fixes (s/stw/stwx/, s/lwz/lwzx), early clobber added
  *             where needed (andrei)
+ * 2006-11-22  arm early clobber added: according to the swp instruction 
+ *              specification the address register must be != from the other 2
+ *              (Julien Blache <jblache@debian.org>)
  *
  */
 
@@ -222,7 +225,7 @@ inline static int tsl(fl_lock_t* lock)
 #elif defined __CPU_arm 
 	asm volatile(
 			"swp %0, %2, [%3] \n\t"
-			: "=r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
+			: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
 	);
 #elif defined __CPU_arm6
 	asm volatile(
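
The one-character change above is GCC's early-clobber modifier: "&" marks an output operand that is written before the inputs are last read, so the compiler must not place it in the same register as any input. A stand-alone illustration (ARM-only; the helper name is made up, the asm body is the one shown in the diff):

static inline int swp_test_and_set(volatile int *lock)
{
	int val;
	asm volatile(
		"swp %0, %2, [%3] \n\t"
		/* "=&r": without the '&', val could end up in the same register as
		 * the address operand (lock), which the swp specification forbids */
		: "=&r"(val), "=m"(*lock)
		: "r"(1), "r"(lock)
		: "memory"
	);
	return val;
}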

- ppc fixes (s/stw/stwx/, s/lwz/lwzx)
- missing early clobbers added for x86, sparc*, armv6, ppc*, alpha

Andrei Pelinescu-Onciul authored on 05/04/2006 08:49:57
@@ -51,6 +51,8 @@
  *               fix (andrei)
  * 2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp support,
  *              ppc, mips*, alpha optimizations (andrei)
+ * 2006-04-05  ppc fixes (s/stw/stwx/, s/lwz/lwzx), early clobber added
+ *             where needed (andrei)
  *
  */
 
@@ -170,7 +172,7 @@ inline static int tsl(fl_lock_t* lock)
 		" xor %0, %0 \n\t"
 		" btsl $0, %2 \n\t"
 		" setc %b0 \n\t"
-		: "=q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
+		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
 	);
 #else
 	asm volatile(
@@ -183,7 +185,7 @@ inline static int tsl(fl_lock_t* lock)
 #endif
 		" xchgb %2, %b0 \n\t"
 		"1: \n\t"
-		: "=q" (val), "=m" (*lock) : "m"(*lock) : "memory"
+		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
 #ifdef SPIN_OPTIMIZE
 				, "cc"
 #endif
@@ -199,7 +201,7 @@ inline static int tsl(fl_lock_t* lock)
 			"   ldstub [%2], %0 \n\t"
 			"1: \n\t"
 			/* membar_getlock must be  called outside this function */
-			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
 	);
 #elif defined(__CPU_sparc)
 	asm volatile(
@@ -212,7 +214,7 @@ inline static int tsl(fl_lock_t* lock)
 			"   ldstub [%2], %0 \n\t"
 			"1: \n\t"
 			/* membar_getlock must be  called outside this function */
-			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+			: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
 #ifdef SPIN_OPTIMIZE
 				, "cc"
 #endif
@@ -230,13 +232,13 @@ inline static int tsl(fl_lock_t* lock)
 			/* if %0!=0 => either it was 1 initially or was 0
 			 * and somebody changed it just before the strexeq (so the 
 			 * lock is taken) => it's safe to return %0 */
-			: "=r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
+			: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
 	);
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
 			"1: \n\t"
 #ifdef SPIN_OPTIMIZE
-			"   lwz %0, 0, (%2) \n\t"
+			"   lwzx %0, 0, %2 \n\t"
 			"   cmpwi %0, 0 \n\t"
 			"   bne- 2f \n\t" /* predict: not taken */
 #endif
@@ -247,7 +249,7 @@ inline static int tsl(fl_lock_t* lock)
 			"   bne-   1b\n\t"
 			/* membar_getlock must be  called outside this function */
 			"2:\n\t"
-			: "=r" (val), "=m"(*lock) :  "r" (lock), "r"(1) : "memory", "cc"
+			: "=&r" (val), "=m"(*lock) :  "r"(lock), "r"(1) : "memory", "cc"
         );
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -293,7 +295,7 @@ inline static int tsl(fl_lock_t* lock)
 		".subsection 2 \n\t"
 		"3:  br 1b \n\t"
 		".previous \n\t"
-		:"=&r" (val), "=m"(*lock), "=r"(tmp)
+		:"=&r" (val), "=m"(*lock), "=&r"(tmp)
 		:"m"(*lock) 
 		: "memory"
 	);
@@ -388,8 +390,8 @@ inline static void release_lock(fl_lock_t* lock)
 			 *             [IBM Prgramming Environments Manual, D.4.2.2]
 			 */
 			"lwsync\n\t"
-			"stw %1, 0(%2)\n\t"
-			: "=m"(*lock) : "r"(0), "r" (lock) : "memory"
+			"stwx %1, 0, %2\n\t"
+			: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
 	);
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64

- fastlock: optimizations (in general a "nicer" spin on the lock for the other CPUs) and cleanups for sparc, sparc64, armv6 (nosmp for now), ppc*, mips*
- fastlock: alpha: replace the conditional jump backward with a conditional jump forward followed by a jump backward (a conditional jump with a negative relative offset is always predicted as taken, and here we want it predicted as not taken)
- fastlock: sparc (32) smp support
- lock_ops.h: introduced lock_try and lock_set_try (non-blocking lock_*_get variants; they return -1 if the lock could not be taken and 0 on success) for all the supported locking methods (fast_lock, pthread_mutex, posix_sem, sysv_sems)
- updated locking doc

Andrei Pelinescu-Onciul authored on 04/04/2006 18:04:01
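
In this file the non-blocking variant is try_lock(); a minimal sketch of how a caller would use it (the function and lock names are made up):

/* do optional work only if the lock can be taken right now */
static int flush_if_idle(fl_lock_t *queue_lock)
{
	if (try_lock(queue_lock) < 0)
		return -1;              /* somebody else holds it -- skip this round */
	/* ... drain the queue ... */
	release_lock(queue_lock);
	return 0;
}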
@@ -49,9 +49,20 @@
  *               (membar_getlock()) (andrei)
  *              added try_lock(); more x86 optimizations, x86  release_lock
  *               fix (andrei)
+ * 2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp support,
+ *              ppc, mips*, alpha optimizations (andrei)
  *
  */
 
+/*
+ * WARNING: the code was not tested on the following architectures:
+ *           - arm6  (cross-compiles ok, no test)
+ *           - alpha (cross-compiles ok, no test)
+ *           - mips64 (cross-compiles ok)
+ *           - ppc64 (compiles ok)
+ *           - sparc32 (tested on a sparc64)
+ */
+
 
 #ifndef fastlock_h
 #define fastlock_h
@@ -87,17 +98,27 @@ typedef  volatile int fl_lock_t;
  *  WARNING: this is intended only for internal fastlock use*/
 #if defined(__CPU_i386) || defined(__CPU_x86_64)
 #define membar_getlock()   /* not needed on x86 */
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+
+#elif defined(__CPU_sparc64)
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
+	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
+	 * since ldstub acts both as a store and as a load */
 #else
 /* no need for a compiler barrier, that is already included in lock_get/tsl*/
 #define membar_getlock() /* not needed if no smp*/
 #endif /* NOSMP */
+
+#elif  defined(__CPU_sparc)
+#define membar_getlock()/* no need for a compiler barrier, already included */
+
 #elif defined __CPU_arm || defined __CPU_arm6
-#error "FIXME: check arm6 membar"
+#ifndef NOSMP
+#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
+#endif /* NOSMP */
 #define membar_getlock() 
+
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 #ifndef NOSMP
 #define membar_getlock() \
@@ -105,14 +126,21 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
-	|| defined __CPU_mips64
+
+#elif defined __CPU_mips2 || defined __CPU_mips64
 #ifndef NOSMP
 #define membar_getlock() \
 	asm volatile("sync \n\t" : : : "memory");
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
+
+#elif defined __CPU_mips
+#ifndef NOSMP
+#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
+#endif
+#define membar_getlock() 
+
 #elif defined __CPU_alpha
 #ifndef NOSMP
 #define membar_getlock() \
@@ -120,13 +148,14 @@ typedef  volatile int fl_lock_t;
 #else
 #define membar_getlock() 
 #endif /* NOSMP */
-#else
+
+#else /* __CPU_xxx */
 #error "unknown architecture"
 #endif
 
 
 
-/*test and set lock, ret 1 if lock held by someone else, 0 otherwise
+/*test and set lock, ret !=0 if lock held by someone else, 0 otherwise
  * WARNING: no memory barriers included, if you use this function directly
  *          (not recommended) and it gets the lock (ret==0), you should call 
  *          membar_getlock() after it */
@@ -155,35 +184,70 @@ inline static int tsl(fl_lock_t* lock)
 		" xchgb %2, %b0 \n\t"
 		"1: \n\t"
 		: "=q" (val), "=m" (*lock) : "m"(*lock) : "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
 #endif /*NOSMP*/
-#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
+#elif defined(__CPU_sparc64)
+	asm volatile(
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   brnz,a,pn %0, 1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
+			/* membar_getlock must be  called outside this function */
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+	);
+#elif defined(__CPU_sparc)
 	asm volatile(
-			"ldstub [%1], %0 \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   ldub [%2], %0 \n\t"
+			"   tst %0 \n\t"
+			"   bne,a  1f \n\t"
+			"   nop \n\t"
+#endif
+			"   ldstub [%2], %0 \n\t"
+			"1: \n\t"
 			/* membar_getlock must be  called outside this function */
-			: "=r"(val) : "r"(lock):"memory"
+			: "=r"(val), "=m"(*lock) : "r"(lock): "memory"
+#ifdef SPIN_OPTIMIZE
+				, "cc"
+#endif
 	);
-	
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm 
 	asm volatile(
-			"# here \n\t"
-			"swpb %0, %1, [%2] \n\t"
-			: "=r" (val)
-			: "r"(1), "r" (lock) : "memory"
+			"swp %0, %2, [%3] \n\t"
+			: "=r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
+	);
+#elif defined __CPU_arm6
+	asm volatile(
+			"   ldrex %0, [%2] \n\t" 
+			"   cmp %0, #0 \n\t"
+			"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
+			/* if %0!=0 => either it was 1 initially or was 0
+			 * and somebody changed it just before the strexeq (so the 
+			 * lock is taken) => it's safe to return %0 */
+			: "=r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
 	);
-	
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
-			"1: lwarx  %0, 0, %2\n\t"
+			"1: \n\t"
+#ifdef SPIN_OPTIMIZE
+			"   lwz %0, 0, (%2) \n\t"
+			"   cmpwi %0, 0 \n\t"
+			"   bne- 2f \n\t" /* predict: not taken */
+#endif
+			"   lwarx  %0, 0, %2\n\t"
 			"   cmpwi  %0, 0\n\t"
-			"   bne    0f\n\t"
-			"   stwcx. %1, 0, %2\n\t"
+			"   bne-    2f\n\t"
+			"   stwcx. %3, 0, %2\n\t"
 			"   bne-   1b\n\t"
 			/* membar_getlock must be  called outside this function */
-			"0:\n\t"
-			: "=r" (val)
-			: "r"(1), "b" (lock) :
-			"memory", "cc"
+			"2:\n\t"
+			: "=r" (val), "=m"(*lock) :  "r" (lock), "r"(1) : "memory", "cc"
         );
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -193,11 +257,18 @@ inline static int tsl(fl_lock_t* lock)
 		".set push \n\t"
 		".set noreorder\n\t"
 		".set mips2 \n\t"
+#ifdef SPIN_OPTIMIZE
+		"    lw %1, %2 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    nop \n\t"
+#endif
 		"1:  ll %1, %2   \n\t"
-		"    li %0, 1 \n\t"
+		"    bne %1, $0, 2f \n\t"
+		"    li %0, 1 \n\t"  /* delay slot */
 		"    sc %0, %2  \n\t"
 		"    beqz %0, 1b \n\t"
 		"    nop \n\t"
+		"2: \n\t"
 		/* membar_getlock must be called outside this function */
 		".set pop\n\t"
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
@@ -215,9 +286,13 @@ inline static int tsl(fl_lock_t* lock)
 		"    blbs %0, 2f  \n\t" 
 		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
 		"    stl_c %2, %1 \n\t"
-		"    beq %2, 1b   \n\t"
+		"    beq %2, 3f   \n\t" /* back cond. jumps are always predicted to be 
+								   taken => make forward jump */
 		/* membar_getlock must be called outside this function */
 		"2:               \n\t"
+		".subsection 2 \n\t"
+		"3:  br 1b \n\t"
+		".previous \n\t"
 		:"=&r" (val), "=m"(*lock), "=r"(tmp)
 		:"m"(*lock) 
 		: "memory"
@@ -289,19 +364,22 @@ inline static void release_lock(fl_lock_t* lock)
 #elif defined(__CPU_sparc64) || defined(__CPU_sparc)
 	asm volatile(
 #ifndef NOSMP
-			"membar #LoadStore | #StoreStore \n\t" /*is this really needed?*/
+#ifdef __CPU_sparc64
+			"membar #LoadStore | #StoreStore \n\t"
+#else /* __CPU_sparc */
+			"stbar \n\t"
+#endif /* __CPU_sparc64 */
 #endif
-			"stb %%g0, [%0] \n\t"
-			: /*no output*/
-			: "r" (lock)
-			: "memory"
+			"stb %%g0, [%1] \n\t"
+			: "=m"(*lock) : "r" (lock) : "memory"
 	);
 #elif defined __CPU_arm || defined __CPU_arm6
+#ifndef NOSMP
+#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
+#endif
 	asm volatile(
-		" str %0, [%1] \n\r" 
-		: /*no outputs*/ 
-		: "r"(0), "r"(lock)
-		: "memory"
+		" str %1, [%2] \n\r" 
+		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
 	);
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
@@ -310,10 +388,8 @@ inline static void release_lock(fl_lock_t* lock)
 			 *             [IBM Prgramming Environments Manual, D.4.2.2]
 			 */
 			"lwsync\n\t"
-			"stw %0, 0(%1)\n\t"
-			: /* no output */
-			: "r"(0), "b" (lock)
-			: "memory"
+			"stw %1, 0(%2)\n\t"
+			: "=m"(*lock) : "r"(0), "r" (lock) : "memory"
 	);
 #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
 	|| defined __CPU_mips64
@@ -322,7 +398,11 @@ inline static void release_lock(fl_lock_t* lock)
 		".set noreorder \n\t"
 		".set mips2 \n\t"
 #ifndef NOSMP
+#ifdef __CPU_mips
+#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
+#else
 		"    sync \n\t"
+#endif
 #endif
 		"    sw $0, %0 \n\t"
 		".set pop \n\t"
@@ -330,7 +410,9 @@ inline static void release_lock(fl_lock_t* lock)
 	);
 #elif defined __CPU_alpha
 	asm volatile(
+#ifndef  NOSMP
 		"    mb          \n\t"
+#endif
 		"    stl $31, %0 \n\t"
 		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
 	);  

- membar_write on x86_64 is by default empty (since on amd64 stores are always ordered)

- x86/x86_64 lock optimizations: spinning on a lock should now be friendlier
to the other CPUs' caches (at the extra cost of a cmp mem + jump); tried to
arrange the instructions a little better to allow for some parallel
execution.

- x86 unlocks with xchg by default (since some x86s reorder stores, so a
simple mov is unsafe)
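
Concretely, these are the two x86 release sequences that end up in release_lock() above; only the wrapper names here are made up:

static inline void unlock_mov(volatile int *lock)    /* NOSMP / uniprocessor only */
{
	asm volatile(" movb $0, %0 \n\t" : "=m"(*lock) : : "memory");
}

static inline void unlock_xchg(volatile int *lock)   /* safe on every SMP x86 */
{
	int val;
	/* xchg with a memory operand is implicitly locked, so it also provides the
	 * StoreStore/LoadStore ordering that a plain mov cannot guarantee on all
	 * x86 implementations */
	asm volatile(" xchgb %b0, %1 \n\t" : "=q"(val), "=m"(*lock) : "0"(0) : "memory");
	(void)val;
}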

Andrei Pelinescu-Onciul authored on 03/04/2006 19:03:16
@@ -47,7 +47,8 @@
  *  2006-04-03  optimization: call lock_get memory barrier outside tsl,in the 
  *               calling function, only if the lock operation succeeded
  *               (membar_getlock()) (andrei)
- *              added try_lock()  (andrei)
+ *              added try_lock(); more x86 optimizations, x86  release_lock
+ *               fix (andrei)
  *
  */
 
@@ -64,6 +65,12 @@
 #endif
 
 
+
+#define SPIN_OPTIMIZE /* if defined optimize spining on the lock:
+#define SPIN_OPTIMIZE /* if defined optimize spining on the lock: