
- advanced synchronization functions: atomic operations (inc, dec, inc_and_test, dec_and_test, or, and) and memory barriers. [ work in progress, for now: x86, x86_64 and mips2 ]

Andrei Pelinescu-Onciul authored on 09/03/2006 20:44:08
Showing 5 changed files
new file mode 100644
@@ -0,0 +1,64 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    info@iptel.org
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic operations init
+ */
+/* 
+ * History:
+ * --------
+ *  2006-03-08  created by andrei
+ */
+
+#include "atomic_ops.h"
+
+#ifdef ATOMIC_USE_LOCK
+gen_lock_t* atomic_lock;
+#endif
+
+
+/* returns 0 on success, -1 on error */
+int atomic_ops_init()
+{
+	int ret;
+	
+	ret=0;
+#ifdef ATOMIC_USE_LOCK
+	if ((atomic_lock=lock_alloc())==0){
+		ret=-1;
+		goto end;
+	}
+	if (lock_init(atomic_lock)==0){
+		ret=-1;
+		lock_destroy(atomic_lock);
+		atomic_lock=0;
+		goto end;
+	}
+end:
+#endif
+	return ret;
+}
+
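
As a usage note: atomic_ops_init() only does real work when the generic, lock-based fallback is compiled in (ATOMIC_USE_LOCK). A minimal sketch of the intended call pattern, assuming a hypothetical startup hook (not part of this commit):

#include "atomic_ops.h"

/* hypothetical startup hook: run once, before any atomic ops are used */
static int startup(void)
{
	if (atomic_ops_init()<0)
		return -1;	/* lock_alloc() or lock_init() failed */
	return 0;
}
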
new file mode 100644
@@ -0,0 +1,362 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    info@iptel.org
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic operations and memory barriers
+ *  WARNING: atomic ops do not include memory barriers
+ *  
+ *  memory barriers:
+ *  ----------------
+ *
+ *  void membar()        - memory barrier (load & store)
+ *  void membar_read()   - load (read) memory barrier
+ *  void membar_write()  - store (write) memory barrier
+ *
+ *  Note: properly using memory barriers is tricky; in general, try not to
+ *        depend on them. Locks include memory barriers, so you don't need
+ *        them for writes/loads already protected by locks.
+ *
+ * atomic operations:
+ * ------------------
+ *  type: atomic_t
+ *
+ *  void atomic_set(atomic_t* v, long i)      - v->val=i
+ *  long atomic_get(atomic_t* v)              - return v->val
+ *  void atomic_inc(atomic_t* v)
+ *  void atomic_dec(atomic_t* v)
+ *  long atomic_inc_and_test(atomic_t* v)     - returns 1 if the result is 0
+ *  long atomic_dec_and_test(atomic_t* v)     - returns 1 if the result is 0
+ *  void atomic_or (atomic_t* v, long mask)   - v->val|=mask 
+ *  void atomic_and(atomic_t* v, long mask)   - v->val&=mask
+ *  
+ */
+/* 
+ * History:
+ * --------
+ *  2006-03-08  created by andrei
+ */
+#ifndef __atomic_ops
+#define __atomic_ops
+
+/* atomic_t is defined as a struct to easily catch non-atomic ops on it,
+ * e.g.  atomic_t  foo; foo++  will generate a compile error */
+typedef struct{ volatile long val; } atomic_t; 
+
+
+/* store and load operations are atomic on all cpus; note, however, that they
+ * don't include memory barriers, so if you want to use atomic_{get,set} 
+ * to implement mutexes you must explicitly use the barriers */
+#define atomic_set(at_var, value)	((at_var)->val=(value))
+#define atomic_get(at_var) ((at_var)->val)
+
+/* init atomic ops */
+int atomic_ops_init();
+
+
+
+#if defined(__CPU_i386) || defined(__CPU_x86_64)
+
+#ifdef NOSMP
+#define __LOCK_PREF 
+#else
+#define __LOCK_PREF "lock ;"
+#endif
+
+/* atomic_t.val is a long, so the operation width must match sizeof(long):
+ * 32-bit ops ("l" suffix) on i386, 64-bit ops ("q" suffix) on x86_64;
+ * likewise use the right stack-pointer register for the barrier */
+#ifdef __CPU_x86_64
+#define __ATOMIC_SUF "q"
+#define __ATOMIC_SP  "rsp"
+#else
+#define __ATOMIC_SUF "l"
+#define __ATOMIC_SP  "esp"
+#endif
+
+
+/* memory barriers */
+
+#ifdef NOSMP
+
+#define membar()
+#define membar_read()
+#define membar_write()
+
+#else
+
+/* although most x86 cpus do stores in order, we're playing it safe and use
+ *  oostore-ready write barriers */
+#define X86_OOSTORE 
+
+/* mfence, lfence and sfence are available only on newer cpus, so for now
+ * stick to a lock-prefixed addl on the stack */
+#define membar() \
+	asm volatile( \
+					" lock; addl $0, 0(%%" __ATOMIC_SP ") \n\t " \
+					: : : "memory" \
+				) 
+
+#define membar_read()	membar()
+
+#ifdef X86_OOSTORE
+/* out-of-order store version */
+#define membar_write()	membar()
+#else
+/* no oostore (most x86 cpus) => do nothing, just a gcc do-not-cache barrier*/
+#define membar_write()	asm volatile ("" : : : "memory")
+#endif
+
+
+#endif /* NOSMP */
+
+#define atomic_inc(var) \
+	asm volatile( \
+			__LOCK_PREF " inc" __ATOMIC_SUF " %0 \n\t"  \
+			: "=m"((var)->val) : "m"((var)->val) : "cc" \
+			) 
+
+#define atomic_dec(var) \
+	asm volatile( \
+			__LOCK_PREF " dec" __ATOMIC_SUF " %0 \n\t" \
+			: "=m"((var)->val) : "m"((var)->val) : "cc" \
+			) 
+
+#define atomic_and(var, i) \
+	asm volatile( \
+			__LOCK_PREF " and" __ATOMIC_SUF " %1, %0 \n\t" \
+			: "=m"((var)->val) : "r"((i)), "m"((var)->val) : "cc" \
+			)
+#define atomic_or(var, i) \
+	asm volatile( \
+			__LOCK_PREF " or" __ATOMIC_SUF " %1, %0 \n\t" \
+			: "=m"((var)->val) : "r"((i)), "m"((var)->val) : "cc" \
+			)
+
+
+/* returns 1 if the result is 0 */
+inline static long atomic_inc_and_test(atomic_t* var)
+{
+	char ret;
+	
+	asm volatile(
+			__LOCK_PREF " inc" __ATOMIC_SUF " %0 \n\t"
+			"setz  %1 \n\t"
+			: "=m"(var->val), "=qm"(ret) : "m" (var->val) : "cc"
+			);
+	return ret;
+}
+
+
+/* returns 1 if the result is 0 */
+inline static long atomic_dec_and_test(atomic_t* var)
+{
+	char ret;
+	
+	asm volatile(
+			__LOCK_PREF " dec" __ATOMIC_SUF " %0 \n\t"
+			"setz  %1 \n\t"
+			: "=m"(var->val), "=qm"(ret) : "m" (var->val) : "cc"
+			);
+	return ret;
+}
+
+
+#elif defined __CPU_mips2
+
+#ifdef NOSMP
+#define membar()
+#define membar_read()  membar()
+#define membar_write() membar()
+#else
+
+#define membar() \
+	asm volatile( \
+			".set noreorder \n\t" \
+			"    sync\n\t" \
+			".set reorder \n\t" \
+			: : : "memory" \
+			) 
+
+#define membar_read()  membar()
+#define membar_write() membar()
+
+#endif /* NOSMP */
+
+
+/* main asm block: ll/sc retry loop */
+#define ATOMIC_ASM_OP(op) \
+			".set noreorder \n\t" \
+			"1:   ll %1, %0 \n\t" \
+			"     " op "\n\t" \
+			"     sc %2, %0 \n\t" \
+			"     beqz %2, 1b \n\t" \
+			"     nop \n\t" \
+			".set reorder \n\t" 
+
+
+#define ATOMIC_FUNC_DECL(NAME, OP, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* same as above, but with the constant CT as input %3 */
+#define ATOMIC_FUNC_DECL_CT(NAME, OP, CT, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "r"((CT)), "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* takes an extra parameter, i, which goes in %3 */
+#define ATOMIC_FUNC_DECL1(NAME, OP, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var, long i) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "r"((i)), "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+ATOMIC_FUNC_DECL(inc,      "addiu %2, %1, 1", void, /* no return */ )
+ATOMIC_FUNC_DECL(inc_and_test, "addiu %2, %1, 1", long, (ret+1)==0 )
+
+ATOMIC_FUNC_DECL_CT(dec,   "subu %2, %1, %3", 1,  void, /* no return */ )
+ATOMIC_FUNC_DECL_CT(dec_and_test, "subu %2, %1, %3", 1, long, (ret-1)==0 )
+
+ATOMIC_FUNC_DECL1(and, "and %2, %1, %3", void, /* no return */ )
+ATOMIC_FUNC_DECL1(or,  "or  %2, %1, %3", void,  /* no return */ )
+
+#else /* no known cpu */
+
+#include "locking.h"
+
+#define ATOMIC_USE_LOCK
+
+extern gen_lock_t* atomic_lock;
+
+
+#ifdef NOSMP
+#define smp_atomic_lock
+#define smp_atomic_unlock
+#else
+#define smp_atomic_lock    lock_get(atomic_lock)
+#define smp_atomic_unlock  lock_release(atomic_lock)
+#endif
+
+/* memory barriers 
+ *  not a known cpu -> fall back to lock/unlock: safe but costly (the lock
+ *  operations should include a memory barrier effect) */
+
+#define membar() \
+	do{\
+		smp_atomic_lock; \
+		smp_atomic_unlock; \
+	} while(0)
+
+#define membar_write() membar()
+
+#define membar_read()  membar()
+
+
+/* atomic ops */
+
+#define atomic_inc(var) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val++;\
+		smp_atomic_unlock;\
+	}while(0)
+
+
+#define atomic_dec(var) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val--; \
+		smp_atomic_unlock; \
+	}while(0)
+
+
+#define atomic_and(var, i) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val&=(i); \
+		smp_atomic_unlock; \
+	}while(0)
+
+#define atomic_or(var, i) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val|=(i); \
+		smp_atomic_unlock; \
+	}while(0)
+
+
+
+/* returns true if the result is 0 */
+inline static long atomic_inc_and_test(atomic_t* var)
+{
+	long ret;
+	
+	smp_atomic_lock;
+	var->val++;
+	ret=var->val;
+	smp_atomic_unlock;
+	
+	return (ret==0);
+}
+
+
+/* returns true if the result is 0 */
+inline static long atomic_dec_and_test(atomic_t* var)
+{
+	long ret;
+	
+	smp_atomic_lock;
+	var->val--;
+	ret=var->val;
+	smp_atomic_unlock;
+	
+	return (ret==0);
+}
+
+
+#endif /* if __CPU_xx */
+
+#endif
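
To make the API above concrete, here is an illustrative sketch (not part of this commit) of the typical consumer: a reference counter on a shared object, built from atomic_inc() and atomic_dec_and_test(). Per the warning at the top of the file, these ops include no memory barriers, so code that publishes the object to other processes still needs an explicit membar_write().

#include "atomic_ops.h"

struct shared_obj{
	atomic_t refcnt;
	/* ... payload ... */
};

/* illustrative helpers, not part of this commit */
inline static void obj_ref(struct shared_obj* o)
{
	atomic_inc(&o->refcnt);
}

/* returns 1 if this was the last reference and the object can be freed */
inline static long obj_unref(struct shared_obj* o)
{
	return atomic_dec_and_test(&o->refcnt);
}
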
new file mode 100644
@@ -0,0 +1,66 @@
+/*
+ *
+ *  simple atomic ops testing program
+ *  (no parallel stuff)
+ * 
+ *  Compile with: gcc -D__CPU_i386 -O3 on x86 machines and
+ *                gcc -mips2 -O2 -D__CPU_mips2  on mips machines.
+ *  -- andrei
+ *
+ *  
+ */
+
+#include <stdio.h>
+#include "../atomic_ops.h"
+
+
+int main(int argc, char** argv)
+{
+	int r;
+	atomic_t v;
+#ifdef NOSMP
+	printf("no-smp mode\n");
+#else
+	printf("smp mode\n");
+#endif
+	
+	printf("\nstarting memory barrier opcode tests...\n");
+	membar();
+	printf(" membar() .............................. ok\n");
+	membar_write();
+	printf(" membar_write() ........................ ok\n");
+	membar_read();
+	printf(" membar_read() ......................... ok\n");
+	
+	printf("\nstarting atomic ops basic tests...\n");
+	
+	atomic_set(&v, 1);
+	printf(" atomic_set, v should be 1 ............. %2ld\n", atomic_get(&v));
+	atomic_inc(&v);
+	printf(" atomic_inc, v should be 2 ............. %2ld\n", atomic_get(&v));
+	r=atomic_inc_and_test(&v);
+	printf(" atomic_inc_and_test, v should be  3 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	
+	atomic_dec(&v);
+	printf(" atomic_dec, v should be 2 ............. %2ld\n", atomic_get(&v));
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be  1 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be  0 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  1 ... %2d\n", r);
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be -1 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	
+	atomic_and(&v, 2);
+	printf(" atomic_and, v should be 2 ............. %2ld\n", atomic_get(&v));
+	
+	atomic_or(&v, 5);
+	printf(" atomic_or,  v should be 7 ............. %2ld\n", atomic_get(&v));
+	
+	printf("\ndone.\n");
+	return 0;
+}
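
The test above only checks that the barrier opcodes execute; it cannot catch ordering bugs. As an illustration of what the barriers are for (an assumed example, not part of this commit), the classic publish/consume pattern pairs membar_write() with membar_read():

#include "../atomic_ops.h"

static volatile long data;
static atomic_t ready;

/* writer: fill in the data, then raise the flag; membar_write() keeps
 * the data store from being reordered after the flag store */
static void publish(long v)
{
	data=v;
	membar_write();
	atomic_set(&ready, 1);
}

/* reader: wait for the flag, then read the data; membar_read() keeps
 * the data load from being performed before the flag load */
static long consume(void)
{
	while(!atomic_get(&ready))
		; /* spin */
	membar_read();
	return data;
}
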
new file mode 100644
@@ -0,0 +1,40 @@
+/*
+ *
+ *  simple locking test program
+ *  (no parallel stuff)
+ * 
+ *  Compile with: gcc -D__CPU_i386 -O3 on x86 machines and
+ *                gcc -mips2 -O2 -D__CPU_mips2  on mips machines.
+ *  -- andrei
+ *
+ *  
+ */
+
+#include <stdio.h>
+#include "../fastlock.h"
+
+
+int main(int argc, char** argv)
+{
+	fl_lock_t lock;
+	int r;
+	
+	lock=0;
+	printf("starting locking basic tests...\n");
+	
+	r=tsl(&lock);
+	printf(" tsl should return 0                 ... %d\n", r);
+	printf("     lock should be 1 now            ... %d\n", lock);
+	r=tsl(&lock);
+	printf(" tsl should return 1                 ... %d\n", r);
+	printf("     lock should still be 1 now      ... %d\n", lock);
+	release_lock(&lock);
+	printf(" release_lock: lock should be 0 now  ... %d\n", lock);
+	printf("trying tsl once more...\n");
+	r=tsl(&lock);
+	printf(" tsl should return 0                 ... %d\n", r);
+	printf("     lock should be 1 now            ... %d\n", lock);
+	printf("\ndone.\n");
+	return 0;
+}
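
As the test documents, tsl() returns 0 on a successful acquisition and 1 when the lock was already held, so a complete spinlock use looks roughly like this (illustrative sketch, not from this commit):

#include "../fastlock.h"

static fl_lock_t counter_lock=0;
static long counter;

/* spin until the lock is acquired, update the protected counter,
 * then release */
static void counter_inc(void)
{
	while(tsl(&counter_lock))
		; /* busy-wait: the lock is still held elsewhere */
	counter++;
	release_lock(&counter_lock);
}
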
@@ -32,7 +32,7 @@ int tsl(fl_lock_t* lock)
 		"    nop \n\t"
 		".set reorder\n\t"
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
-		: "0" (tmp), "2" (*lock) 
+		: "0" (tmp), "m" (*lock) 
 		: "cc"
 	);
 #elif defined __CPU_i386
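
For context on this one-line fix: the old input constraint "2" (*lock) tied the input to output operand %2, which is a memory operand ("=m"); what the asm actually needs is only to tell gcc that *lock is also read, which is exactly what "m" (*lock) says. A reduced illustration of the same input/output constraint pattern, using an x86 exchange chosen purely for demonstration:

static inline int test_and_set(volatile int* lock)
{
	int val=1;

	/* *lock is both a memory output ("=m") and a memory input ("m");
	 * val is read and written in place, expressed with the matching
	 * constraint "1" */
	asm volatile(
			"xchgl %1, %0 \n\t"
			: "=m"(*lock), "=r"(val)
			: "m"(*lock), "1"(val)
			: "memory"
			);
	return val;	/* previous value: 0 => we got the lock */
}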