- advanced synchronization functions: atomic operations (inc, dec, inc_and_test, dec_and_test, or, and) and memory barriers. [ work in progress, for now: x86, x86_64 and mips2 ]

Andrei Pelinescu-Onciul authored on 09/03/2006 20:44:08
Showing 5 changed files
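
The commit adds four new files (the atomic ops implementation, its header, and two small test programs) and touches one existing one. Before the diffs, a quick sketch of how the new API is meant to be used (hypothetical code, not part of this commit; the struct and the obj_free() destructor are illustrative names): a reference count that releases its object when it drops to zero.

    #include "atomic_ops.h"

    struct obj {
        atomic_t refcnt;
        /* ... payload ... */
    };

    void obj_free(struct obj* o); /* hypothetical destructor */

    static void obj_get(struct obj* o)
    {
        atomic_inc(&o->refcnt);
    }

    static void obj_put(struct obj* o)
    {
        /* atomic_dec_and_test() returns 1 only when the new value is 0 */
        if (atomic_dec_and_test(&o->refcnt))
            obj_free(o);
    }
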
new file mode 100644
@@ -0,0 +1,64 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    info@iptel.org
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic operations init
+ */
+/* 
+ * History:
+ * --------
+ *  2006-03-08  created by andrei
+ */
+
+#include "atomic_ops.h"
+
+#ifdef ATOMIC_USE_LOCK
+gen_lock_t* atomic_lock;
+#endif
+
+
+/* returns 0 on success, -1 on error */
+int atomic_ops_init()
+{
+	int ret;
+	
+	ret=0;
+#ifdef ATOMIC_USE_LOCK
+	if ((atomic_lock=lock_alloc())==0){
+		ret=-1;
+		goto end;
+	}
+	if (lock_init(atomic_lock)==0){
+		ret=-1;
+		/* the lock was never initialized => only deallocate it */
+		lock_dealloc(atomic_lock);
+		atomic_lock=0;
+		goto end;
+	}
+end:
+#endif
+	return ret;
+}
+
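
On architectures without native support the ops fall back to a global lock (ATOMIC_USE_LOCK, see the header below), so atomic_ops_init() must be called before the first atomic operation. A minimal startup sketch (hypothetical caller, not part of this commit):

    if (atomic_ops_init() != 0) {
        /* the fallback could not allocate or initialize its lock */
        fprintf(stderr, "ERROR: atomic_ops_init failed\n");
        exit(-1);
    }
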
new file mode 100644
@@ -0,0 +1,362 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    info@iptel.org
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic operations and memory barriers
+ *  WARNING: atomic ops do not include memory barriers
+ *  
+ *  memory barriers:
+ *  ----------------
+ *
+ *  void membar()        - memory barrier (load & store)
+ *  void membar_read()   - load (read) memory barrier
+ *  void membar_write()  - store (write) memory barrier
+ *
+ *  Note: properly using memory barriers is tricky; in general try not to 
+ *        depend on them. Locks include memory barriers, so you don't need
+ *        them for writes/loads already protected by locks.
+ *
+ * atomic operations:
+ * ------------------
+ *  type: atomic_t
+ *
+ *  void atomic_set(atomic_t* v, long i)      - v->val=i
+ *  long atomic_get(atomic_t* v)              - return v->val
+ *  void atomic_inc(atomic_t* v)
+ *  void atomic_dec(atomic_t* v)
+ *  long atomic_inc_and_test(atomic_t* v)     - returns 1 if the result is 0
+ *  long atomic_dec_and_test(atomic_t* v)     - returns 1 if the result is 0
+ *  void atomic_or (atomic_t* v, long mask)   - v->val|=mask
+ *  void atomic_and(atomic_t* v, long mask)   - v->val&=mask
+ *  
+ */
+/* 
+ * History:
+ * --------
+ *  2006-03-08  created by andrei
+ */
+#ifndef __atomic_ops
+#define __atomic_ops
+
+/* atomic_t is defined as a struct to easily catch non-atomic ops on it,
+ * e.g.  atomic_t foo; foo++;  will generate a compile error */
+typedef struct{ volatile long val; } atomic_t;
+
+
+/* store and load operations are atomic on all cpus; note however that they
+ * don't include memory barriers, so if you want to use atomic_{get,set}
+ * to implement mutexes you must explicitly use the barriers */
+#define atomic_set(at_var, value)	((at_var)->val=(value))
+#define atomic_get(at_var) ((at_var)->val)
+
+/* init atomic ops */
+int atomic_ops_init();
+
+
+
+#if defined(__CPU_i386) || defined(__CPU_x86_64)
+
+#ifdef NOSMP
+#define __LOCK_PREF 
+#else
+#define __LOCK_PREF "lock ;"
+#endif
+
+
+/* memory barriers */
+
+#ifdef NOSMP
+
+#define membar()
+#define membar_read()
+#define membar_write()
+
+#else
+
+/* although most x86 cpus do stores in order, we're playing it safe and
+ * use oostore-ready write barriers */
+#define X86_OOSTORE 
+
+/* mfence, lfence and sfence are available only on newer cpus, so for now
+ * stick to a locked no-op add on the stack; note that on x86_64 the stack
+ * pointer is %rsp, not %esp */
+#ifdef __CPU_x86_64
+#define membar() \
+	asm volatile( \
+					" lock; addl $0, 0(%%rsp) \n\t " \
+					: : : "memory" \
+				) 
+#else
+#define membar() \
+	asm volatile( \
+					" lock; addl $0, 0(%%esp) \n\t " \
+					: : : "memory" \
+				) 
+#endif
+
+#define membar_read()	membar()
+
+#ifdef X86_OOSTORE
+/* out of order store version */
+#define membar_write()	membar()
+#else
+/* no oostore (most x86 cpus) => do nothing, just a gcc do-not-cache barrier */
+#define membar_write()	asm volatile ("" : : : "memory")
+#endif
+
+
+#endif /* NOSMP */
+
+#define atomic_inc(var) \
+	asm volatile( \
+			__LOCK_PREF " incl %0 \n\t"  \
+			: "=m"((var)->val) : "m"((var)->val) : "cc" \
+			) 
+
+#define atomic_dec(var) \
+	asm volatile( \
+			__LOCK_PREF " decl %0 \n\t" \
+			: "=m"((var)->val) : "m"((var)->val) : "cc" \
+			) 
+
+#define atomic_and(var, i) \
+	asm volatile( \
+			__LOCK_PREF " andl %1, %0 \n\t" \
+			: "=m"((var)->val) : "r"((i)), "m"((var)->val) : "cc" \
+			)
+
+#define atomic_or(var, i) \
+	asm volatile( \
+			__LOCK_PREF " orl %1, %0 \n\t" \
+			: "=m"((var)->val) : "r"((i)), "m"((var)->val) : "cc" \
+			)
+
+
+/* returns 1 if the result is 0 */
+inline static long atomic_inc_and_test(atomic_t* var)
+{
+	char ret;
+	
+	asm volatile(
+			__LOCK_PREF " incl %0 \n\t"
+			"setz  %1 \n\t"
+			: "=m"(var->val), "=qm"(ret) : "m" (var->val) : "cc"
+			);
+	return ret;
+}
+
+
+/* returns 1 if the result is 0 */
+inline static long atomic_dec_and_test(atomic_t* var)
+{
+	char ret;
+	
+	asm volatile(
+			__LOCK_PREF " decl %0 \n\t"
+			"setz  %1 \n\t"
+			: "=m"(var->val), "=qm"(ret) : "m" (var->val) : "cc"
+			);
+	return ret;
+}
+
+
+
+#elif defined __CPU_mips2
+
+#ifdef NOSMP
+#define membar()
+#define membar_read()  membar()
+#define membar_write() membar()
+#else
+
+#define membar() \
+	asm volatile( \
+			".set noreorder \n\t" \
+			"    sync\n\t" \
+			".set reorder \n\t" \
+			: : : "memory" \
+			) 
+
+#define membar_read()  membar()
+#define membar_write() membar()
+
+#endif /* NOSMP */
+
+
+/* main asm block: an ll/sc read-modify-write retry loop. ll (load linked)
+ * loads the value into %1 (ret) and links the address; "op" computes the
+ * new value into %2 (tmp); sc (store conditional) stores %2 only if the
+ * address was not written in between, leaving 1 in %2 on success and 0 on
+ * failure, in which case beqz restarts the loop */
+#define ATOMIC_ASM_OP(op) \
+			".set noreorder \n\t" \
+			"1:   ll %1, %0 \n\t" \
+			"     " op "\n\t" \
+			"     sc %2, %0 \n\t" \
+			"     beqz %2, 1b \n\t" \
+			"     nop \n\t" \
+			".set reorder \n\t" 
+
+
+/* declares an atomic op; ret is the value loaded by ll (the old value)
+ * and RET_EXPR computes the function's result from it */
+#define ATOMIC_FUNC_DECL(NAME, OP, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* same as above, but with the constant CT in %3 */
+#define ATOMIC_FUNC_DECL_CT(NAME, OP, CT, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "r"((CT)), "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* takes an extra param, i, which goes in %3 */
+#define ATOMIC_FUNC_DECL1(NAME, OP, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME (atomic_t *var, long i) \
+	{ \
+		long ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=m"((var)->val), "=&r"(ret), "=&r"(tmp)  \
+			: "r"((i)), "m"((var)->val) \
+			 \
+			); \
+		return RET_EXPR; \
+	}
+
+
+ATOMIC_FUNC_DECL(inc,      "addiu %2, %1, 1", void, /* no return */ )
+ATOMIC_FUNC_DECL(inc_and_test, "addiu %2, %1, 1", long, (ret+1)==0 )
+
+ATOMIC_FUNC_DECL_CT(dec,   "subu %2, %1, %3", 1,  void, /* no return */ )
+ATOMIC_FUNC_DECL_CT(dec_and_test, "subu %2, %1, %3", 1, long, (ret-1)==0 )
+
+ATOMIC_FUNC_DECL1(and, "and %2, %1, %3", void, /* no return */ )
+ATOMIC_FUNC_DECL1(or,  "or  %2, %1, %3", void,  /* no return */ )
+
+#else /* no known cpu */
+
+#include "locking.h"
+
+#define ATOMIC_USE_LOCK
+
+extern gen_lock_t* atomic_lock;
+
+
+#ifdef NOSMP
+#define smp_atomic_lock
+#define smp_atomic_unlock
+#else
+#define smp_atomic_lock    lock_get(atomic_lock)
+#define smp_atomic_unlock  lock_release(atomic_lock)
+#endif
+
+/* memory barriers 
+ *  not a known cpu -> fall back to lock/unlock: safe but costly (it should 
+ *  include a memory barrier effect) */
+
+#define membar() \
+	do{\
+		smp_atomic_lock; \
+		smp_atomic_unlock; \
+	} while(0)
+
+#define membar_write() membar()
+
+#define membar_read()  membar()
+
+
+/* atomic ops */
+
+#define atomic_inc(var) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val++;\
+		smp_atomic_unlock;\
+	}while(0)
+
+
+#define atomic_dec(var) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val--; \
+		smp_atomic_unlock; \
+	}while(0)
+
+
+#define atomic_and(var, i) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val&=(i); \
+		smp_atomic_unlock; \
+	}while(0)
+
+#define atomic_or(var, i) \
+	do{ \
+		smp_atomic_lock; \
+		(var)->val|=(i); \
+		smp_atomic_unlock; \
+	}while(0)
+
+
+
+/* returns true if the result is 0 */
+inline static long atomic_inc_and_test(atomic_t* var)
+{
+	long ret;
+	
+	smp_atomic_lock;
+	var->val++;
+	ret=var->val;
+	smp_atomic_unlock;
+	
+	return (ret==0);
+}
+
+
+/* returns true if the result is 0 */
+inline static long atomic_dec_and_test(atomic_t* var)
+{
+	long ret;
+	
+	smp_atomic_lock;
+	var->val--;
+	ret=var->val;
+	smp_atomic_unlock;
+	
+	return (ret==0);
+}
+
+
+#endif /* if __CPU_xx */
+
+#endif /* __atomic_ops */
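
The warning at the top of the header (atomic ops do not include memory barriers) matters whenever one process publishes data for another. The classic pattern the three barriers support, as a sketch (shared, compute() and consume() are illustrative names, not part of this commit):

    /* writer: fill in the payload, then publish the flag */
    shared->data = compute();
    membar_write();     /* the data store must not pass the flag store */
    shared->ready = 1;

    /* reader: wait for the flag, then the payload is safe to read */
    while (!shared->ready)
        ; /* spin */
    membar_read();      /* the data load must not pass the flag load */
    consume(shared->data);
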
new file mode 100644
@@ -0,0 +1,66 @@
+/*
+ *
+ *  simple atomic ops testing program
+ *  (no parallel stuff)
+ * 
+ *  Compile with: gcc -D__CPU_i386 -O3 on x86 machines and
+ *                gcc -mips2 -O2 -D__CPU_mips2  on mips machines.
+ *  -- andrei
+ *
+ *  
+ */
+
+#include <stdio.h>
+#include "../atomic_ops.h"
+
+
+int main(int argc, char** argv)
+{
+	int r;
+	atomic_t v;
+#ifdef NOSMP
+	printf("no-smp mode\n");
+#else
+	printf("smp mode\n");
+#endif
+	
+	printf("\nstarting memory barrier opcode tests...\n");
+	membar();
+	printf(" membar() .............................. ok\n");
+	membar_write();
+	printf(" membar_write() ........................ ok\n");
+	membar_read();
+	printf(" membar_read() ......................... ok\n");
+	
+	printf("\nstarting atomic ops basic tests...\n");
+	
+	atomic_set(&v, 1);
+	printf(" atomic_set, v should be 1 ............. %2ld\n", atomic_get(&v));
+	atomic_inc(&v);
+	printf(" atomic_inc, v should be 2 ............. %2ld\n", atomic_get(&v));
+	r=atomic_inc_and_test(&v);
+	printf(" atomic_inc_and_test, v should be  3 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	
+	atomic_dec(&v);
+	printf(" atomic_dec, v should be 2 ............. %2ld\n", atomic_get(&v));
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be  1 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be  0 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  1 ... %2d\n", r);
+	r=atomic_dec_and_test(&v);
+	printf(" atomic_dec_and_test, v should be -1 ... %2ld\n", atomic_get(&v));
+	printf("                      r should be  0 ... %2d\n", r);
+	
+	atomic_and(&v, 2);
+	printf(" atomic_and, v should be 2 ............. %2ld\n", atomic_get(&v));
+	
+	atomic_or(&v, 5);
+	printf(" atomic_or,  v should be 7 ............. %2ld\n", atomic_get(&v));
+	
+	printf("\ndone.\n");
+	return 0;
+}
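
The compile lines in the comment above build the SMP variants; adding -DNOSMP builds the uniprocessor code paths instead (the program prints which mode it was compiled in). For example, assuming the file is saved as atomic_test.c (the file name is not visible in this view):

    gcc -D__CPU_i386 -O3 atomic_test.c -o atomic_test
    gcc -D__CPU_i386 -O3 -DNOSMP atomic_test.c -o atomic_test_nosmp
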
new file mode 100644
@@ -0,0 +1,40 @@
+/*
+ *
+ *  simple locking test program
+ *  (no parallel stuff)
+ * 
+ *  Compile with: gcc -D__CPU_i386 -O3 on x86 machines and
+ *                gcc -mips2 -O2 -D__CPU_mips2  on mips machines.
+ *  -- andrei
+ *
+ *  
+ */
+
+#include <stdio.h>
+#include "../fastlock.h"
+
+
+int main(int argc, char** argv)
+{
+	fl_lock_t lock;
+	int r;
+	
+	lock=0;
+	printf("starting locking basic tests...\n");
+	
+	r=tsl(&lock);
+	printf(" tsl should return 0                 ... %d\n", r);
+	printf("     lock should be 1 now            ... %d\n", lock);
+	r=tsl(&lock);
+	printf(" tsl should return 1                 ... %d\n", r);
+	printf("     lock should still be 1 now      ... %d\n", lock);
+	release_lock(&lock);
+	printf(" release_lock: lock should be 0 now  ... %d\n", lock);
+	printf("trying tsl once more...\n");
+	r=tsl(&lock);
+	printf(" tsl should return 0                 ... %d\n", r);
+	printf("     lock should be 1 now            ... %d\n", lock);
+	printf("\ndone.\n");
+	return 0;
+}
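
The test above pins down the tsl() contract: it returns the previous lock value, so 0 means the caller acquired the lock and 1 means it was already held; release_lock() sets it back to 0. That is exactly the shape a spin lock needs, sketched here for illustration (real callers should add some form of backoff or yield in the loop):

    fl_lock_t lock = 0;

    while (tsl(&lock))
        ; /* spin: somebody else holds the lock */
    /* ... critical section ... */
    release_lock(&lock);
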
@@ -32,7 +32,7 @@ int tsl(fl_lock_t* lock)
 		"    nop \n\t"
 		".set reorder\n\t"
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
-		: "0" (tmp), "2" (*lock) 
+		: "0" (tmp), "m" (*lock) 
 		: "cc"
 	);
 #elif defined __CPU_i386
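
Finally, the one-line change to tsl() above: the old "2" (*lock) input used a matching-digit constraint against output operand 2, which is a memory operand ("=m" (*lock)); gcc matching constraints are intended for register operands, so this was at best fragile. Writing the input as "m" (*lock) expresses the same read dependency on *lock in a form gcc handles correctly.
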