Browse code

- lock optimizations: use the lock-specific membar only if the lock_get operation succeeded (i.e. don't use it while spinning) => move the membar call out of tsl and into the calling function (get_lock or try_lock)

- added try_lock: like get_lock but doesn't block (returns -1 on failure
and 0 on success)

Andrei Pelinescu-Onciul authored on 03/04/2006 14:30:51
Showing 2 changed files
... ...
@@ -44,6 +44,10 @@
44 44
  *               unlock part (andrei)
45 45
  *  2006-03-08  mips2 NOSMP (skip sync), optimized x86 & mips clobbers and
46 46
  *               input/output constraints (andrei)
47
+ *  2006-04-03  optimization: call lock_get memory barrier outside tsl, in the
48
+ *               calling function, only if the lock operation succeeded
49
+ *               (membar_getlock()) (andrei)
50
+ *              added try_lock()  (andrei)
47 51
  *
48 52
  */
49 53
 
... ...
@@ -67,8 +71,58 @@ typedef  volatile int fl_lock_t;
67 71
 #define init_lock( l ) (l)=0
68 72
 
69 73
 
74
+/* what membar to use (if any) after taking a lock. This
75
+ *  was separated from the lock code to allow better optimizations.
76
+ *  e.g.: use the membar_getlock only after getting the lock and don't use 
77
+ *  it if lock_get fails / when spinning on tsl.
78
+ *  There is no corresponding membar_release_lock (because lock_release
79
+ *  must always include the needed memory barrier).
80
+ *  WARNING: this is intended only for internal fastlock use*/
81
+#if defined(__CPU_i386) || defined(__CPU_x86_64)
82
+#define membar_getlock()   /* not needed on x86 */
83
+#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
84
+#ifndef NOSMP
85
+#define membar_getlock() \
86
+	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
87
+#else
88
+/* no need for a compiler barrier, that is already included in lock_get/tsl*/
89
+#define membar_getlock() /* not needed if no smp*/
90
+#endif /* NOSMP */
91
+#elif defined __CPU_arm || defined __CPU_arm6
92
+#error "FIXME: check arm6 membar"
93
+#define membar_getlock() 
94
+#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
95
+#ifndef NOSMP
96
+#define membar_getlock() \
97
+	asm volatile("lwsync \n\t" : : : "memory");
98
+#else
99
+#define membar_getlock() 
100
+#endif /* NOSMP */
101
+#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
102
+	|| defined __CPU_mips64
103
+#ifndef NOSMP
104
+#define membar_getlock() \
105
+	asm volatile("sync \n\t" : : : "memory");
106
+#else
107
+#define membar_getlock() 
108
+#endif /* NOSMP */
109
+#elif defined __CPU_alpha
110
+#ifndef NOSMP
111
+#define membar_getlock() \
112
+	asm volatile("mb \n\t" : : : "memory");
113
+#else
114
+#define membar_getlock() 
115
+#endif /* NOSMP */
116
+#else
117
+#error "unknown architecture"
118
+#endif
119
+
120
+
70 121
 
71
-/*test and set lock, ret 1 if lock held by someone else, 0 otherwise*/
122
+/*test and set lock, ret 1 if lock held by someone else, 0 otherwise
123
+ * WARNING: no memory barriers included, if you use this function directly
124
+ *          (not recommended) and it gets the lock (ret==0), you should call 
125
+ *          membar_getlock() after it */
72 126
 inline static int tsl(fl_lock_t* lock)
73 127
 {
74 128
 	int val;
... ...
@@ -91,9 +145,7 @@ inline static int tsl(fl_lock_t* lock)
91 145
 #elif defined(__CPU_sparc64) || defined(__CPU_sparc)
92 146
 	asm volatile(
93 147
 			"ldstub [%1], %0 \n\t"
94
-#ifndef NOSMP
95
-			"membar #StoreStore | #StoreLoad \n\t"
96
-#endif
148
+			/* membar_getlock must be  called outside this function */
97 149
 			: "=r"(val) : "r"(lock):"memory"
98 150
 	);
99 151
 	
... ...
@@ -112,10 +164,7 @@ inline static int tsl(fl_lock_t* lock)
112 164
 			"   bne    0f\n\t"
113 165
 			"   stwcx. %1, 0, %2\n\t"
114 166
 			"   bne-   1b\n\t"
115
-			"   lwsync\n\t" /* lwsync or isync, lwsync is faster
116
-							   and should work, see
117
-							   [ IBM Programming environments Manual, D.4.1.1]
118
-							 */
167
+			/* membar_getlock must be  called outside this function */
119 168
 			"0:\n\t"
120 169
 			: "=r" (val)
121 170
 			: "r"(1), "b" (lock) :
... ...
@@ -134,9 +183,7 @@ inline static int tsl(fl_lock_t* lock)
134 183
 		"    sc %0, %2  \n\t"
135 184
 		"    beqz %0, 1b \n\t"
136 185
 		"    nop \n\t"
137
-#ifndef NOSMP
138
-		"    sync \n\t"
139
-#endif
186
+		/* membar_getlock must be called outside this function */
140 187
 		".set pop\n\t"
141 188
 		: "=&r" (tmp), "=&r" (val), "=m" (*lock) 
142 189
 		: "m" (*lock) 
... ...
@@ -154,7 +201,7 @@ inline static int tsl(fl_lock_t* lock)
154 201
 		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
155 202
 		"    stl_c %2, %1 \n\t"
156 203
 		"    beq %2, 1b   \n\t"
157
-		"    mb           \n\t"
204
+		/* membar_getlock must be called outside this function */
158 205
 		"2:               \n\t"
159 206
 		:"=&r" (val), "=m"(*lock), "=r"(tmp)
160 207
 		:"m"(*lock) 
... ...
@@ -183,6 +230,20 @@ inline static void get_lock(fl_lock_t* lock)
183 230
 		sched_yield();
184 231
 #endif
185 232
 	}
233
+	membar_getlock();
234
+}
235
+
236
+
237
+
238
+/* like get_lock, but it doesn't wait. Returns 0 if it gets the lock,
239
+ *  <0 (-1) otherwise */
240
+inline static int try_lock(fl_lock_t* lock)
241
+{
242
+	if (tsl(lock)){
243
+		return -1;
244
+	}
245
+	membar_getlock();
246
+	return 0;
186 247
 }
187 248
 
188 249
 
... ...
@@ -1,4 +1,5 @@
1 1
 /*
2
+ * $Id$
2 3
  *
3 4
  *  simple locking test program
4 5
  *  (no paralles stuff)
... ...
@@ -23,18 +24,21 @@ int main(int argc, char** argv)
23 24
 	lock=0;
24 25
 	printf("starting locking basic tests...\n");
25 26
 	
26
-	r=tsl(&lock);
27
-	printf(" tsl should return 0                 ... %d\n", r);
27
+	r=try_lock(&lock);
28
+	printf(" try_lock should return 0            ... %d\n", r);
28 29
 	printf("     lock should be 1 now            ... %d\n", lock);
29
-	r=tsl(&lock);
30
-	printf(" tsl should return 1                 ... %d\n", r);
30
+	r=try_lock(&lock);
31
+	printf(" tsl should return -1                ... %d\n", r);
31 32
 	printf("     lock should still be 1 now      ... %d\n", lock);
32 33
 	release_lock(&lock);
33 34
 	printf(" release_lock: lock should be 0 now  ... %d\n", lock);
34
-	printf("trying tsl once more...\n");
35
-	r=tsl(&lock);
36
-	printf(" tsl should return 0                 ... %d\n", r);
35
+	printf("try_lock once more...\n");
36
+	r=try_lock(&lock);
37
+	printf(" try_lock should return 0            ... %d\n", r);
37 38
 	printf("     lock should be 1 now            ... %d\n", lock);
39
+	release_lock(&lock);
40
+	get_lock(&lock);
41
+	printf(" get_lock, lock should be 1 now      ... %d\n", lock);
38 42
 	printf("\ndone.\n");
39 43
 	return 0;
40 44
 }