Browse code

- experimental ppc locking tweaks (not tested)
- ppc64 enabled (the same ppc code should work)

Andrei Pelinescu-Onciul authored on 25/05/2005 10:53:44
Showing 4 changed files
... ...
@@ -173,6 +173,7 @@ tar:
173 173
 		--exclude=$(notdir $(CURDIR))/debian/ser* \
174 174
 		--exclude=$(notdir $(CURDIR))/ser_tls* \
175 175
 		--exclude=CVS* \
176
+		--exclude=.svn* \
176 177
 		--exclude=.cvsignore \
177 178
 		--exclude=*.[do] \
178 179
 		--exclude=*.so \
... ...
@@ -37,6 +37,7 @@
37 37
 #  2004-12-14  gcc-3.4 special case added (andrei)
38 38
 #  2004-12-15  HAVE_ALLOCA_H added (andrei)
39 39
 #  2004-12-19  amd64 transformed in x86_64 (andrei)
40
+#  2005-04-27  alpha support added (andrei)
40 41
 
41 42
 
42 43
 # check if already included/exported
... ...
@@ -53,7 +54,7 @@ MAIN_NAME=ser
53 54
 VERSION = 0
54 55
 PATCHLEVEL = 10
55 56
 SUBLEVEL =   99
56
-EXTRAVERSION = -dev6
57
+EXTRAVERSION = -dev7
57 58
 
58 59
 RELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
59 60
 OS = $(shell uname -s | sed -e s/SunOS/solaris/ | tr "[A-Z]" "[a-z]")
... ...
@@ -411,6 +412,10 @@ ifeq ($(ARCH), ppc)
411 412
 	use_fast_lock=yes
412 413
 endif
413 414
 
415
+ifeq ($(ARCH), ppc64)
416
+	use_fast_lock=yes
417
+endif
418
+
414 419
 ifeq ($(ARCH), mips)
415 420
 # mips1 arch. (e.g. R3000) - no hardware locking support
416 421
 	use_fast_lock=no
... ...
@@ -421,6 +426,10 @@ ifeq ($(ARCH), mips2)
421 426
 	use_fast_lock=yes
422 427
 endif
423 428
 
429
+ifeq ($(ARCH), alpha)
430
+	use_fast_lock=yes
431
+endif
432
+
424 433
 ifeq ($(use_fast_lock), yes)
425 434
 	DEFS+= -DFAST_LOCK -DADAPTIVE_WAIT -DADAPTIVE_WAIT_LOOPS=1024 
426 435
 	found_lock_method=yes
... ...
@@ -703,6 +712,106 @@ $(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
703 712
 endif		#CC_NAME, gcc
704 713
 endif	#ARCH, mips2
705 714
 
715
+
716
+#if  alpha
717
+ifeq	($(ARCH), alpha)
718
+		# if gcc 
719
+ifeq		($(CC_NAME), gcc)
720
+				#common stuff
721
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
722
+			#if gcc 3.4+
723
+ifeq			($(CC_SHORTVER), 3.4)
724
+					CFLAGS+=
725
+else
726
+			#if gcc 3.0
727
+ifeq			($(CC_SHORTVER), 3.0)
728
+					CFLAGS+=
729
+else
730
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
731
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
732
+					for better results)
733
+					CFLAGS+=
734
+else
735
+				#really old version
736
+$(warning			You are using an old and unsupported gcc \
737
+					 version ($(CC_SHORTVER)), compile at your own risk!)
738
+	
739
+endif			# CC_SHORTVER, 2.9x
740
+endif			# CC_SHORTVER, 3.0
741
+endif			# CC_SHORTVER, 3.4
742
+	
743
+else		# CC_NAME, gcc
744
+				#other compilers
745
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
746
+endif		#CC_NAME, gcc
747
+endif	#ARCH, alpha 
748
+
749
+#if  ppc
750
+ifeq	($(ARCH), ppc)
751
+		# if gcc 
752
+ifeq		($(CC_NAME), gcc)
753
+				#common stuff
754
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
755
+			#if gcc 3.4+
756
+ifeq			($(CC_SHORTVER), 3.4)
757
+					CFLAGS+=
758
+else
759
+			#if gcc 3.0
760
+ifeq			($(CC_SHORTVER), 3.0)
761
+					CFLAGS+=
762
+else
763
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
764
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
765
+					for better results)
766
+					CFLAGS+=
767
+else
768
+				#really old version
769
+$(warning			You are using an old and unsupported gcc \
770
+					 version ($(CC_SHORTVER)), compile at your own risk!)
771
+	
772
+endif			# CC_SHORTVER, 2.9x
773
+endif			# CC_SHORTVER, 3.0
774
+endif			# CC_SHORTVER, 3.4
775
+	
776
+else		# CC_NAME, gcc
777
+				#other compilers
778
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
779
+endif		#CC_NAME, gcc
780
+endif	#ARCH, ppc 
781
+
782
+#if  ppc64
783
+ifeq	($(ARCH), ppc64)
784
+		# if gcc 
785
+ifeq		($(CC_NAME), gcc)
786
+				#common stuff
787
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
788
+			#if gcc 3.4+
789
+ifeq			($(CC_SHORTVER), 3.4)
790
+					CFLAGS+=
791
+else
792
+			#if gcc 3.0
793
+ifeq			($(CC_SHORTVER), 3.0)
794
+					CFLAGS+=
795
+else
796
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
797
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
798
+					for better results)
799
+					CFLAGS+=
800
+else
801
+				#really old version
802
+$(warning			You are using an old and unsupported gcc \
803
+					 version ($(CC_SHORTVER)), compile at your own risk!)
804
+	
805
+endif			# CC_SHORTVER, 2.9x
806
+endif			# CC_SHORTVER, 3.0
807
+endif			# CC_SHORTVER, 3.4
808
+	
809
+else		# CC_NAME, gcc
810
+				#other compilers
811
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
812
+endif		#CC_NAME, gcc
813
+endif	#ARCH, ppc64
814
+
706 815
 CFLAGS+= $(CC_EXTRA_OPTS)
707 816
 
708 817
 
... ...
@@ -3,6 +3,14 @@ Release notes for SIP Express Router (ser)
3 3
 
4 4
 $Id$
5 5
 
6
+0.10.99-dev changes
7
+
8
+
9
+new archs:
10
+  - powerpc64 support
11
+  - alpha experimental support
12
+
13
+
6 14
 0.8.99-dev changes
7 15
 
8 16
 
... ...
@@ -38,6 +38,10 @@
38 38
  *  2004-09-12  added MIPS locking for ISA>=2 (>r3000)  (andrei)
39 39
  *  2004-12-16  for now use the same locking code for sparc32 as for sparc64
40 40
  *               (it will work only if NOSMP is defined) (andrei)
41
+ *  2005-04-27  added alpha locking code (andrei)
42
+ *  2005-05-25  PPC locking code enabled for PPC64; added a lwsync to
43
+ *               the tsl part and replaced the sync with a lwsync for the
44
+ *               unlock part (andrei)
41 45
  *
42 46
  */
43 47
 
... ...
@@ -99,13 +103,17 @@ inline static int tsl(fl_lock_t* lock)
99 103
 			: "r"(1), "r" (lock) : "memory"
100 104
 	);
101 105
 	
102
-#elif defined __CPU_ppc
106
+#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
103 107
 	asm volatile(
104 108
 			"1: lwarx  %0, 0, %2\n\t"
105 109
 			"   cmpwi  %0, 0\n\t"
106 110
 			"   bne    0f\n\t"
107 111
 			"   stwcx. %1, 0, %2\n\t"
108 112
 			"   bne-   1b\n\t"
113
+			"   lwsync\n\t" /* lwsync or isync, lwsync is faster
114
+							   and should work, see
115
+							   [ IBM Programming environments Manual, D.4.1.1]
116
+							 */
109 117
 			"0:\n\t"
110 118
 			: "=r" (val)
111 119
 			: "r"(1), "b" (lock) :
... ...
@@ -127,6 +135,25 @@ inline static int tsl(fl_lock_t* lock)
127 135
 		: "0" (tmp), "2" (*lock) 
128 136
 		: "cc"
129 137
 	);
138
+#elif defined __CPU_alpha
139
+	long tmp;
140
+	tmp=0;
141
+	/* lock low bit set to 1 when the lock is held and to 0 otherwise */
142
+	asm volatile(
143
+		"1:  ldl %0, %1   \n\t"
144
+		"    blbs %0, 2f  \n\t"  /* optimization if locked */
145
+		"    ldl_l %0, %1 \n\t"
146
+		"    blbs %0, 2f  \n\t" 
147
+		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
148
+		"    stl_c %2, %1 \n\t"
149
+		"    beq %2, 1b   \n\t"
150
+		"    mb           \n\t"
151
+		"2:               \n\t"
152
+		:"=&r" (val), "=m"(*lock), "=r"(tmp)
153
+		:"1"(*lock)  /* warning on gcc 3.4: replace it with m or remove
154
+						it and use +m in the input line ? */
155
+		: "memory"
156
+	);
130 157
 #else
131 158
 #error "unknown architecture"
132 159
 #endif
... ...
@@ -180,23 +207,33 @@ inline static void release_lock(fl_lock_t* lock)
180 207
 		: "r"(0), "r"(lock)
181 208
 		: "memory"
182 209
 	);
183
-#elif defined __CPU_ppc
210
+#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
184 211
 	asm volatile(
185
-			"sync\n\t"
212
+			/* "sync\n\t"  lwsync is faster and will work
213
+			 *             here too
214
+			 *             [IBM Programming Environments Manual, D.4.2.2]
215
+			 */
216
+			"lwsync\n\t"
186 217
 			"stw %0, 0(%1)\n\t"
187 218
 			: /* no output */
188 219
 			: "r"(0), "b" (lock)
189 220
 			: "memory"
190
-        );
221
+	);
191 222
 	*lock = 0;
192 223
 #elif defined __CPU_mips2
193
-		asm volatile(
224
+	asm volatile(
194 225
 		".set noreorder \n\t"
195 226
 		"    sync \n\t"
196 227
 		"    sw $0, %0 \n\t"
197 228
 		".set reorder \n\t"
198 229
 		: /*no output*/  : "m" (*lock) : "memory"
199 230
 	);
231
+#elif defined __CPU_alpha
232
+	asm volatile(
233
+		"    mb          \n\t"
234
+		"    stl $31, %0 \n\t"
235
+		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
236
+	);  
200 237
 #else
201 238
 #error "unknown architecture"
202 239
 #endif