Browse code

- experimental ppc locking tweaks (not tested) - ppc64 enabled (the same ppc code should work)

Andrei Pelinescu-Onciul authored on 25/05/2005 10:53:44
Showing 4 changed files
... ...
@@ -173,6 +173,7 @@ tar:
173 173
 		--exclude=$(notdir $(CURDIR))/debian/ser* \
174 174
 		--exclude=$(notdir $(CURDIR))/ser_tls* \
175 175
 		--exclude=CVS* \
176
+		--exclude=.svn* \
176 177
 		--exclude=.cvsignore \
177 178
 		--exclude=*.[do] \
178 179
 		--exclude=*.so \
... ...
@@ -37,6 +37,7 @@
37 37
 #  2004-12-14  gcc-3.4 special case added (andrei)
38 38
 #  2004-12-15  HAVE_ALLOCA_H added (andrei)
39 39
 #  2004-12-19  amd64 transformed in x86_64 (andrei)
40
+#  2005-04-27  alpha support added (andrei)
40 41
 
41 42
 
42 43
 # check if already included/exported
... ...
@@ -53,7 +54,7 @@ MAIN_NAME=ser
53 53
 VERSION = 0
54 54
 PATCHLEVEL = 10
55 55
 SUBLEVEL =   99
56
-EXTRAVERSION = -dev6
56
+EXTRAVERSION = -dev7
57 57
 
58 58
 RELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
59 59
 OS = $(shell uname -s | sed -e s/SunOS/solaris/ | tr "[A-Z]" "[a-z]")
... ...
@@ -411,6 +412,10 @@ ifeq ($(ARCH), ppc)
411 411
 	use_fast_lock=yes
412 412
 endif
413 413
 
414
+ifeq ($(ARCH), ppc64)
415
+	use_fast_lock=yes
416
+endif
417
+
414 418
 ifeq ($(ARCH), mips)
415 419
 # mips1 arch. (e.g. R3000) - no hardware locking support
416 420
 	use_fast_lock=no
... ...
@@ -421,6 +426,10 @@ ifeq ($(ARCH), mips2)
421 421
 	use_fast_lock=yes
422 422
 endif
423 423
 
424
+ifeq ($(ARCH), alpha)
425
+	use_fast_lock=yes
426
+endif
427
+
424 428
 ifeq ($(use_fast_lock), yes)
425 429
 	DEFS+= -DFAST_LOCK -DADAPTIVE_WAIT -DADAPTIVE_WAIT_LOOPS=1024 
426 430
 	found_lock_method=yes
... ...
@@ -703,6 +712,106 @@ $(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
703 703
 endif		#CC_NAME, gcc
704 704
 endif	#ARCH, mips2
705 705
 
706
+
707
+#if  alpha
708
+ifeq	($(ARCH), alpha)
709
+		# if gcc 
710
+ifeq		($(CC_NAME), gcc)
711
+				#common stuff
712
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
713
+			#if gcc 3.4+
714
+ifeq			($(CC_SHORTVER), 3.4)
715
+					CFLAGS+=
716
+else
717
+			#if gcc 3.0
718
+ifeq			($(CC_SHORTVER), 3.0)
719
+					CFLAGS+=
720
+else
721
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
722
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
723
+					for better results)
724
+					CFLAGS+=
725
+else
726
+				#really old version
727
+$(warning			You are using an old and unsupported gcc \
728
+					 version ($(CC_SHORTVER)), compile at your own risk!)
729
+	
730
+endif			# CC_SHORTVER, 2.9x
731
+endif			# CC_SHORTVER, 3.0
732
+endif			# CC_SHORTVER, 3.4
733
+	
734
+else		# CC_NAME, gcc
735
+				#other compilers
736
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
737
+endif		#CC_NAME, gcc
738
+endif	#ARCH, alpha 
739
+
740
+#if  ppc
741
+ifeq	($(ARCH), ppc)
742
+		# if gcc 
743
+ifeq		($(CC_NAME), gcc)
744
+				#common stuff
745
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
746
+			#if gcc 3.4+
747
+ifeq			($(CC_SHORTVER), 3.4)
748
+					CFLAGS+=
749
+else
750
+			#if gcc 3.0
751
+ifeq			($(CC_SHORTVER), 3.0)
752
+					CFLAGS+=
753
+else
754
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
755
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
756
+					for better results)
757
+					CFLAGS+=
758
+else
759
+				#really old version
760
+$(warning			You are using an old and unsupported gcc \
761
+					 version ($(CC_SHORTVER)), compile at your own risk!)
762
+	
763
+endif			# CC_SHORTVER, 2.9x
764
+endif			# CC_SHORTVER, 3.0
765
+endif			# CC_SHORTVER, 3.4
766
+	
767
+else		# CC_NAME, gcc
768
+				#other compilers
769
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
770
+endif		#CC_NAME, gcc
771
+endif	#ARCH, ppc 
772
+
773
+#if  ppc64
774
+ifeq	($(ARCH), ppc64)
775
+		# if gcc 
776
+ifeq		($(CC_NAME), gcc)
777
+				#common stuff
778
+				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall 
779
+			#if gcc 3.4+
780
+ifeq			($(CC_SHORTVER), 3.4)
781
+					CFLAGS+=
782
+else
783
+			#if gcc 3.0
784
+ifeq			($(CC_SHORTVER), 3.0)
785
+					CFLAGS+=
786
+else
787
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
788
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
789
+					for better results)
790
+					CFLAGS+=
791
+else
792
+				#really old version
793
+$(warning			You are using an old and unsupported gcc \
794
+					 version ($(CC_SHORTVER)), compile at your own risk!)
795
+	
796
+endif			# CC_SHORTVER, 2.9x
797
+endif			# CC_SHORTVER, 3.0
798
+endif			# CC_SHORTVER, 3.4
799
+	
800
+else		# CC_NAME, gcc
801
+				#other compilers
802
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
803
+endif		#CC_NAME, gcc
804
+endif	#ARCH, ppc64 
805
+
706 806
 CFLAGS+= $(CC_EXTRA_OPTS)
707 807
 
708 808
 
... ...
@@ -3,6 +3,14 @@ Release notes for SIP Express Router (ser)
3 3
 
4 4
 $Id$
5 5
 
6
+0.10.99-dev changes
7
+
8
+
9
+new archs:
10
+  - powerpc64 support
11
+  - alpha experimental support
12
+
13
+
6 14
 0.8.99-dev changes
7 15
 
8 16
 
... ...
@@ -38,6 +38,10 @@
38 38
  *  2004-09-12  added MIPS locking for ISA>=2 (>r3000)  (andrei)
39 39
  *  2004-12-16  for now use the same locking code for sparc32 as for sparc64
40 40
  *               (it will work only if NOSMP is defined) (andrei)
41
+ *  2005-04-27  added alpha locking code (andrei)
42
+ *  2005-05-25  PPC locking code enabled for PPC64; added a lwsync to
43
+ *               the tsl part and replaced the sync with a lwsync for the
44
+ *               unlock part (andrei)
41 45
  *
42 46
  */
43 47
 
... ...
@@ -99,13 +103,17 @@ inline static int tsl(fl_lock_t* lock)
99 99
 			: "r"(1), "r" (lock) : "memory"
100 100
 	);
101 101
 	
102
-#elif defined __CPU_ppc
102
+#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
103 103
 	asm volatile(
104 104
 			"1: lwarx  %0, 0, %2\n\t"
105 105
 			"   cmpwi  %0, 0\n\t"
106 106
 			"   bne    0f\n\t"
107 107
 			"   stwcx. %1, 0, %2\n\t"
108 108
 			"   bne-   1b\n\t"
109
+			"   lwsync\n\t" /* lwsync or isync, lwsync is faster
110
+							   and should work, see
111
+							   [IBM Programming Environments Manual, D.4.1.1]
112
+							 */
109 113
 			"0:\n\t"
110 114
 			: "=r" (val)
111 115
 			: "r"(1), "b" (lock) :
... ...
@@ -127,6 +135,25 @@ inline static int tsl(fl_lock_t* lock)
127 127
 		: "0" (tmp), "2" (*lock) 
128 128
 		: "cc"
129 129
 	);
130
+#elif defined __CPU_alpha
131
+	long tmp;
132
+	tmp=0;
133
+	/* lock low bit set to 1 when the lock is held and to 0 otherwise */
134
+	asm volatile(
135
+		"1:  ldl %0, %1   \n\t"
136
+		"    blbs %0, 2f  \n\t"  /* optimization if locked */
137
+		"    ldl_l %0, %1 \n\t"
138
+		"    blbs %0, 2f  \n\t" 
139
+		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
140
+		"    stl_c %2, %1 \n\t"
141
+		"    beq %2, 1b   \n\t"
142
+		"    mb           \n\t"
143
+		"2:               \n\t"
144
+		:"=&r" (val), "=m"(*lock), "=r"(tmp)
145
+		:"1"(*lock)  /* warning on gcc 3.4: replace it with m or remove
146
+						it and use +m in the output line ? */
147
+		: "memory"
148
+	);
130 149
 #else
131 150
 #error "unknown architecture"
132 151
 #endif
... ...
@@ -180,23 +207,33 @@ inline static void release_lock(fl_lock_t* lock)
180 180
 		: "r"(0), "r"(lock)
181 181
 		: "memory"
182 182
 	);
183
-#elif defined __CPU_ppc
183
+#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
184 184
 	asm volatile(
185
-			"sync\n\t"
185
+			/* "sync\n\t"  lwsync is faster and will work
186
+			 *             here too
187
+			 *             [IBM Programming Environments Manual, D.4.2.2]
188
+			 */
189
+			"lwsync\n\t"
186 190
 			"stw %0, 0(%1)\n\t"
187 191
 			: /* no output */
188 192
 			: "r"(0), "b" (lock)
189 193
 			: "memory"
190
-        );
194
+	);
191 195
 	*lock = 0;
192 196
 #elif defined __CPU_mips2
193
-		asm volatile(
197
+	asm volatile(
194 198
 		".set noreorder \n\t"
195 199
 		"    sync \n\t"
196 200
 		"    sw $0, %0 \n\t"
197 201
 		".set reorder \n\t"
198 202
 		: /*no output*/  : "m" (*lock) : "memory"
199 203
 	);
204
+#elif defined __CPU_alpha
205
+	asm volatile(
206
+		"    mb          \n\t"
207
+		"    stl $31, %0 \n\t"
208
+		: "=m"(*lock) :/* no input*/ : "memory"  /* because of the mb */
209
+	);  
200 210
 #else
201 211
 #error "unknown architecture"
202 212
 #endif