Browse code

- added functions to get the index of the first or last bit set in a 32 bit or 64 bit int: bit_scan_forward32(), bit_scan_forward64(), bit_scan_reverse32(), bit_scan_reverse64(), bit_scan_forward(long) and bit_scan_reverse(long). All of them are very fast: they use asm if available (for now only for __CPU_x86 and __CPU_x86_64), and fall back to a de Bruijn based method or binary search (depending on which method was faster in my measurements on a particular cpu). - added test/profile.h - simple functions for measuring the cpu cycles between two calls (for now with support for x86, x86_64 and sparc64)

Andrei Pelinescu-Onciul authored on 25/06/2007 17:20:34
Showing 4 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+/* 
1
+ * $Id$
2
+ * 
3
+ * Copyright (C) 2007 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/*
18
+ *  bit scan operations, see bit_scan.h.
19
+ */
20
+/* 
21
+ * History:
22
+ * --------
23
+ *  2007-06-23  created by andrei
24
+ */
25
+
26
+#include "bit_scan.h"
27
+
28
/* Lookup table for the 32 bit de Bruijn method: indexed with
 * DEBRUIJN_HASH32(1U<<k) (see bit_scan.h), it yields the bit index k. */
int _debruijn_hash32[32] = {
	 0,  1,  2,  6,  3, 11,  7, 16,
	 4, 14, 12, 21,  8, 23, 17, 26,
	31,  5, 10, 15, 13, 20, 22, 25,
	30,  9, 19, 24, 29, 18, 28, 27
};
30
+
31
/* Lookup table for the 64 bit de Bruijn method: indexed with
 * DEBRUIJN_HASH64(1ULL<<k) (see bit_scan.h), it yields the bit index k. */
int _debruijn_hash64[64] = {
	 0,  1,  2,  7,  3, 13,  8, 19,
	 4, 25, 14, 28,  9, 34, 20, 40,
	 5, 17, 26, 38, 15, 46, 29, 48,
	10, 31, 35, 54, 21, 50, 41, 57,
	63,  6, 12, 18, 24, 27, 33, 39,
	16, 37, 45, 47, 30, 53, 49, 56,
	62, 11, 23, 32, 36, 44, 52, 55,
	61, 22, 43, 51, 60, 42, 59, 58
};
35
+
0 36
new file mode 100644
... ...
@@ -0,0 +1,411 @@
0
+/* 
1
+ * $Id$
2
+ * 
3
+ * Copyright (C) 2007 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/*
18
+ *  bit scan operations
19
+ *  int bit_scan_forward(unsigned long v)   - returns the index of the first
20
+ *                                          set bit (undefined value if v==0)
21
+ *  int bit_scan_forward32(unsigned int v)   - returns the index of the first
22
+ *                                          set bit (undefined value if v==0)
23
+ *  int bit_scan_forward64(unsigned long long v) - returns the index of the first
24
+ *                                          set bit (undefined value if v==0)
25
+ *  int bit_scan_reverse(unsigned long v)   - returns the index of the last
26
+ *                                          set bit (undefined value if v==0)
27
+ *  int bit_scan_reverse32(unsigned int v)  - returns the index of the last
28
+ *                                          set bit (undefined value if v==0)
29
+ *  int bit_scan_reverse64(unsigned long long v) - returns the index of the last
30
+ *                                          set bit (undefined value if v==0)
31
+ *
32
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
33
+ *                     inline asm,
34
+ *                  __CPU_x86, __CPU_x86_64,
35
+ *                  ULONG_MAX (limits.h)
36
+ */
37
+/* 
38
+ * History:
39
+ * --------
40
+ *  2007-06-23  created by andrei
41
+ */
42
+
43
+#ifndef _bit_scan_h
44
+#define _bit_scan_h
45
+
46
+#include <limits.h>
47
+
48
+
49
/* BIT_SCAN_ASM is set only when the compiler understands gcc style inline
 * asm and the target cpu is x86 or x86_64 (the cpus with asm versions). */
#if defined CC_GCC_LIKE_ASM && (defined __CPU_x86 || defined __CPU_x86_64)
#define BIT_SCAN_ASM
#endif
54
+
55
+
56
/* Set the default bit scan versions, depending on the architecture.
 * In general the preference order is  asm, debruijn, br, slow for
 *  bit_scan_forward and asm, br, slow, debruijn for bit_scan_reverse.
 * Each branch defines bit_scan_forward32(), bit_scan_forward64(),
 *  bit_scan_reverse32() and bit_scan_reverse64() and makes sure the
 *  implementations it maps to are enabled (BIT_SCAN_DEBRUIJN and/or
 *  BIT_SCAN_BRANCH). */
#ifdef BIT_SCAN_ASM
/* have asm => use it */

#define bit_scan_forward32(i)	bit_scan_forward_asm32(i)
#define bit_scan_forward64(i)	bit_scan_forward_asm64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_asm32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_asm64(i)

#elif defined __CPU_x86 || defined __CPU_x86_64
/* no asm (e.g. no CC_GCC_LIKE_ASM) => debruijn for bit_scan_forward and
 *  br for bit_scan_reverse */
/* make sure the debruijn and branch versions are enabled */
#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)

#elif defined __CPU_sparc64
/* no asm yet => use branch for everything in 64 bit mode
 *               and debruijn + branch in 32 bit mode
 *  (in 64 bit mode the branch method is slightly faster than debruijn,
 *   however note that in 32 bit mode the roles are reversed for _forward)*/
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)
/* 64 bit mode detection: besides the project level LP64 also accept the
 * compiler provided _LP64 / __LP64__ (profile.h tests _LP64 as well) */
#if defined LP64 || defined _LP64 || defined __LP64__
#define bit_scan_forward32(i)	bit_scan_forward_br32(i)
#define bit_scan_forward64(i)	bit_scan_forward_br64(i)
#else /* LP64 */

#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#endif /* LP64 */

#else /* __CPU_XXX */
/* default - like x86 no asm */
/* make sure the debruijn and branch versions are enabled */
#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)

#endif /* __CPU_XXX */
122
+
123
+
124
/* bit_scan_forward(l) / bit_scan_reverse(l) for an unsigned long argument:
 * map to the 64 bit versions when long is wider than 32 bits (according to
 * ULONG_MAX or to LP64 being defined) and to the 32 bit versions otherwise.
 */
#if defined LP64 || (defined (ULONG_MAX) && ULONG_MAX > 0xffffffff)
/* 64 bit long */
#define bit_scan_reverse(l)	bit_scan_reverse64((unsigned long long)(l))
#define bit_scan_forward(l)	bit_scan_forward64((unsigned long long)(l))

#else
/* 32 bit long */
#define bit_scan_reverse(l)	bit_scan_reverse32((l))
#define bit_scan_forward(l)	bit_scan_forward32((l))
#endif
136
+
137
+
138
+
139
+
140
+#ifdef BIT_SCAN_DEBRUIJN
141
+
142
+/* use a de Bruijn sequence to get the index of the set bit for a number
143
+ *  of the form 2^k (DEBRUIJN_HASH32() and DEBRUIJN_HASH64()).
144
+ *  bit_scan_forward & bit_scan_reverse would need first to convert
145
+ *  the argument to 2^k (where k is the first set bit or last set bit index)-
146
+ *  For bit_scan_forward this can be done very fast using x & (-x).
147
+ *  For more info about this method see:
148
+ *  http://citeseer.ist.psu.edu/leiserson98using.html
149
+ *  ("Using de Bruijn Sequences to Index a 1 in a Computer Word")
150
+ */
151
+
152
extern int _debruijn_hash32[32]; /* see bit_scan.c */
extern int _debruijn_hash64[64]; /* see bit_scan.c */

#define DEBRUIJN_CT32  0x04653ADFU
#define DEBRUIJN_CT64  0x0218A392CD3D5DBFULL 

/* DEBRUIJN_HASH32(x) - maps x=2^k to an index in _debruijn_hash32[].
 * The argument is converted to unsigned int first, so that the width of
 * the multiplication and the shift count do not depend on the type of the
 * expression passed in (a wider argument would otherwise select the wrong
 * bits of the product). */
#define DEBRUIJN_HASH32(x)\
	((((unsigned int)(x))*DEBRUIJN_CT32)>>(sizeof(unsigned int)*8-5))

/* DEBRUIJN_HASH64(x) - maps x=2^k to an index in _debruijn_hash64[].
 * The argument is converted to unsigned long long first: with a narrower
 * argument a sizeof(x) based shift count would be too small and the
 * result would index outside of the 64 entries table. */
#define DEBRUIJN_HASH64(x)\
	((((unsigned long long)(x))*DEBRUIJN_CT64)>> \
		(sizeof(unsigned long long)*8-6))

/* index of the first (least significant) set bit of a 32 bit value
 * (undefined value if x==0); x & -x isolates the lowest set bit and the
 * negation is done on the unsigned value to avoid signed overflow.
 * WARNING: macro, x is evaluated twice */
#define bit_scan_forward_debruijn32(x) \
	( _debruijn_hash32[DEBRUIJN_HASH32( \
			((unsigned int)(x)) & (0U-(unsigned int)(x)))])

/* index of the first (least significant) set bit of a 64 bit value
 * (undefined value if x==0).
 * WARNING: macro, x is evaluated twice */
#define bit_scan_forward_debruijn64(x) \
	( _debruijn_hash64[DEBRUIJN_HASH64( \
			((unsigned long long)(x)) & (0ULL-(unsigned long long)(x)))])
169
+
170
+
171
+static inline int bit_scan_reverse_debruijn32(unsigned int v)
172
+{
173
+	unsigned int last;
174
+	
175
+	do{
176
+		last=v;
177
+		v=v&(v-1);
178
+	}while(v); /* => last is 2^k */
179
+	return _debruijn_hash32[DEBRUIJN_HASH32(last)];
180
+}
181
+
182
+
183
+static inline int bit_scan_reverse_debruijn64(unsigned long long v)
184
+{
185
+	unsigned long long last;
186
+	
187
+	do{
188
+		last=v;
189
+		v=v&(v-1);
190
+	}while(v); /* => last is 2^k */
191
+	return _debruijn_hash64[DEBRUIJN_HASH64(last)];
192
+}
193
+
194
+
195
+#endif /* BIT_SCAN_DEBRUIJN */
196
+
197
+#ifdef BIT_SCAN_SLOW
198
+/* only for reference purposes (testing the other versions against it) */
199
+
200
/* Reference version: returns the index of the first (least significant)
 * set bit of v, testing one bit at a time; returns 0 if v is 0. */
static inline int bit_scan_forward_slow32(unsigned int v)
{
	int idx;

	idx=0;
	while(v){
		if (v&1U)
			return idx;
		v>>=1;
		idx++;
	}
	return 0;
}
207
+
208
+
209
/* Reference version: returns the index of the last (most significant)
 * set bit of v, testing one bit at a time starting from the top;
 * returns 0 if v is 0. */
static inline int bit_scan_reverse_slow32(unsigned int v)
{
	unsigned int probe;
	int idx;

	idx=sizeof(v)*8-1;
	probe=1U<<(sizeof(v)*8-1); /* walks down from the top bit */
	while(idx>0){
		if (v&probe)
			return idx;
		probe>>=1;
		idx--;
	}
	return 0;
}
216
+
217
+
218
/* Reference version: returns the index of the first (least significant)
 * set bit of v, testing one bit at a time; returns 0 if v is 0. */
static inline int bit_scan_forward_slow64(unsigned long long v)
{
	int idx;

	idx=0;
	while(v){
		if (v&1ULL)
			return idx;
		v>>=1;
		idx++;
	}
	return 0;
}
225
+
226
+
227
/* Reference version: returns the index of the last (most significant)
 * set bit of v, testing one bit at a time starting from the top;
 * returns 0 if v is 0. */
static inline int bit_scan_reverse_slow64(unsigned long long v)
{
	unsigned long long probe;
	int idx;

	idx=sizeof(v)*8-1;
	probe=1ULL<<(sizeof(v)*8-1); /* walks down from the top bit */
	while(idx>0){
		if (v&probe)
			return idx;
		probe>>=1;
		idx--;
	}
	return 0;
}
234
+
235
+
236
+#endif /* BIT_SCAN_SLOW */
237
+
238
+
239
+#ifdef BIT_SCAN_BRANCH
240
+
241
/* Returns the index of the first (least significant) set bit of v, using a
 * binary search: whenever the lower half of the remaining window is empty
 * it is shifted out and its width is added to the result.
 * For v==0 the result is 31. */
static inline int bit_scan_forward_br32(unsigned int v)
{
	int pos;

	if (v&0x01)
		return 0; /* fast path: bit 0 is set */
	pos=0;
	/* the amounts are distinct powers of 2, so they can simply be or-ed */
	if ((v&0xffff)==0){
		v>>=16;
		pos|=16;
	}
	if ((v&0xff)==0){
		v>>=8;
		pos|=8;
	}
	if ((v&0x0f)==0){
		v>>=4;
		pos|=4;
	}
	if ((v&0x03)==0){
		v>>=2;
		pos|=2;
	}
	if ((v&0x01)==0)
		pos|=1;
	return pos;
}
267
+
268
+
269
/* Returns the index of the last (most significant) set bit of v, using a
 * binary search: whenever the upper half of the remaining window is not
 * empty, the lower half is shifted out and its width is added to the
 * result. For v==0 the result is 0. */
static inline int bit_scan_reverse_br32(unsigned int v)
{
	int pos;

	pos=0;
	/* the amounts are distinct powers of 2, so they can simply be or-ed */
	if ((v&0xffff0000)!=0){
		v>>=16;
		pos|=16;
	}
	if ((v&0xff00)!=0){
		v>>=8;
		pos|=8;
	}
	if ((v&0xf0)!=0){
		v>>=4;
		pos|=4;
	}
	if ((v&0x0c)!=0){
		v>>=2;
		pos|=2;
	}
	if ((v&0x02)!=0)
		pos|=1;
	return pos;
}
293
+
294
+
295
/* Returns the index of the first (least significant) set bit of v, using a
 * binary search: whenever the lower half of the remaining window is empty
 * it is shifted out and its width is added to the result.
 * For v==0 the result is 63. */
static inline int bit_scan_forward_br64(unsigned long long v)
{
	int pos;

	if (v&0x01ULL)
		return 0; /* fast path: bit 0 is set */
	pos=0;
	/* the amounts are distinct powers of 2, so they can simply be or-ed */
	if ((v&0xffffffffULL)==0){
		v>>=32;
		pos|=32;
	}
	if ((v&0xffffULL)==0){
		v>>=16;
		pos|=16;
	}
	if ((v&0xffULL)==0){
		v>>=8;
		pos|=8;
	}
	if ((v&0x0fULL)==0){
		v>>=4;
		pos|=4;
	}
	if ((v&0x03ULL)==0){
		v>>=2;
		pos|=2;
	}
	if ((v&0x01ULL)==0)
		pos|=1;
	return pos;
}
325
+
326
+
327
/* Returns the index of the last (most significant) set bit of v, using a
 * binary search: whenever the upper half of the remaining window is not
 * empty, the lower half is shifted out and its width is added to the
 * result. For v==0 the result is 0. */
static inline int bit_scan_reverse_br64(unsigned long long v)
{
	int pos;

	pos=0;
	/* the amounts are distinct powers of 2, so they can simply be or-ed */
	if ((v&0xffffffff00000000ULL)!=0){
		v>>=32;
		pos|=32;
	}
	if ((v&0xffff0000ULL)!=0){
		v>>=16;
		pos|=16;
	}
	if ((v&0xff00ULL)!=0){
		v>>=8;
		pos|=8;
	}
	if ((v&0xf0ULL)!=0){
		v>>=4;
		pos|=4;
	}
	if ((v&0x0cULL)!=0){
		v>>=2;
		pos|=2;
	}
	if ((v&0x02ULL)!=0)
		pos|=1;
	return pos;
}
355
+#endif  /* BIT_SCAN_BRANCH */
356
+
357
+
358
+
359
+#ifdef BIT_SCAN_ASM
360
+#if defined __CPU_x86 || defined __CPU_x86_64
361
+#define HAS_BIT_SCAN_ASM
362
+
363
/* Returns the index of the first (least significant) set bit of v, using
 * the x86 bsf instruction (undefined value if v==0).
 * __asm__/__volatile__ are used instead of asm/volatile so that the header
 * can also be included when compiling in a strict ISO C mode. */
static inline int bit_scan_forward_asm32(unsigned int v)
{
	int r;

	__asm__ __volatile__(" bsfl %1, %0": "=r"(r): "rm"(v) : "cc");
	return r;
}
369
+
370
/* Returns the index of the last (most significant) set bit of v, using
 * the x86 bsr instruction (undefined value if v==0).
 * __asm__/__volatile__ are used instead of asm/volatile so that the header
 * can also be included when compiling in a strict ISO C mode. */
static inline int bit_scan_reverse_asm32(unsigned int v)
{
	int r;

	__asm__ __volatile__(" bsrl %1, %0": "=r"(r): "rm"(v) : "cc");
	return r;
}
376
+
377
+#ifdef __CPU_x86_64
378
/* Returns the index of the first (least significant) set bit of v, using
 * the x86_64 bsfq instruction (undefined value if v==0).
 * The result variable is long long, so that the register operand is
 * 64 bits wide (as bsfq requires) even where long is only 32 bits. */
static inline int bit_scan_forward_asm64(unsigned long long v)
{
	long long r;

	__asm__ __volatile__(" bsfq %1, %0": "=r"(r): "rm"(v) : "cc");
	return (int)r;
}
384
+
385
/* Returns the index of the last (most significant) set bit of v, using
 * the x86_64 bsrq instruction (undefined value if v==0).
 * The result variable is long long, so that the register operand is
 * 64 bits wide (as bsrq requires) even where long is only 32 bits. */
static inline int bit_scan_reverse_asm64(unsigned long long v)
{
	long long r;

	__asm__ __volatile__(" bsrq %1, %0": "=r"(r): "rm"(v) : "cc");
	return (int)r;
}
391
+#else
392
/* 32 bit x86 version: returns the index of the first (least significant)
 * set bit of v (undefined value if v==0), using the 32 bit asm version on
 * the lower word and, only if that one is 0, on the upper word.
 * The upper word is obtained with a shift instead of reading v through an
 * unsigned int pointer, which would break the C aliasing rules. */
static inline int bit_scan_forward_asm64(unsigned long long v)
{
	unsigned int lo;

	lo=(unsigned int)v;
	if (lo)
		return bit_scan_forward_asm32(lo);
	return 32+bit_scan_forward_asm32((unsigned int)(v>>32));
}
398
+
399
/* 32 bit x86 version: returns the index of the last (most significant)
 * set bit of v (undefined value if v==0), using the 32 bit asm version on
 * the upper word if it is not 0 and on the lower word otherwise.
 * The upper word is obtained with a shift instead of reading v through an
 * unsigned int pointer, which would break the C aliasing rules. */
static inline int bit_scan_reverse_asm64(unsigned long long v)
{
	unsigned int hi;

	hi=(unsigned int)(v>>32);
	if (hi)
		return 32+bit_scan_reverse_asm32(hi);
	return bit_scan_reverse_asm32((unsigned int)v);
}
405
+#endif /* __CPU_x86_64 */
406
+
407
+#endif /* __CPU_x86 || __CPU_x86_64 */
408
+#endif /* BIT_SCAN_ASM */
409
+
410
+#endif
0 411
new file mode 100644
... ...
@@ -0,0 +1,207 @@
0
+/* $Id$
1
+ * 
2
+ * test bit_scan operations from bit_scan.h
3
+ *  (both for correctness  and speed)
4
+ * 
5
+ * Copyright (C) 2007 iptelorg GmbH
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+/* 
20
+ * Example gcc command line:
21
+ *  gcc -O9 -Wall -DCC_GCC_LIKE_ASM  -D__CPU_x86 bit_scan_test.c ../bit_scan.c
22
+ *      -o bit_scan_test
23
+ *
24
+ * History:
25
+ * --------
26
+ *  2007-06-23  created by andrei
27
+ */
28
+
29
+
30
+#include <stdlib.h>
31
+#include <stdio.h>
32
+
33
+
34
+#define BIT_SCAN_DEBRUIJN
35
+#define BIT_SCAN_BRANCH
36
+#define BIT_SCAN_SLOW
37
+
38
+#include "../bit_scan.h"
39
+#ifdef NO_PROFILE
40
+#define profile_init(x,y)  do{}while(0)
41
+#define profile_start(x)  do{}while(0)
42
+#define profile_end(x)  do{}while(0)
43
+#define PROFILE_PRINT(x) do{}while(0)
44
+#else
45
+#include "profile.h"
46
+#endif
47
+
48
/* CHECK(txt, v1, val, f, pd) - calls f(val) between profile_start(pd) and
 * profile_end(pd) and compares the result (as unsigned long long) with the
 * expected value v1. On mismatch it prints an error containing txt, the
 * expected value, val and the result to stderr and exits with -1.
 * v1 and val are parenthesized, so that the casts apply to the whole
 * argument expression; the local result variable uses a name unlikely to
 * clash with identifiers used in the arguments. */
#define CHECK(txt, v1, val, f, pd) \
	do{ \
		unsigned long long chk_ret_; \
		profile_start(pd); \
		chk_ret_=(unsigned long long)f(val); \
		profile_end(pd); \
		if ((unsigned long long)(v1)!=chk_ret_){ \
			fprintf(stderr, "ERROR:" #f ": %s, expected %llx (%llx), got"\
					" %llx\n", \
					(txt), (unsigned long long)(v1), \
					(unsigned long long)(val), chk_ret_); \
			exit(-1); \
		} \
	}while(0)
62
+
63
#ifndef PROFILE_PRINT
/* PROFILE_PRINT(pd) - prints the results stored in the profile structure pd
 * points to: name, entries/exits, total cycles, max cycles and the average
 * cycles per entry (0 if there were no entries).
 * entries and exits are unsigned long, hence %lu. */
#define PROFILE_PRINT(pd) \
	do{ \
		printf("profile: %s (%lu/%lu) total %llu max %llu average %llu\n", \
				(pd)->name,  (pd)->entries, (pd)->exits, \
				(pd)->total_cycles,  (pd)->max_cycles, \
				(pd)->entries? \
				(pd)->total_cycles/(unsigned long long)(pd)->entries:0ULL ); \
	}while(0)
#endif
73
+
74
+int main(int argc, char** argv)
75
+{
76
+	int r;
77
+	unsigned int v;
78
+	unsigned long long ll;
79
+	int i;
80
+#ifndef NO_PROFILE
81
+	struct profile_data pdf1, pdf2, pdf4, pdf5, pdf6, pdf8;
82
+	struct profile_data pdl1, pdl2, pdl4, pdl5, pdl6, pdl8;
83
+#ifdef HAS_BIT_SCAN_ASM
84
+	struct profile_data pdf3, pdf7, pdl3, pdl7;
85
+#endif
86
+	struct profile_data pdf_32, pdf_64, pdl_32, pdl_64;
87
+	struct profile_data pdf_long, pdl_long;
88
+#endif /* NO_PROFILE */
89
+	
90
+	profile_init(&pdf1, "first_debruijn32");
91
+	profile_init(&pdf2, "first_slow32");
92
+#ifdef HAS_BIT_SCAN_ASM
93
+	profile_init(&pdf3, "first_asm32");
94
+#endif
95
+	profile_init(&pdf4, "first_br32");
96
+	profile_init(&pdf5, "first_debruijn64");
97
+	profile_init(&pdf6, "first_slow64");
98
+#ifdef HAS_BIT_SCAN_ASM
99
+	profile_init(&pdf7, "first_asm64");
100
+#endif
101
+	profile_init(&pdf8, "first_br64");
102
+	profile_init(&pdl1, "last_debruijn32");
103
+	profile_init(&pdl2, "last_slow32");
104
+#ifdef HAS_BIT_SCAN_ASM
105
+	profile_init(&pdl3, "last_asm32");
106
+#endif
107
+	profile_init(&pdl4, "last_br32");
108
+	profile_init(&pdl5, "last_debruijn64");
109
+	profile_init(&pdl6, "last_slow64");
110
+#ifdef HAS_BIT_SCAN_ASM
111
+	profile_init(&pdl7, "last_asm64");
112
+#endif
113
+	profile_init(&pdl8, "last_br64");
114
+	
115
+	profile_init(&pdf_32, "scan_forward32");
116
+	profile_init(&pdf_64, "scan_forward64");
117
+	profile_init(&pdl_32, "scan_reverse32");
118
+	profile_init(&pdl_64, "scan_reverse64");
119
+	profile_init(&pdf_long, "scan_forward_l");
120
+	profile_init(&pdl_long, "scan_reverse_l");
121
+
122
+
123
+	for (i=0; i<100; i++){
124
+	for (r=0; r<32; r++){
125
+		v=(1U<<r);
126
+		CHECK("first debruijn 32bit", r, v, bit_scan_forward_debruijn32, &pdf1);
127
+		CHECK("first slow 32bit", r, v, bit_scan_forward_slow32, &pdf2);
128
+#ifdef HAS_BIT_SCAN_ASM
129
+		CHECK("first asm 32bit", r, v, bit_scan_forward_asm32, &pdf3);
130
+#endif
131
+		CHECK("first br 32bit", r, v, bit_scan_forward_br32, &pdf4);
132
+		CHECK("scan_forward32", r, v, bit_scan_forward32, &pdf_32);
133
+		if (sizeof(long)<=4){
134
+			CHECK("scan_forward_l", r, v, bit_scan_forward, &pdf_long);
135
+		}
136
+		v+=(v-1);
137
+		CHECK("last debruijn 32bit", r, v, bit_scan_reverse_debruijn32, &pdl1);
138
+		CHECK("last slow 32bit", r, v, bit_scan_reverse_slow32, &pdl2);
139
+#ifdef HAS_BIT_SCAN_ASM
140
+		CHECK("last asm 32bit", r, v, bit_scan_reverse_asm32, &pdl3);
141
+#endif
142
+		CHECK("last br 32bit", r, v, bit_scan_reverse_br32, &pdl4);
143
+		CHECK("scan_reverse32", r, v, bit_scan_reverse32, &pdl_32);
144
+		if (sizeof(long)<=4){
145
+			CHECK("scan_reverse_l", r, v, bit_scan_reverse, &pdl_long);
146
+		}
147
+	}
148
+	for (r=0; r<64; r++){
149
+		ll=(1ULL<<r);
150
+		CHECK("first debruijn 64bit", r, ll, bit_scan_forward_debruijn64, &pdf5);
151
+		CHECK("first slow 64bit", r, ll, bit_scan_forward_slow64, &pdf6);
152
+#ifdef HAS_BIT_SCAN_ASM
153
+		CHECK("first asm 64bit", r, ll, bit_scan_forward_asm64, &pdf7);
154
+#endif
155
+		CHECK("first br 64bit", r, ll, bit_scan_forward_br64, &pdf8);
156
+		CHECK("scan_forward64", r, ll, bit_scan_forward64, &pdf_64);
157
+		if (sizeof(long)>4){
158
+			CHECK("scan_forward_l", r, ll, bit_scan_forward, &pdf_long);
159
+		}
160
+		ll+=ll-1;
161
+		CHECK("last debruijn 64bit", r, ll, bit_scan_reverse_debruijn64, &pdl5);
162
+		CHECK("last slow 64bit", r, ll, bit_scan_reverse_slow64, &pdl6);
163
+#ifdef HAS_BIT_SCAN_ASM
164
+		CHECK("last asm 64bit", r, ll, bit_scan_reverse_asm64, &pdl7);
165
+#endif
166
+		CHECK("last br 64bit", r, ll, bit_scan_reverse_br64, &pdl8);
167
+		CHECK("scan_reverse64", r, ll, bit_scan_reverse64, &pdl_64);
168
+		if (sizeof(long)>4){
169
+			CHECK("scan_reverse_l", r, ll, bit_scan_reverse, &pdl_long);
170
+		}
171
+	}
172
+	}
173
+
174
+	PROFILE_PRINT(&pdf1);
175
+	PROFILE_PRINT(&pdf2);
176
+#ifdef HAS_BIT_SCAN_ASM
177
+	PROFILE_PRINT(&pdf3);
178
+#endif
179
+	PROFILE_PRINT(&pdf4);
180
+	PROFILE_PRINT(&pdl1);
181
+	PROFILE_PRINT(&pdl2);
182
+#ifdef HAS_BIT_SCAN_ASM
183
+	PROFILE_PRINT(&pdl3);
184
+#endif
185
+	PROFILE_PRINT(&pdl4);
186
+	PROFILE_PRINT(&pdf5);
187
+	PROFILE_PRINT(&pdf6);
188
+#ifdef HAS_BIT_SCAN_ASM
189
+	PROFILE_PRINT(&pdf7);
190
+#endif
191
+	PROFILE_PRINT(&pdf8);
192
+	PROFILE_PRINT(&pdl5);
193
+	PROFILE_PRINT(&pdl6);
194
+#ifdef HAS_BIT_SCAN_ASM
195
+	PROFILE_PRINT(&pdl7);
196
+#endif
197
+	PROFILE_PRINT(&pdl8);
198
+	
199
+	PROFILE_PRINT(&pdf_32);
200
+	PROFILE_PRINT(&pdf_64);
201
+	PROFILE_PRINT(&pdf_long);
202
+	PROFILE_PRINT(&pdl_32);
203
+	PROFILE_PRINT(&pdl_64);
204
+	PROFILE_PRINT(&pdl_long);
205
+	return 0;
206
+}
0 207
new file mode 100644
... ...
@@ -0,0 +1,188 @@
0
+/*
1
+ * $Id$
2
+ * 
3
+ * Copyright (C) 2007 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/*
18
+ * Basic profile using the cpu cycle counter
19
+ *
20
+ * cycles_t - an unsigned integer type used for storing the cpu cycles
21
+ *            (unsigned long long for now)
22
+ *
23
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
24
+ *
25
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
26
+ *                            - sets u1 and u2 to the least significant, 
27
+ *                              respective most significant 32 bit word of
28
+ *                              the cpu cycles counter
29
+ * struct profile_data;            - holds all the profile results
30
+ *                               (last call cycles, max cycles, total cycles,
31
+ *                                no. of profile_start calls, no. of 
32
+ *                                profile_end calls, name use in profile_init)
33
+ * void     profile_init(pd, name) - initialize a profile structure
34
+ * void     profile_start(pd)      - starts profiling (call before calling
35
+ *                               the target function)
36
+ * void     profile_end(pd)        - stops profiling (call after the target
37
+ *                               function returns)
38
+ * 
39
+ */
40
+ /*
41
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
42
+ *                     inline asm,
43
+ *                  __CPU_x86, __CPU_x86_64, __CPU_sparc64
44
+ */
45
+/* 
46
+ * History:
47
+ * --------
48
+ *  2007-06-23  created by andrei
49
+ */
50
+
51
+
52
+
53
+
54
+#ifndef _profile_h
55
+#define _profile_h
56
+
57
+#include <string.h>
58
+
59
+/*
60
+ * cycles_t - an unsigned integer type used for storing the cpu cycles
61
+ *            (unsigned long long for now)
62
+ *
63
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
64
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
65
+ *                            - sets u1 and u2 to the least significant, 
66
+ *                              respective most significant 32 bit word of
67
+ *                              the cpu cycles counter
68
+ */
69
+
70
+#ifdef __CPU_x86
71
typedef unsigned long long cycles_t;

/* Returns the current cpu cycles counter, read with rdtsc (32 bit x86
 * version: the 64 bit result is taken from edx:eax via the "A" constraint).
 * __asm__/__volatile__ are used so that the header can also be included
 * when compiling in a strict ISO C mode. */
inline static cycles_t get_cpu_cycles(void)
{
	cycles_t r;
	__asm__ __volatile__( "rdtsc \n\t" : "=A"(r));
	return r;
}
79
+
80
/* get_cpu_cycles_uint(u1, u2) - reads the cpu cycles counter with rdtsc and
 * sets *u1 to the least significant and *u2 to the most significant 32 bit
 * word (u1 and u2 are pointers to unsigned int).
 * __asm__/__volatile__ are used so that the macro also works when compiling
 * in a strict ISO C mode. */
#define get_cpu_cycles_uint(u1, u2) \
	do{ \
		/* result in edx:eax */ \
		__asm__ __volatile__( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
	}while(0)
85
+
86
+#elif defined __CPU_x86_64
87
typedef unsigned long long cycles_t;

/* Returns the current cpu cycles counter, read with rdtsc (x86_64 version:
 * the two 32 bit halves returned in eax and edx are combined by hand).
 * __asm__/__volatile__ are used so that the header can also be included
 * when compiling in a strict ISO C mode. */
inline static cycles_t get_cpu_cycles(void)
{
	unsigned int u1, u2;
	__asm__ __volatile__( "rdtsc \n\t" : "=a"(u1), "=d"(u2));
	return ((cycles_t)u2<<32ULL)|u1;
}
95
+
96
+
97
/* get_cpu_cycles_uint(u1, u2) - reads the cpu cycles counter with rdtsc and
 * sets *u1 to the least significant and *u2 to the most significant 32 bit
 * word (u1 and u2 are pointers to unsigned int).
 * __asm__/__volatile__ are used so that the macro also works when compiling
 * in a strict ISO C mode. */
#define get_cpu_cycles_uint(u1, u2) \
	do{ \
		/* result in edx:eax */ \
		__asm__ __volatile__( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
	}while(0)
102
+
103
+#elif defined __CPU_sparc64
104
+
105
typedef unsigned long long cycles_t;

/* Returns the current cpu cycles counter, read from the %tick register.
 * When _LP64 is not defined (32 bit mode) the counter is split by the asm
 * code into two 32 bit words which are then reassembled through a union;
 * otherwise it is read directly into a 64 bit variable.
 * __asm__/__volatile__ are used so that the header can also be included
 * when compiling in a strict ISO C mode. */
inline static cycles_t get_cpu_cycles(void)
{
#if ! defined(_LP64)
#warning "ilp32 mode "
	/* u2 (the upper 32 bits, see the srlx below) is declared first
	 * NOTE(review): this presumably relies on the big endian layout of
	 * cycles_t on sparc - confirm */
	struct uint_64{
		unsigned int u2;
		unsigned int u1;
	};
	union{
		cycles_t c;
		struct uint_64 u;
	}r;
	
	__asm__ __volatile__("rd %%tick, %0 \n\t"
				 "srlx %0, 32, %1 \n\t"
				: "=r"(r.u.u1), "=r"(r.u.u2));
	return r.c;
#else
	cycles_t r;
	/* normal 64 bit mode (e.g. gcc -m64) */
	__asm__ __volatile__("rd %%tick, %0" : "=r"(r));
	return r;
#endif
}
131
+inline static void  get_cpu_cycles_uint(unsigned int* u1, unsigned int* u2)
132
+{
133
+	cycles_t r;
134
+	asm volatile("rd %%tick, %0" : "=r"(r));
135
+	*u1=(unsigned int)r;
136
+	*u2=(unsigned int)(r>>32);
137
+}
138
+
139
+#else /* __CPU_xxx */
140
+#error "no get_cycles support for this CPU"
141
+#endif /* __CPU_xxx */
142
+
143
+
144
+union profile_cycles{
145
+	cycles_t c;
146
+	struct{
147
+		unsigned int u1;
148
+		unsigned int u2;
149
+	}uint;
150
+};
151
+
152
+struct profile_data{
153
+	cycles_t cycles;  /* last call */
154
+	cycles_t total_cycles;
155
+	cycles_t max_cycles;
156
+	unsigned long entries; /* no. profile_start calls */
157
+	unsigned long exits;   /* no. profile_end calls */
158
+	char * name;
159
+	
160
+	/* private stuff */
161
+	union profile_cycles init_rdtsc;
162
+};
163
+
164
+inline static void profile_init(struct profile_data* pd, char *name)
165
+{
166
+	memset(pd, 0, sizeof(*pd));
167
+	pd->name=name;
168
+}
169
+
170
+
171
+inline static void profile_start(struct profile_data* pd)
172
+{
173
+	pd->entries++;
174
+	pd->init_rdtsc.c=get_cpu_cycles();
175
+}
176
+
177
+
178
+inline static void profile_end(struct profile_data* pd)
179
+{
180
+	pd->cycles=get_cpu_cycles()-pd->init_rdtsc.c;
181
+	if (pd->max_cycles<pd->cycles) pd->max_cycles=pd->cycles;
182
+	pd->total_cycles+=pd->cycles;
183
+	pd->exits++;
184
+}
185
+
186
+
187
+#endif