Browse code

- Added functions to get the index of the first or last bit set in a 32-bit or 64-bit integer: bit_scan_forward32(), bit_scan_forward64(), bit_scan_reverse32(), bit_scan_reverse64(), bit_scan_forward(long) and bit_scan_reverse(long). All of them are very fast: they use asm if available (for now only for __CPU_x86 and __CPU_x86_64) and otherwise fall back to a de Bruijn based method or a binary search (depending on which method was faster in my measurements on a particular cpu). - Added test/profile.h: simple functions that measure the cpu cycles between two calls (for now with support for x86, x86_64 and sparc64).

Andrei Pelinescu-Onciul authored on 25/06/2007 17:20:34
Showing 4 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,36 @@
1
+/* 
2
+ * $Id$
3
+ * 
4
+ * Copyright (C) 2007 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/*
19
+ *  bit scan operations, see bit_scan.h.
20
+ */
21
+/* 
22
+ * History:
23
+ * --------
24
+ *  2007-06-23  created by andrei
25
+ */
26
+
27
+#include "bit_scan.h"
28
+
29
/* Lookup table used by the de Bruijn bit scan (see bit_scan.h):
 * entry [DEBRUIJN_HASH32(2^k)] holds k, for k in 0..31. */
int _debruijn_hash32[32]={
	 0,  1,  2,  6,  3, 11,  7, 16,
	 4, 14, 12, 21,  8, 23, 17, 26,
	31,  5, 10, 15, 13, 20, 22, 25,
	30,  9, 19, 24, 29, 18, 28, 27
};
31
+
32
/* Lookup table used by the de Bruijn bit scan (see bit_scan.h):
 * entry [DEBRUIJN_HASH64(2^k)] holds k, for k in 0..63. */
int _debruijn_hash64[64]={
	 0,  1,  2,  7,  3, 13,  8, 19,
	 4, 25, 14, 28,  9, 34, 20, 40,
	 5, 17, 26, 38, 15, 46, 29, 48,
	10, 31, 35, 54, 21, 50, 41, 57,
	63,  6, 12, 18, 24, 27, 33, 39,
	16, 37, 45, 47, 30, 53, 49, 56,
	62, 11, 23, 32, 36, 44, 52, 55,
	61, 22, 43, 51, 60, 42, 59, 58
};
36
+
0 37
new file mode 100644
... ...
@@ -0,0 +1,411 @@
1
+/* 
2
+ * $Id$
3
+ * 
4
+ * Copyright (C) 2007 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/*
19
+ *  bit scan operations
20
+ *  int bit_scan_forward(unsigned long v)   - returns the index of the first
21
+ *                                          set bit (undefined value if v==0)
22
+ *  int bit_scan_forward32(unsigned int v)   - returns the index of the first
23
+ *                                          set bit (undefined value if v==0)
24
+ *  int bit_scan_forward64(long long v)      - returns the index of the first
25
+ *                                          set bit (undefined value if v==0)
26
+ *  int bit_scan_reverse(unsigned long v)   - returns the index of the last
27
+ *                                          set bit (undefined value if v==0)
28
+ *  int bit_scan_reverse32(unsigned int v)  - returns the index of the last
29
+ *                                          set bit (undefined value if v==0)
30
+ *  int bit_scan_reverse64(long long v)     - returns the index of the last
31
+ *                                          set bit (undefined value if v==0)
32
+ *
33
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
34
+ *                     inline asm,
35
+ *                  __CPU_x86, __CPU_x86_64,
36
+ *                  ULONG_MAX (limits.h)
37
+ */
38
+/* 
39
+ * History:
40
+ * --------
41
+ *  2007-06-23  created by andrei
42
+ */
43
+
44
+#ifndef _bit_scan_h
45
+#define _bit_scan_h
46
+
47
+#include <limits.h>
48
+
49
+
50
/* inline asm versions exist only for x86 / x86_64 and need a compiler
 * that understands gcc style inline asm */
#ifdef CC_GCC_LIKE_ASM
#if defined __CPU_x86 || defined __CPU_x86_64
#define BIT_SCAN_ASM
#endif
#endif


/* set default bitscan versions, depending on the architecture
 * In general the order is  asm, debruijn, br, slow for bit_scan_forward
 *  and asm, br, slow, debruijn for bit_scan_reverse. */
#ifdef BIT_SCAN_ASM
/* have asm => use it */

#define bit_scan_forward32(i)	bit_scan_forward_asm32(i)
#define bit_scan_forward64(i)	bit_scan_forward_asm64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_asm32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_asm64(i)

#elif defined __CPU_x86 || defined __CPU_x86_64
/* no asm (e.g. no CC_GCC_LIKE_ASM) => debruijn for bit_scan_forward and
 *  br for bit_scan_reverse */
/* make sure the debruijn and branch versions are enabled */
#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)

#elif defined __CPU_sparc64
/* no asm yet => use branch for everything in 64 bit mode
 *               and debruijn + branch in 32 bit mode
 *  (in 64bit mode the branch method is slightly faster than debruijn,
 *   however note that in 32 bit mode the roles are reversed for _forward)*/
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)
/* LP64 is honoured for backward compatibility (user supplied define);
 * _LP64 / __LP64__ are the spellings compilers predefine themselves */
#if defined LP64 || defined _LP64 || defined __LP64__
#define bit_scan_forward32(i)	bit_scan_forward_br32(i)
#define bit_scan_forward64(i)	bit_scan_forward_br64(i)
#else /* LP64 */

#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#endif /* LP64 */

#else /* __CPU_XXX */
/* default - like x86 no asm */
/* make sure the debruijn and branch versions are enabled */
#ifndef BIT_SCAN_DEBRUIJN
#define BIT_SCAN_DEBRUIJN
#endif
#ifndef BIT_SCAN_BRANCH
#define BIT_SCAN_BRANCH
#endif

#define bit_scan_forward32(i)	bit_scan_forward_debruijn32(i)
#define bit_scan_forward64(i)	bit_scan_forward_debruijn64(i)
#define bit_scan_reverse32(i)	bit_scan_reverse_br32(i)
#define bit_scan_reverse64(i)	bit_scan_reverse_br64(i)

#endif /* __CPU_XXX */
123
+
124
+
125
/* select the right version for bit_scan_forward(unsigned long l) and
 * bit_scan_reverse(unsigned long l), depending on the size of long.
 * The argument is always cast to the exact type the selected version
 * works on, so that the macro based versions see the expected width.
 */
#if (defined (ULONG_MAX) && ULONG_MAX > 4294967295) || defined LP64 || \
	defined _LP64 || defined __LP64__
/* long is 64 bits */
#define bit_scan_forward(l)	bit_scan_forward64((unsigned long long)(l))
#define bit_scan_reverse(l)	bit_scan_reverse64((unsigned long long)(l))

#else
/* long is 32 bits */
#define bit_scan_forward(l)	bit_scan_forward32((unsigned int)(l))
#define bit_scan_reverse(l)	bit_scan_reverse32((unsigned int)(l))
#endif
137
+
138
+
139
+
140
+
141
+#ifdef BIT_SCAN_DEBRUIJN
142
+
143
+/* use a de Bruijn sequence to get the index of the set bit for a number
144
+ *  of the form 2^k (DEBRUIJN_HASH32() and DEBRUIJN_HASH64()).
145
+ *  bit_scan_forward & bit_scan_reverse would need first to convert
146
+ *  the argument to 2^k (where k is the first set bit or last set bit index)-
147
+ *  For bit_scan_forward this can be done very fast using x & (-x).
148
+ *  For more info about this method see:
149
+ *  http://citeseer.ist.psu.edu/leiserson98using.html
150
+ *  ("Using de Bruijn Sequences to Index a 1 in a Computer Word")
151
+ */
152
+
153
extern int _debruijn_hash32[32]; /* see bit_scan.c */
extern int _debruijn_hash64[64]; /* see bit_scan.c */

/* de Bruijn multipliers for 32 and 64 bit words */
#define DEBRUIJN_CT32  0x04653ADFU
#define DEBRUIJN_CT64  0x0218A392CD3D5DBFULL

/* DEBRUIJN_HASH32(x) / DEBRUIJN_HASH64(x): map x == 2^k to an index in
 * _debruijn_hash32[] / _debruijn_hash64[] (the top 5, respectively 6 bits
 * of x * de Bruijn constant).
 * The argument is converted to unsigned int / unsigned long long first, so
 * that the multiplication and the shift width do not depend on the type of
 * the expression passed in (a narrower, wider or signed argument would
 * otherwise produce a wrong or out of range index). */
#define DEBRUIJN_HASH32(x)\
	((((unsigned int)(x))*DEBRUIJN_CT32)>>(sizeof(unsigned int)*8-5))

#define DEBRUIJN_HASH64(x)\
	((((unsigned long long)(x))*DEBRUIJN_CT64)>> \
		(sizeof(unsigned long long)*8-6))

/* index of the first (least significant) set bit; x & (-x) isolates it.
 * The result for x==0 is not meaningful.
 * Note: x is evaluated more than once, do not pass expressions with side
 * effects. */
#define bit_scan_forward_debruijn32(x) \
	( _debruijn_hash32[DEBRUIJN_HASH32( \
			(unsigned int)(x) & (0U-(unsigned int)(x)))])

#define bit_scan_forward_debruijn64(x) \
	( _debruijn_hash64[DEBRUIJN_HASH64( \
			(unsigned long long)(x) & (0ULL-(unsigned long long)(x)))])
170
+
171
+
172
+static inline int bit_scan_reverse_debruijn32(unsigned int v)
173
+{
174
+	unsigned int last;
175
+	
176
+	do{
177
+		last=v;
178
+		v=v&(v-1);
179
+	}while(v); /* => last is 2^k */
180
+	return _debruijn_hash32[DEBRUIJN_HASH32(last)];
181
+}
182
+
183
+
184
+static inline int bit_scan_reverse_debruijn64(unsigned long long v)
185
+{
186
+	unsigned long long last;
187
+	
188
+	do{
189
+		last=v;
190
+		v=v&(v-1);
191
+	}while(v); /* => last is 2^k */
192
+	return _debruijn_hash64[DEBRUIJN_HASH64(last)];
193
+}
194
+
195
+
196
+#endif /* BIT_SCAN_DEBRUIJN */
197
+
198
+#ifdef BIT_SCAN_SLOW
199
+/* only for reference purposes (testing the other versions against it) */
200
+
201
/* Reference version: tests the bits one by one, starting with bit 0.
 * Returns the index of the first set bit, or 0 if v is 0. */
static inline int bit_scan_forward_slow32(unsigned int v)
{
	int idx;
	
	if (v==0)
		return 0;
	idx=0;
	while(!(v&1)){
		v>>=1;
		idx++;
	}
	return idx;
}
208
+
209
+
210
/* Reference version: shifts v left until its top bit is set.
 * Returns the index of the last set bit, or 0 if v is 0. */
static inline int bit_scan_reverse_slow32(unsigned int v)
{
	const unsigned long top_bit=1UL<<(sizeof(v)*8-1);
	int idx;
	
	idx=(int)(sizeof(v)*8-1);
	while(idx>0 && !(v&top_bit)){
		v<<=1;
		idx--;
	}
	return idx;
}
217
+
218
+
219
/* Reference version, 64 bit: tests the bits one by one, starting with
 * bit 0. Returns the index of the first set bit, or 0 if v is 0. */
static inline int bit_scan_forward_slow64(unsigned long long v)
{
	int idx;
	
	if (v==0)
		return 0;
	idx=0;
	while(!(v&1ULL)){
		v>>=1;
		idx++;
	}
	return idx;
}
226
+
227
+
228
/* Reference version, 64 bit: shifts v left until its top bit is set.
 * Returns the index of the last set bit, or 0 if v is 0. */
static inline int bit_scan_reverse_slow64(unsigned long long v)
{
	const unsigned long long top_bit=1ULL<<(sizeof(v)*8-1);
	int idx;
	
	idx=(int)(sizeof(v)*8-1);
	while(idx>0 && !(v&top_bit)){
		v<<=1;
		idx--;
	}
	return idx;
}
235
+
236
+
237
+#endif /* BIT_SCAN_SLOW */
238
+
239
+
240
+#ifdef BIT_SCAN_BRANCH
241
+
242
/* Binary search for the first (least significant) set bit of v:
 * at each step, if the low half of the remaining window is empty, skip it.
 * Returns the bit index (31 when v is 0). */
static inline int bit_scan_forward_br32(unsigned int v)
{
	int skip;
	int idx;
	
	if (v&0x01)
		return 0; /* fast path: bit 0 set */
	skip=(v&0xffff)? 0: 16;
	v>>=skip;
	idx=skip;
	skip=(v&0xff)? 0: 8;
	v>>=skip;
	idx+=skip;
	skip=(v&0x0f)? 0: 4;
	v>>=skip;
	idx+=skip;
	skip=(v&0x03)? 0: 2;
	v>>=skip;
	idx+=skip;
	idx+=(v&0x01)? 0: 1;
	return idx;
}
268
+
269
+
270
/* Binary search for the last (most significant) set bit of v:
 * at each step, if the high half of the remaining window is not empty,
 * move into it. Returns the bit index (0 when v is 0). */
static inline int bit_scan_reverse_br32(unsigned int v)
{
	int skip;
	int idx;
	
	skip=(v&0xffff0000)? 16: 0;
	v>>=skip;
	idx=skip;
	skip=(v&0xff00)? 8: 0;
	v>>=skip;
	idx+=skip;
	skip=(v&0xf0)? 4: 0;
	v>>=skip;
	idx+=skip;
	skip=(v&0x0c)? 2: 0;
	v>>=skip;
	idx+=skip;
	idx+=(v&0x02)? 1: 0;
	return idx;
}
294
+
295
+
296
/* Binary search for the first (least significant) set bit of v,
 * 64 bit variant. Returns the bit index (63 when v is 0). */
static inline int bit_scan_forward_br64(unsigned long long v)
{
	int skip;
	int idx;
	
	if (v&0x01ULL)
		return 0; /* fast path: bit 0 set */
	skip=(v&0xffffffffULL)? 0: 32;
	v>>=skip;
	idx=skip;
	skip=(v&0xffffULL)? 0: 16;
	v>>=skip;
	idx+=skip;
	skip=(v&0xffULL)? 0: 8;
	v>>=skip;
	idx+=skip;
	skip=(v&0x0fULL)? 0: 4;
	v>>=skip;
	idx+=skip;
	skip=(v&0x03ULL)? 0: 2;
	v>>=skip;
	idx+=skip;
	idx+=(v&0x01ULL)? 0: 1;
	return idx;
}
326
+
327
+
328
/* Binary search for the last (most significant) set bit of v,
 * 64 bit variant. Returns the bit index (0 when v is 0). */
static inline int bit_scan_reverse_br64(unsigned long long v)
{
	int skip;
	int idx;
	
	skip=(v&0xffffffff00000000ULL)? 32: 0;
	v>>=skip;
	idx=skip;
	skip=(v&0xffff0000ULL)? 16: 0;
	v>>=skip;
	idx+=skip;
	skip=(v&0xff00ULL)? 8: 0;
	v>>=skip;
	idx+=skip;
	skip=(v&0xf0ULL)? 4: 0;
	v>>=skip;
	idx+=skip;
	skip=(v&0x0cULL)? 2: 0;
	v>>=skip;
	idx+=skip;
	idx+=(v&0x02ULL)? 1: 0;
	return idx;
}
356
+#endif  /* BIT_SCAN_BRANCH */
357
+
358
+
359
+
360
+#ifdef BIT_SCAN_ASM
361
+#if defined __CPU_x86 || defined __CPU_x86_64
362
+#define HAS_BIT_SCAN_ASM
363
+
364
/* Index of the first (least significant) set bit of v, using the x86 bsf
 * instruction. The result is undefined for v==0.
 * __asm__ is used instead of asm so that the header also compiles in
 * strict ISO modes (e.g. -std=c99), where the asm keyword is disabled. */
static inline int bit_scan_forward_asm32(unsigned int v)
{
	int r;
	__asm__ __volatile__(" bsfl %1, %0": "=r"(r): "rm"(v) : "cc");
	return r;
}
370
+
371
/* Index of the last (most significant) set bit of v, using the x86 bsr
 * instruction. The result is undefined for v==0.
 * __asm__ is used instead of asm so that the header also compiles in
 * strict ISO modes (e.g. -std=c99), where the asm keyword is disabled. */
static inline int bit_scan_reverse_asm32(unsigned int v)
{
	int r;
	__asm__ __volatile__(" bsrl %1, %0": "=r"(r): "rm"(v) : "cc");
	return r;
}
377
+
378
+#ifdef __CPU_x86_64
379
/* Index of the first (least significant) set bit of v, using the x86_64
 * bsfq instruction. The result is undefined for v==0.
 * __asm__ is used instead of asm so that the header also compiles in
 * strict ISO modes, where the asm keyword is disabled. */
static inline int bit_scan_forward_asm64(unsigned long long v)
{
	long r;
	__asm__ __volatile__(" bsfq %1, %0": "=r"(r): "rm"(v) : "cc");
	return (int)r;
}
385
+
386
/* Index of the last (most significant) set bit of v, using the x86_64
 * bsrq instruction. The result is undefined for v==0.
 * __asm__ is used instead of asm so that the header also compiles in
 * strict ISO modes, where the asm keyword is disabled. */
static inline int bit_scan_reverse_asm64(unsigned long long v)
{
	long r;
	__asm__ __volatile__(" bsrq %1, %0": "=r"(r): "rm"(v) : "cc");
	return (int)r;
}
392
+#else
393
/* 64 bit bit_scan_forward for 32 bit x86, built from two 32 bit scans:
 * the low word is scanned first, the high word only if the low one is 0.
 * The high word is obtained with a shift rather than by reading v through
 * an unsigned int pointer, which would break the strict aliasing rules.
 * The result is undefined for v==0. */
static inline int bit_scan_forward_asm64(unsigned long long v)
{
	unsigned int low;
	
	low=(unsigned int)v;
	if (low)
		return bit_scan_forward_asm32(low);
	return 32+bit_scan_forward_asm32((unsigned int)(v>>32));
}
399
+
400
/* 64 bit bit_scan_reverse for 32 bit x86, built from two 32 bit scans:
 * the high word is scanned first, the low word only if the high one is 0.
 * The high word is obtained with a shift rather than by reading v through
 * an unsigned int pointer, which would break the strict aliasing rules.
 * The result is undefined for v==0. */
static inline int bit_scan_reverse_asm64(unsigned long long v)
{
	unsigned int high;
	
	high=(unsigned int)(v>>32);
	if (high)
		return 32+bit_scan_reverse_asm32(high);
	return bit_scan_reverse_asm32((unsigned int)v);
}
406
+#endif /* __CPU_x86_64 */
407
+
408
+#endif /* __CPU_x86 || __CPU_x86_64 */
409
+#endif /* BIT_SCAN_ASM */
410
+
411
+#endif
0 412
new file mode 100644
... ...
@@ -0,0 +1,207 @@
1
+/* $Id$
2
+ * 
3
+ * test bit_scan operations from bit_scan.h
4
+ *  (both for correctness  and speed)
5
+ * 
6
+ * Copyright (C) 2007 iptelorg GmbH
7
+ *
8
+ * Permission to use, copy, modify, and distribute this software for any
9
+ * purpose with or without fee is hereby granted, provided that the above
10
+ * copyright notice and this permission notice appear in all copies.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
+ */
20
+/* 
21
+ * Example gcc command line:
22
+ *  gcc -O9 -Wall -DCC_GCC_LIKE_ASM  -D__CPU_x86 bit_scan_test.c ../bit_scan.c
23
+ *      -o bit_scan_test
24
+ *
25
+ * History:
26
+ * --------
27
+ *  2007-06-23  created by andrei
28
+ */
29
+
30
+
31
+#include <stdlib.h>
32
+#include <stdio.h>
33
+
34
+
35
+#define BIT_SCAN_DEBRUIJN
36
+#define BIT_SCAN_BRANCH
37
+#define BIT_SCAN_SLOW
38
+
39
+#include "../bit_scan.h"
40
+#ifdef NO_PROFILE
41
+#define profile_init(x,y)  do{}while(0)
42
+#define profile_start(x)  do{}while(0)
43
+#define profile_end(x)  do{}while(0)
44
+#define PROFILE_PRINT(x) do{}while(0)
45
+#else
46
+#include "profile.h"
47
+#endif
48
+
49
/* CHECK(txt, v1, val, f, pd): calls f(val) between profile_start(pd) and
 * profile_end(pd) and compares the result with the expected value v1.
 * On mismatch it prints an error on stderr and exits.
 *   txt - description printed in the error message
 *   v1  - expected result
 *   val - argument passed to f
 *   f   - function or macro under test
 *   pd  - profile data passed to profile_start() / profile_end()
 * v1 and val are parenthesized before being cast, so that expression
 * arguments (e.g. a ?: expression) are converted as a whole. */
#define CHECK(txt, v1, val, f, pd) \
	do{ \
		unsigned long long ret; \
		profile_start(pd); \
		ret=(unsigned long long)f(val); \
		profile_end(pd); \
		if ((unsigned long long)(v1)!=ret){ \
			fprintf(stderr, "ERROR:" #f ": %s, expected %llx (%llx), got"\
					" %llx\n", \
					(txt), (unsigned long long)(v1), \
					(unsigned long long)(val), ret); \
			exit(-1); \
		} \
	}while(0)
63
+
64
#ifndef PROFILE_PRINT
/* PROFILE_PRINT(pd): prints on stdout the name, the entries/exits counters,
 * the total and max cycles and the average cycles per entry (0 if there
 * were no entries) of the profile data pd points to.
 * entries and exits are unsigned long, hence %lu. */
#define PROFILE_PRINT(pd) \
	do{ \
		printf("profile: %s (%lu/%lu) total %llu max %llu average %llu\n", \
				(pd)->name,  (pd)->entries, (pd)->exits, \
				(pd)->total_cycles,  (pd)->max_cycles, \
				(pd)->entries? \
				(pd)->total_cycles/(unsigned long long)(pd)->entries:0ULL ); \
	}while(0)
#endif
74
+
75
/* Test driver: checks every bit scan implementation against the expected
 * bit index for all single-bit 32 and 64 bit values (forward scans) and
 * for all "k low bits set" values (reverse scans), 100 times over, while
 * collecting per-implementation profile data. CHECK() exits on the first
 * mismatch; otherwise the profile results are printed and 0 is returned.
 * argc and argv are not used. */
int main(int argc, char** argv)
{
	int r;
	unsigned int v;
	unsigned long long ll;
	int i;
#ifndef NO_PROFILE
	/* pdf* - forward (first bit) scans, pdl* - reverse (last bit) scans */
	struct profile_data pdf1, pdf2, pdf4, pdf5, pdf6, pdf8;
	struct profile_data pdl1, pdl2, pdl4, pdl5, pdl6, pdl8;
#ifdef HAS_BIT_SCAN_ASM
	struct profile_data pdf3, pdf7, pdl3, pdl7;
#endif
	struct profile_data pdf_32, pdf_64, pdl_32, pdl_64;
	struct profile_data pdf_long, pdl_long;
#endif /* NO_PROFILE */
	
	profile_init(&pdf1, "first_debruijn32");
	profile_init(&pdf2, "first_slow32");
#ifdef HAS_BIT_SCAN_ASM
	profile_init(&pdf3, "first_asm32");
#endif
	profile_init(&pdf4, "first_br32");
	profile_init(&pdf5, "first_debruijn64");
	profile_init(&pdf6, "first_slow64");
#ifdef HAS_BIT_SCAN_ASM
	profile_init(&pdf7, "first_asm64");
#endif
	profile_init(&pdf8, "first_br64");
	profile_init(&pdl1, "last_debruijn32");
	profile_init(&pdl2, "last_slow32");
#ifdef HAS_BIT_SCAN_ASM
	profile_init(&pdl3, "last_asm32");
#endif
	profile_init(&pdl4, "last_br32");
	profile_init(&pdl5, "last_debruijn64");
	profile_init(&pdl6, "last_slow64");
#ifdef HAS_BIT_SCAN_ASM
	profile_init(&pdl7, "last_asm64");
#endif
	profile_init(&pdl8, "last_br64");
	
	/* the default versions selected by bit_scan.h */
	profile_init(&pdf_32, "scan_forward32");
	profile_init(&pdf_64, "scan_forward64");
	profile_init(&pdl_32, "scan_reverse32");
	profile_init(&pdl_64, "scan_reverse64");
	profile_init(&pdf_long, "scan_forward_l");
	profile_init(&pdl_long, "scan_reverse_l");


	for (i=0; i<100; i++){
	for (r=0; r<32; r++){
		/* only bit r set => first set bit is r */
		v=(1U<<r);
		CHECK("first debruijn 32bit", r, v, bit_scan_forward_debruijn32, &pdf1);
		CHECK("first slow 32bit", r, v, bit_scan_forward_slow32, &pdf2);
#ifdef HAS_BIT_SCAN_ASM
		CHECK("first asm 32bit", r, v, bit_scan_forward_asm32, &pdf3);
#endif
		CHECK("first br 32bit", r, v, bit_scan_forward_br32, &pdf4);
		CHECK("scan_forward32", r, v, bit_scan_forward32, &pdf_32);
		/* the long version is tested with 32 bit values only if long
		 * is at most 4 bytes */
		if (sizeof(long)<=4){
			CHECK("scan_forward_l", r, v, bit_scan_forward, &pdf_long);
		}
		/* set all the bits below r too => last set bit is still r */
		v+=(v-1);
		CHECK("last debruijn 32bit", r, v, bit_scan_reverse_debruijn32, &pdl1);
		CHECK("last slow 32bit", r, v, bit_scan_reverse_slow32, &pdl2);
#ifdef HAS_BIT_SCAN_ASM
		CHECK("last asm 32bit", r, v, bit_scan_reverse_asm32, &pdl3);
#endif
		CHECK("last br 32bit", r, v, bit_scan_reverse_br32, &pdl4);
		CHECK("scan_reverse32", r, v, bit_scan_reverse32, &pdl_32);
		if (sizeof(long)<=4){
			CHECK("scan_reverse_l", r, v, bit_scan_reverse, &pdl_long);
		}
	}
	for (r=0; r<64; r++){
		/* only bit r set => first set bit is r */
		ll=(1ULL<<r);
		CHECK("first debruijn 64bit", r, ll, bit_scan_forward_debruijn64, &pdf5);
		CHECK("first slow 64bit", r, ll, bit_scan_forward_slow64, &pdf6);
#ifdef HAS_BIT_SCAN_ASM
		CHECK("first asm 64bit", r, ll, bit_scan_forward_asm64, &pdf7);
#endif
		CHECK("first br 64bit", r, ll, bit_scan_forward_br64, &pdf8);
		CHECK("scan_forward64", r, ll, bit_scan_forward64, &pdf_64);
		/* the long version is tested with 64 bit values only if long
		 * is larger than 4 bytes */
		if (sizeof(long)>4){
			CHECK("scan_forward_l", r, ll, bit_scan_forward, &pdf_long);
		}
		/* set all the bits below r too => last set bit is still r */
		ll+=ll-1;
		CHECK("last debruijn 64bit", r, ll, bit_scan_reverse_debruijn64, &pdl5);
		CHECK("last slow 64bit", r, ll, bit_scan_reverse_slow64, &pdl6);
#ifdef HAS_BIT_SCAN_ASM
		CHECK("last asm 64bit", r, ll, bit_scan_reverse_asm64, &pdl7);
#endif
		CHECK("last br 64bit", r, ll, bit_scan_reverse_br64, &pdl8);
		CHECK("scan_reverse64", r, ll, bit_scan_reverse64, &pdl_64);
		if (sizeof(long)>4){
			CHECK("scan_reverse_l", r, ll, bit_scan_reverse, &pdl_long);
		}
	}
	}

	/* results: 32 bit forward, 32 bit reverse, 64 bit forward,
	 * 64 bit reverse, then the default versions */
	PROFILE_PRINT(&pdf1);
	PROFILE_PRINT(&pdf2);
#ifdef HAS_BIT_SCAN_ASM
	PROFILE_PRINT(&pdf3);
#endif
	PROFILE_PRINT(&pdf4);
	PROFILE_PRINT(&pdl1);
	PROFILE_PRINT(&pdl2);
#ifdef HAS_BIT_SCAN_ASM
	PROFILE_PRINT(&pdl3);
#endif
	PROFILE_PRINT(&pdl4);
	PROFILE_PRINT(&pdf5);
	PROFILE_PRINT(&pdf6);
#ifdef HAS_BIT_SCAN_ASM
	PROFILE_PRINT(&pdf7);
#endif
	PROFILE_PRINT(&pdf8);
	PROFILE_PRINT(&pdl5);
	PROFILE_PRINT(&pdl6);
#ifdef HAS_BIT_SCAN_ASM
	PROFILE_PRINT(&pdl7);
#endif
	PROFILE_PRINT(&pdl8);
	
	PROFILE_PRINT(&pdf_32);
	PROFILE_PRINT(&pdf_64);
	PROFILE_PRINT(&pdf_long);
	PROFILE_PRINT(&pdl_32);
	PROFILE_PRINT(&pdl_64);
	PROFILE_PRINT(&pdl_long);
	return 0;
}
0 208
new file mode 100644
... ...
@@ -0,0 +1,188 @@
1
+/*
2
+ * $Id$
3
+ * 
4
+ * Copyright (C) 2007 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/*
19
+ * Basic profile using the cpu cycle counter
20
+ *
21
+ * cycles_t - an unsigned integer type used for storing the cpu cycles
22
+ *            (unsigned long long for now)
23
+ *
24
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
25
+ *
26
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
27
+ *                            - sets u1 and u2 to the least significant, 
28
+ *                              respective most significant 32 bit word of
29
+ *                              the cpu cycles counter
30
+ * struct profile_data;            - holds all the profile results
31
+ *                               (last call cycles, max cycles, total cycles,
32
+ *                                no. of profile_start calls, no. of 
33
+ *                                profile_end calls, name use in profile_init)
34
+ * void     profile_init(pd, name) - initialize a profile structure
35
+ * void     profile_start(pd)      - starts profiling (call before calling
36
+ *                               the target function)
37
+ * void     profile_end(pd)        - stops profiling (call after the target
38
+ *                               function returns)
39
+ * 
40
+ */
41
+ /*
42
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
43
+ *                     inline asm,
44
+ *                  __CPU_x86, __CPU_x86_64, __CPU_sparc64
45
+ */
46
+/* 
47
+ * History:
48
+ * --------
49
+ *  2007-06-23  created by andrei
50
+ */
51
+
52
+
53
+
54
+
55
+#ifndef _profile_h
56
+#define _profile_h
57
+
58
+#include <string.h>
59
+
60
+/*
61
+ * cycles_t - an unsigned integer type used for storing the cpu cycles
62
+ *            (unsigned long long for now)
63
+ *
64
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
65
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
66
+ *                            - sets u1 and u2 to the least significant, 
67
+ *                              respective most significant 32 bit word of
68
+ *                              the cpu cycles counter
69
+ */
70
+
71
+#ifdef __CPU_x86
72
typedef unsigned long long cycles_t;

/* Returns the current value of the cpu cycle counter (rdtsc).
 * rdtsc returns its result in edx:eax; the two halves are read through
 * explicit "a"/"d" outputs and combined by hand. The "A" constraint is
 * avoided because it means edx:eax only for 32 bit builds: if this branch
 * is compiled for x86_64 (e.g. -D__CPU_x86 on a 64 bit host) "A" selects
 * a single register and the high half of the counter would be lost. */
inline static cycles_t get_cpu_cycles(void)
{
	unsigned int lo, hi;
	__asm__ __volatile__( "rdtsc \n\t" : "=a"(lo), "=d"(hi));
	return ((cycles_t)hi<<32)|lo;
}
80
+
81
/* get_cpu_cycles_uint(u1, u2): stores the least significant 32 bits of the
 * cpu cycle counter in *u1 and the most significant 32 bits in *u2.
 * (__asm__ instead of asm: also valid in strict ISO compiler modes) */
#define get_cpu_cycles_uint(u1, u2) \
	do{ \
		/* result in edx:eax */ \
		__asm__ __volatile__( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
	}while(0)
86
+
87
+#elif defined __CPU_x86_64
88
typedef unsigned long long cycles_t;

/* Returns the current value of the cpu cycle counter (rdtsc).
 * rdtsc returns the low 32 bits in eax and the high 32 bits in edx, which
 * are combined here into one 64 bit value.
 * (__asm__ instead of asm: also valid in strict ISO compiler modes) */
inline static cycles_t get_cpu_cycles(void)
{
	unsigned int lo, hi;
	__asm__ __volatile__( "rdtsc \n\t" : "=a"(lo), "=d"(hi));
	return ((cycles_t)hi<<32ULL)|lo;
}
96
+
97
+
98
/* get_cpu_cycles_uint(u1, u2): stores the least significant 32 bits of the
 * cpu cycle counter in *u1 and the most significant 32 bits in *u2.
 * (__asm__ instead of asm: also valid in strict ISO compiler modes) */
#define get_cpu_cycles_uint(u1, u2) \
	do{ \
		/* result in edx:eax */ \
		__asm__ __volatile__( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
	}while(0)
103
+
104
+#elif defined __CPU_sparc64
105
+
106
+typedef unsigned long long cycles_t;
107
+
108
/* Returns the current cpu cycle counter, read from the %tick register.
 * Two variants, selected on _LP64. */
inline static cycles_t get_cpu_cycles()
{
#if ! defined(_LP64)
#warning "ilp32 mode "
	/* _LP64 not defined: the counter is returned as two 32 bit words,
	 * overlaid on a cycles_t through a union. u2 (written with
	 * tick>>32) comes first in memory, u1 (written with tick) second.
	 * NOTE(review): presumably relies on the big endian layout of
	 * sparc - confirm */
	struct uint_64{
		unsigned int u2;
		unsigned int u1;
	};
	union{
		cycles_t c;
		struct uint_64 u;
	}r;
	
	asm volatile("rd %%tick, %0 \n\t"
				 "srlx %0, 32, %1 \n\t"
				: "=r"(r.u.u1), "=r"(r.u.u2));
	return r.c;
#else
	cycles_t r;
	/* normal 64 bit mode (e.g. gcc -m64) */
	asm volatile("rd %%tick, %0" : "=r"(r));
	return r;
#endif
}
132
/* Reads the %tick register and stores its least significant 32 bits in
 * *u1 and its most significant 32 bits in *u2.
 * NOTE(review): unlike get_cpu_cycles() there is no separate variant for
 * builds without _LP64 - confirm the single 64 bit "=r" output is fine
 * there. */
inline static void  get_cpu_cycles_uint(unsigned int* u1, unsigned int* u2)
{
	cycles_t r;
	asm volatile("rd %%tick, %0" : "=r"(r));
	*u1=(unsigned int)r;
	*u2=(unsigned int)(r>>32);
}
139
+
140
+#else /* __CPU_xxx */
141
+#error "no get_cycles support for this CPU"
142
+#endif /* __CPU_xxx */
143
+
144
+
145
/* A cycle counter value, accessible either as a whole (c) or as two
 * unsigned int words (uint.u1, uint.u2).
 * NOTE(review): which word is the low one depends on the byte order;
 * only the c member is used by the profile_* functions in this file. */
union profile_cycles{
	cycles_t c;
	struct{
		unsigned int u1;
		unsigned int u2;
	}uint;
};
152
+
153
/* Profile results for one measured code section; initialize with
 * profile_init(), update with profile_start() / profile_end(). */
struct profile_data{
	cycles_t cycles;  /* last call */
	cycles_t total_cycles; /* sum of the cycles of all the calls */
	cycles_t max_cycles; /* longest call seen so far */
	unsigned long entries; /* no. profile_start calls */
	unsigned long exits;   /* no. profile_end calls */
	char * name; /* name given to profile_init (pointer only, not copied) */
	
	/* private stuff */
	union profile_cycles init_rdtsc; /* counter value at profile_start */
};
164
+
165
+inline static void profile_init(struct profile_data* pd, char *name)
166
+{
167
+	memset(pd, 0, sizeof(*pd));
168
+	pd->name=name;
169
+}
170
+
171
+
172
+inline static void profile_start(struct profile_data* pd)
173
+{
174
+	pd->entries++;
175
+	pd->init_rdtsc.c=get_cpu_cycles();
176
+}
177
+
178
+
179
+inline static void profile_end(struct profile_data* pd)
180
+{
181
+	pd->cycles=get_cpu_cycles()-pd->init_rdtsc.c;
182
+	if (pd->max_cycles<pd->cycles) pd->max_cycles=pd->cycles;
183
+	pd->total_cycles+=pd->cycles;
184
+	pd->exits++;
185
+}
186
+
187
+
188
+#endif