Browse code

- preliminary re sed like subst support

Andrei Pelinescu-Onciul authored on 15/08/2003 17:47:45
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,530 @@
1
+/* 
2
+ * $Id$
3
+ *
4
+ * regexp and regexp substitutions implementations
5
+ * 
6
+ * Copyright (C) 2001-2003 Fhg Fokus
7
+ *
8
+ * This file is part of ser, a free SIP server.
9
+ *
10
+ * ser is free software; you can redistribute it and/or modify
11
+ * it under the terms of the GNU General Public License as published by
12
+ * the Free Software Foundation; either version 2 of the License, or
13
+ * (at your option) any later version
14
+ *
15
+ * For a license to use the ser software under conditions
16
+ * other than those described here, or to purchase support for this
17
+ * software, please contact iptel.org by e-mail at the following addresses:
18
+ *    info@iptel.org
19
+ *
20
+ * ser is distributed in the hope that it will be useful,
21
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
+ * GNU General Public License for more details.
24
+ *
25
+ * You should have received a copy of the GNU General Public License 
26
+ * along with this program; if not, write to the Free Software 
27
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
+ *
29
+ *
30
+ * History:
31
+ * --------
32
+ *   2003-08-04  created by andrei
33
+ */
34
+
35
+
36
+#include "dprint.h"
37
+#include "mem/mem.h"
38
+#include "str.h"
39
+#include "parser/msg_parser.h"
40
+
41
+#include <string.h>
42
+#include <sys/types.h> /* for regex */
43
+#include <regex.h>
44
+
45
+
46
/* kinds of escape sequences recognised inside a replacement string */
enum replace_special {
	REPLACE_NMATCH, /* \0 .. \9: text of the n-th regex (sub)match */
	REPLACE_CHAR,   /* escape that stands for one literal character */
	REPLACE_URI     /* \u: uri of the sip request */
};
47
+
48
+struct replace_with{
49
+	int offset; /* offset in string */
50
+	int size;   /* size of replace "anchor" in string */
51
+	enum replace_special type;
52
+	union{
53
+		int nmatch;
54
+		char c;
55
+	};
56
+};
57
+
58
+struct subst_expr{
59
+	regex_t* re;
60
+	str replacement;
61
+	int replace_all; 
62
+	int n_escapes; /* escapes number (replace[] size) */
63
+	int max_pmatch ; /* highest () referenced */
64
+	struct replace_with replace[1]; /* 0 does not work on all compilers */
65
+};
66
+
67
+struct replace_lst{
68
+	int offset;
69
+	int size;   /* at offset, delete size bytes and replace them with rpl */;
70
+	str rpl;
71
+	struct replace_lst *next;
72
+};
73
+
74
+
75
+
76
+void subst_expr_free(struct subst_expr* se)
77
+{
78
+	if (se->replacement.s) pkg_free(se->replacement.s);
79
+	if (se->re) { regfree(se->re); pkg_free(se->re); };
80
+	pkg_free(se);
81
+}
82
+
83
+
84
+
85
+/* frees the entire least, head (l) too */
86
+void replace_lst_free(struct replace_lst* l)
87
+{
88
+	struct replace_lst* t;
89
+	
90
+	while (l){
91
+		t=l;
92
+		l=l->next;
93
+		if (t->rpl.s) pkg_free(t->rpl.s);
94
+		pkg_free(t);
95
+	}
96
+}
97
+
98
+
99
+
100
+/* parse a /regular expression/replacement/flags into a subst_expr structure */
101
+struct subst_expr* subst_parser(str* subst)
102
+{
103
+#define MAX_REPLACE_WITH 100
104
+	char c;
105
+	char* end;
106
+	char* p;
107
+	char* re;
108
+	char* re_end;
109
+	char* repl;
110
+	char* repl_end;
111
+	struct replace_with rw[MAX_REPLACE_WITH];
112
+	int rw_no;
113
+	int escape;
114
+	int cflags; /* regcomp flags */
115
+	int replace_all;
116
+	struct subst_expr* se;
117
+	regex_t* regex;
118
+	int max_pmatch;
119
+	int r;
120
+	
121
+	/* init */
122
+	se=0;
123
+	regex=0;
124
+	cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
125
+	replace_all=0;
126
+	if (subst->len<3){
127
+		LOG(L_ERR, "ERROR: subst_parser: expression is too short: %.*s\n",
128
+				subst->len, subst->s);
129
+		goto error;
130
+	}
131
+	
132
+	p=subst->s;
133
+	c=*p;
134
+	if (c=='\\'){
135
+		LOG(L_ERR, "ERROR: subst_parser: invalid separator char <%c>"
136
+				" in %.*s\n", c, subst->len, subst->s);
137
+		goto error;
138
+	}
139
+	p++;
140
+	end=subst->s+subst->len;
141
+	/* find re */
142
+	re=p;
143
+	for (;p<end;p++){
144
+		/* if unescaped sep. char */
145
+		if ((*p==c) && (*(p-1)!='\\')) goto found_re;
146
+	}
147
+	LOG(L_ERR, "ERROR: subst_parser: no separator found: %.*s\n", subst->len, 
148
+			subst->s);
149
+	goto error;
150
+found_re:
151
+	re_end=p;
152
+	p++;
153
+	/* parse replacement */
154
+	repl=p;
155
+	rw_no=0;
156
+	max_pmatch=0;
157
+	escape=0;
158
+	for(;p<end; p++){
159
+		if (escape){
160
+			escape=0;
161
+			switch (*p){
162
+				/* special char escapes */
163
+				case '\\':
164
+					rw[rw_no].size=2;
165
+					rw[rw_no].offset=(p-1)-repl;
166
+					rw[rw_no].type=REPLACE_CHAR;
167
+					rw[rw_no].c='\\';
168
+					break;
169
+				case 'n':
170
+					rw[rw_no].size=2;
171
+					rw[rw_no].offset=(p-1)-repl;
172
+					rw[rw_no].type=REPLACE_CHAR;
173
+					rw[rw_no].c='\n';
174
+					break;
175
+				case 'r':
176
+					rw[rw_no].size=2;
177
+					rw[rw_no].offset=(p-1)-repl;
178
+					rw[rw_no].type=REPLACE_CHAR;
179
+					rw[rw_no].c='\r';
180
+					break;
181
+				case 't':
182
+					rw[rw_no].size=2;
183
+					rw[rw_no].offset=(p-1)-repl;
184
+					rw[rw_no].type=REPLACE_CHAR;
185
+					rw[rw_no].c='\t';
186
+					break;
187
+				/* special sip msg parts escapes */
188
+				case 'u':
189
+					rw[rw_no].size=2;
190
+					rw[rw_no].offset=(p-1)-repl;
191
+					rw[rw_no].type=REPLACE_URI;
192
+					break;
193
+				/* re matches */
194
+				case '0': /* allow 0, too, reference to the whole match */
195
+				case '1':
196
+				case '2':
197
+				case '3':
198
+				case '4':
199
+				case '5':
200
+				case '6':
201
+				case '7':
202
+				case '8':
203
+				case '9':
204
+					rw[rw_no].size=2;
205
+					rw[rw_no].offset=(p-1)-repl;
206
+					rw[rw_no].type=REPLACE_NMATCH;
207
+					rw[rw_no].nmatch=(*p)-'0'; /* 0 is the whole matched str*/
208
+					if (max_pmatch<rw[rw_no].nmatch) 
209
+						max_pmatch=rw[rw_no].nmatch;
210
+					break;
211
+				default: /* just print current char */
212
+					if (*p!=c){
213
+						LOG(L_WARN, "subst_parser: WARNING: \\%c unknown"
214
+								" escape in %.*s\n", *p, subst->len, subst->s);
215
+					}
216
+					rw[rw_no].size=2;
217
+					rw[rw_no].offset=(p-1)-repl;
218
+					rw[rw_no].type=REPLACE_CHAR;
219
+					rw[rw_no].c=*p;
220
+					break;
221
+			}
222
+			rw_no++;
223
+			if (rw_no>=MAX_REPLACE_WITH){
224
+				LOG(L_ERR, "ERROR: subst_parser: too many escapes in the"
225
+							" replace part %.*s\n", subst->len, subst->s);
226
+				goto error;
227
+			}
228
+		}else if (*p=='\\') escape=1;
229
+		else  if (*p==c) goto found_repl;
230
+	}
231
+	LOG(L_ERR, "ERROR: subst_parser: missing separator: %.*s\n", subst->len, 
232
+			subst->s);
233
+	goto error;
234
+found_repl:
235
+	repl_end=p;
236
+	p++;
237
+	/* parse flags */
238
+	for(;p<end; p++){
239
+		switch(*p){
240
+			case 'i':
241
+				cflags|=REG_ICASE;
242
+				break;
243
+			case 's':
244
+				cflags&=(~REG_NEWLINE);
245
+				break;
246
+			case 'g':
247
+				replace_all=1;
248
+				break;
249
+			default:
250
+				LOG(L_ERR, "ERROR: subst_parser: unknown flag %c in %.*s\n",
251
+						*p, subst->len, subst->s);
252
+				goto error;
253
+		}
254
+	}
255
+
256
+	/* compile the re */
257
+	if ((regex=pkg_malloc(sizeof(regex_t)))==0){
258
+		LOG(L_ERR, "ERROR: subst_parser: out of memory (re)\n");
259
+		goto error;
260
+	}
261
+	c=*re_end; /* regcomp expects null terminated strings -- save */
262
+	*re_end=0;
263
+	if (regcomp(regex, re, cflags)!=0){
264
+		pkg_free(regex);
265
+		*re_end=c; /* restore */
266
+		LOG(L_ERR, "ERROR: subst_parser: bad regular expression %.*s in "
267
+				"%.*s\n", (int)(re_end-re), re, subst->len, subst->s);
268
+		goto error;
269
+	}
270
+	*re_end=c; /* restore */
271
+	/* construct the subst_expr structure */
272
+	se=pkg_malloc(sizeof(struct subst_expr)+
273
+					((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
274
+		/* 1 replace_with structure is  already included in subst_expr */
275
+	if (se==0){
276
+		LOG(L_ERR, "ERROR: subst_parser: out of memory (subst_expr)\n");
277
+		goto error;
278
+	}
279
+	memset((void*)se, 0, sizeof(struct subst_expr));
280
+	se->replacement.len=repl_end-repl;
281
+	if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
282
+		LOG(L_ERR, "ERROR: subst_parser: out of memory (replacement)\n");
283
+		goto error;
284
+	}
285
+	/* start copying */
286
+	memcpy(se->replacement.s, repl, se->replacement.len);
287
+	se->re=regex;
288
+	se->replace_all=replace_all;
289
+	se->n_escapes=rw_no;
290
+	se->max_pmatch=max_pmatch;
291
+	for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
292
+	return se;
293
+	
294
+error:
295
+	if (se) { subst_expr_free(se); regex=0; }
296
+	if (regex) { regfree (regex); pkg_free(regex); }
297
+	return 0;
298
+}
299
+
300
+
301
+
302
+static int replace_len(char* match, int nmatch, regmatch_t* pmatch,
303
+					struct subst_expr* se, struct sip_msg* msg)
304
+{
305
+	int r;
306
+	int len;
307
+	str* uri;
308
+	
309
+	len=se->replacement.len;
310
+	for (r=0; r<se->n_escapes; r++){
311
+		switch(se->replace[r].type){
312
+			case REPLACE_NMATCH:
313
+				len-=se->replace[r].size;
314
+				if ((se->replace[r].nmatch<nmatch)&&(
315
+						pmatch[se->replace[r].nmatch].rm_so!=-1)){
316
+						/* do the replace */
317
+						len+=pmatch[se->replace[r].nmatch].rm_eo-
318
+								pmatch[se->replace[r].nmatch].rm_so;
319
+				};
320
+				break;
321
+			case REPLACE_CHAR:
322
+				len-=(se->replace[r].size-1);
323
+				break;
324
+			case REPLACE_URI:
325
+				len-=se->replace[r].size;
326
+				if (msg->first_line.type!=SIP_REQUEST){
327
+					LOG(L_CRIT, "BUG: replace_len: uri substitution on"
328
+								" a reply\n");
329
+					break; /* ignore, we can continue */
330
+				}
331
+				uri= (msg->new_uri.s)?(&msg->new_uri):
332
+					(&msg->first_line.u.request.uri);
333
+				len+=uri->len;
334
+				break;
335
+			default:
336
+				LOG(L_CRIT, "BUG: replace_len: unknown type %d\n", 
337
+						se->replace[r].type);
338
+				/* ignore it */
339
+		}
340
+	}
341
+	return len;
342
+}
343
+
344
+
345
+
346
+/* rpl.s will be alloc'ed with the proper size & rpl.len set
347
+ * returns 0 on success, <0 on error*/
348
+static int replace_build(char* match, int nmatch, regmatch_t* pmatch,
349
+					struct subst_expr* se, struct sip_msg* msg, str* rpl)
350
+{
351
+	int r;
352
+	str* uri;
353
+	char* p;
354
+	char* dest;
355
+	char* end;
356
+	int size;
357
+	
358
+	rpl->len=replace_len(match, nmatch, pmatch, se, msg);
359
+	if (rpl->len==0){
360
+		rpl->s=0; /* emtpy string */
361
+		return 0;
362
+	}
363
+	rpl->s=pkg_malloc(rpl->len);
364
+	if (rpl->s==0){
365
+		LOG(L_ERR, "ERROR: replace_build: out of mem (rpl)\n");
366
+		goto error;
367
+	}
368
+	p=se->replacement.s;
369
+	end=p+se->replacement.len;
370
+	dest=rpl->s;
371
+	for (r=0; r<se->n_escapes; r++){
372
+		/* copy the unescaped parts */
373
+		size=se->replacement.s+se->replace[r].offset-p;
374
+		memcpy(dest, p, size);
375
+		p+=size+se->replace[r].size;
376
+		dest+=size;
377
+		switch(se->replace[r].type){
378
+			case REPLACE_NMATCH:
379
+				if ((se->replace[r].nmatch<nmatch)&&(
380
+						pmatch[se->replace[r].nmatch].rm_so!=-1)){
381
+						/* do the replace */
382
+						size=pmatch[se->replace[r].nmatch].rm_eo-
383
+								pmatch[se->replace[r].nmatch].rm_so;
384
+						memcpy(dest, match+pmatch[se->replace[r].nmatch].rm_so,
385
+								size);
386
+						dest+=size;
387
+				};
388
+				break;
389
+			case REPLACE_CHAR:
390
+				*dest=se->replace[r].c;
391
+				dest++;
392
+				break;
393
+			case REPLACE_URI:
394
+				if (msg->first_line.type!=SIP_REQUEST){
395
+					LOG(L_CRIT, "BUG: replace_build: uri substitution on"
396
+								" a reply\n");
397
+					break; /* ignore, we can continue */
398
+				}
399
+				uri= (msg->new_uri.s)?(&msg->new_uri):
400
+					(&msg->first_line.u.request.uri);
401
+				memcpy(dest, uri->s, uri->len);
402
+				dest+=uri->len;
403
+				break;
404
+			default:
405
+				LOG(L_CRIT, "BUG: replace_build: unknown type %d\n", 
406
+						se->replace[r].type);
407
+				/* ignore it */
408
+		}
409
+	}
410
+	memcpy(dest, p, end-p);
411
+	return 0;
412
+error:
413
+	return -1;
414
+}
415
+
416
+
417
+
418
+/* WARNING: input must be 0 terminated! */
419
+struct replace_lst* run_subst(struct subst_expr* se, char* input,
420
+								struct sip_msg* msg)
421
+{
422
+	struct replace_lst *head;
423
+	struct replace_lst **crt;
424
+	char *p;
425
+	int r;
426
+	regmatch_t* pmatch;
427
+	int nmatch;
428
+	
429
+	
430
+	/* init */
431
+	head=0;
432
+	crt=&head;
433
+	p=input;
434
+	nmatch=se->max_pmatch+1;
435
+	/* no of () referenced + 1 for the whole string: pmatch[0] */
436
+	pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
437
+	if (pmatch==0){
438
+		LOG(L_ERR, "ERROR: run_subst_ out of mem. (pmatch)\n");
439
+		goto error;
440
+	}
441
+	do{
442
+		r=regexec(se->re, p, nmatch, pmatch, 0);
443
+		/* subst */
444
+		if (r){
445
+			*crt=pkg_malloc(sizeof(struct replace_lst));
446
+			if (*crt==0){
447
+				LOG(L_ERR, "ERROR: run_subst: out of mem (crt)\n");
448
+				goto error;
449
+			}
450
+			memset(*crt, sizeof(struct replace_lst), 0);
451
+			if (pmatch[0].rm_so==-1){
452
+				LOG(L_ERR, "ERROR: run_subst: unknown offset?\n");
453
+				goto error;
454
+			}
455
+			(*crt)->offset=pmatch[0].rm_so+(int)(p-input);
456
+			(*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
457
+			/* create subst. string */
458
+			/* construct the string from replace[] */
459
+			if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
460
+				goto error;
461
+			}
462
+			crt=&((*crt)->next);
463
+			p+=pmatch[0].rm_eo;
464
+		}
465
+	}while(r && se->replace_all);
466
+	pkg_free(pmatch);
467
+	return head;
468
+error:
469
+	if (head) replace_lst_free(head);
470
+	if (pmatch) pkg_free(pmatch);
471
+	return 0;
472
+}
473
+
474
+
475
+
476
+/* return the substitution result in a str, input must be 0 term */ 
477
+str* subst_str(char *input, struct sip_msg* msg, struct subst_expr* se)
478
+{
479
+	str* res;
480
+	struct replace_lst *lst;
481
+	struct replace_lst* l;
482
+	int len;
483
+	int size;
484
+	char* p;
485
+	char* dest;
486
+	char* end;
487
+	
488
+	
489
+	/* compute the len */
490
+	len=strlen(input);
491
+	end=input+len;
492
+	lst=run_subst(se, input, msg);
493
+	for (l=lst; l; l=l->next)
494
+		len+=(int)(l->rpl.len)-l->size;
495
+	res=pkg_malloc(sizeof(str));
496
+	if (res==0){
497
+		LOG(L_ERR, "ERROR: subst_str: mem. allocation error\n");
498
+		goto error;
499
+	}
500
+	res->s=pkg_malloc(len);
501
+	if (res->s==0){
502
+		LOG(L_ERR, "ERROR: subst_str: mem. allocation error (res->s)\n");
503
+		goto error;
504
+	}
505
+	res->len=len;
506
+	
507
+	/* replace */
508
+	dest=res->s;
509
+	p=input;
510
+	for(l=lst; l; l=l->next){
511
+		size=l->offset+input-p;
512
+		memcpy(dest, p, size);
513
+		p+=size;
514
+		dest+=size;
515
+		if (l->rpl.len){
516
+			memcpy(dest, l->rpl.s, l->rpl.len);
517
+			dest+=l->rpl.len;
518
+		}
519
+	}
520
+	memcpy(dest, p, end-p);
521
+	if(lst) replace_lst_free(lst);
522
+	return res;
523
+error:
524
+	if (lst) replace_lst_free(lst);
525
+	if (res){
526
+		if (res->s) pkg_free(res->s);
527
+		pkg_free(res);
528
+	}
529
+	return 0;
530
+}