re.c
8ed93c23
 /* 
  * regexp and regexp substitutions implementations
  * 
53c7e0f1
  * Copyright (C) 2001-2003 FhG Fokus
8ed93c23
  *
6a0f4382
  * This file is part of Kamailio, a free SIP server.
8ed93c23
  *
6a0f4382
  * Kamailio is free software; you can redistribute it and/or modify
8ed93c23
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version
  *
6a0f4382
  * Kamailio is distributed in the hope that it will be useful,
8ed93c23
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License 
  * along with this program; if not, write to the Free Software 
9e1ff448
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
8ed93c23
  *
  */
 
1d0661db
 /*!
  * \file
6a0f4382
  * \brief Kamailio core ::  regexp and regexp substitutions implementations
1d0661db
  * \ingroup core
  * Module: \ref core
  */
 
8ed93c23
 
 #include "dprint.h"
 #include "mem/mem.h"
cc087c64
 #include "re.h"
8ed93c23
 
 #include <string.h>
 
6e65d12a
 /* number of entries in the replace_with token array that subst_parser()
  * keeps on its stack and hands to parse_repl() */
 #define MAX_REPLACE_WITH 100
 /* size in bytes of the static scratch buffer in which replace_build()
  * assembles one replacement string */
 #define REPLACE_BUFFER_SIZE 1024
8ed93c23
 
 /*
  * Releases a substitution expression: its private copy of the replacement
  * text, the compiled regular expression (regfree() followed by pkg_free())
  * and finally the structure itself.
  * A NULL se is silently ignored.
  */
 void subst_expr_free(struct subst_expr* se)
 {
 	if (se==0) return; /* nothing to release */
 	if (se->replacement.s) pkg_free(se->replacement.s);
 	if (se->re) {
 		regfree(se->re);
 		pkg_free(se->re);
 	}
 	pkg_free(se);
 }
 
 
 
cc087c64
 /* frees the entire list, head (l) too */
8ed93c23
 void replace_lst_free(struct replace_lst* l)
 {
 	struct replace_lst* t;
 	
 	while (l){
 		t=l;
 		l=l->next;
 		if (t->rpl.s) pkg_free(t->rpl.s);
 		pkg_free(t);
 	}
 }
 
6a320d25
 int parse_repl(struct replace_with * rw, char ** begin, 
 				char * end, int *max_token_nb, int with_sep)
 {
 
 	char* p0;
 	char * repl;
 	str s;
 	int token_nb;
 	int escape;
 	int max_pmatch;
 	char *p, c;
 
 	/* parse replacement */
 	p = *begin;
 	c = *p;
 	if(with_sep)
 		p++;
 	repl= p;
 	token_nb=0;
 	max_pmatch=0;
 	escape=0;
 	for(;p<end; p++){
 		if (escape){
 			escape=0;
 			switch (*p){
 				/* special char escapes */
 				case '\\':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c='\\';
 					break;
 				case 'n':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c='\n';
 					break;
 				case 'r':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c='\r';
 					break;
 				case 't':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c='\t';
 					break;
 				case PV_MARKER:
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c=PV_MARKER;
 					break;
 				/* special sip msg parts escapes */
 				case 'u':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_URI;
 					break;
 				/* re matches */
 				case '0': /* allow 0, too, reference to the whole match */
 				case '1':
 				case '2':
 				case '3':
 				case '4':
 				case '5':
 				case '6':
 				case '7':
 				case '8':
 				case '9':
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_NMATCH;
 					rw[token_nb].u.nmatch=(*p)-'0';
 								/* 0 is the whole matched str*/
 					if (max_pmatch<rw[token_nb].u.nmatch) 
 						max_pmatch=rw[token_nb].u.nmatch;
 					break;
 				default: /* just print current char */
 					if (*p!=c){
 						WARN("subst_parser:\\%c unknown escape in %s\n", *p, *begin);
 					}
 					rw[token_nb].size=2;
 					rw[token_nb].offset=(p-1)-repl;
 					rw[token_nb].type=REPLACE_CHAR;
 					rw[token_nb].u.c=*p;
 					break;
 			}
 
 			token_nb++;
 
 			if (token_nb>=MAX_REPLACE_WITH){
 				ERR("subst_parser: too many escapes in the replace part %s\n", *begin);
 				goto error;
 			}
 		}else if (*p=='\\') {
 			escape=1;
 		}else if (*p==PV_MARKER) {
 			s.s = p;
 			s.len = end - s.s;
 			p0 = pv_parse_spec(&s, &rw[token_nb].u.spec);
 			if(p0==NULL)
 			{
 				ERR("subst_parser: bad specifier in replace part %s\n", *begin);
 				goto error;
 			}
 			rw[token_nb].size=p0-p;
 			rw[token_nb].offset=p-repl;
 			rw[token_nb].type=REPLACE_SPEC;
 			token_nb++;
 			p=p0-1;
 		}else  if (*p==c && with_sep){
 				goto found_repl;
 		}
 	}
 	if(with_sep){
 		ERR("subst_parser: missing separator: %s\n", *begin);
 		goto error;
 	}
 
 found_repl:
 
 	*max_token_nb = max_pmatch;
 	*begin = p;
 	return token_nb;
 
 error:
 	return -1;
 }
8ed93c23
 
 
 /*
  * Parses a substitution expression of the form
  *     <sep>regular expression<sep>replacement<sep>flags
  * (e.g. /re/repl/flags) into a newly allocated subst_expr structure.
  *
  * The first char of subst is the separator; a backslash is refused. A
  * separator preceded by a backslash does not end the regular expression.
  * Flags:
  *   i - the re is compiled with REG_ICASE
  *   s - the re is compiled without REG_NEWLINE
  *   g - replace all the matches
  *
  * subst->s is written to while the re is compiled (the char that follows
  * the re is swapped for a 0 and put back afterwards).
  *
  * Returns the new structure, allocated with pkg_malloc(), or 0 on error.
  */
 struct subst_expr* subst_parser(str* subst)
 {
 	struct replace_with tokens[MAX_REPLACE_WITH];
 	struct subst_expr* se;
 	regex_t* regex;
 	char* cur;
 	char* end;
 	char* re;
 	char* re_end;
 	char* repl;
 	char* repl_end;
 	char sep;
 	char saved;
 	size_t se_size;
 	int n_tokens;
 	int max_pmatch;
 	int cflags; /* regcomp flags */
 	int replace_all;
 	int rc;
 	int i;
 
 	/* init */
 	se=0;
 	regex=0;
 	replace_all=0;
 	cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
 
 	if (subst->len<3){
 		LM_ERR("expression is too short: %.*s\n", subst->len, subst->s);
 		goto error;
 	}
 	sep=subst->s[0];
 	if (sep=='\\'){
 		LM_ERR("invalid separator char <%c> in %.*s\n", sep, subst->len, subst->s);
 		goto error;
 	}
 	end=subst->s+subst->len;
 	re=subst->s+1;
 
 	/* the re stops at the first separator not preceded by a backslash */
 	cur=re;
 	while (cur<end && !((*cur==sep) && (*(cur-1)!='\\')))
 		cur++;
 	if (cur>=end){
 		LM_ERR("no separator found: %.*s\n", subst->len, subst->s);
 		goto error;
 	}
 	re_end=cur;
 	if ((end-cur)<2){
 		LM_ERR("String too short\n");
 		goto error;
 	}
 
 	/* replacement: parse_repl starts on the separator and moves cur to the
 	 * place where it stopped */
 	repl=cur+1;
 	n_tokens=parse_repl(tokens, &cur, end, &max_pmatch, WITH_SEP);
 	if (n_tokens<0)
 		goto error;
 	repl_end=cur;
 
 	/* parse flags */
 	for (cur++; cur<end; cur++){
 		if (*cur=='i'){
 			cflags|=REG_ICASE;
 		}else if (*cur=='s'){
 			cflags&=(~REG_NEWLINE);
 		}else if (*cur=='g'){
 			replace_all=1;
 		}else{
 			LM_ERR("unknown flag %c in %.*s\n", *cur, subst->len, subst->s);
 			goto error;
 		}
 	}
 
 	/* compile the re */
 	regex=pkg_malloc(sizeof(regex_t));
 	if (regex==0){
 		LM_ERR("out of memory\n");
 		goto error;
 	}
 	/* regcomp expects a 0 terminated string: cut the input at the end of
 	 * the re for the duration of the call, then restore it */
 	saved=*re_end;
 	*re_end=0;
 	rc=regcomp(regex, re, cflags);
 	*re_end=saved;
 	if (rc!=0){
 		pkg_free(regex);
 		regex=0;
 		LM_ERR("bad regular expression %.*s in %.*s\n",
 				(int)(re_end-re), re, subst->len, subst->s);
 		goto error;
 	}
 
 	/* construct the subst_expr structure; 1 replace_with structure is
 	 * already included in subst_expr, only the extra ones are added */
 	se_size=sizeof(struct subst_expr);
 	if (n_tokens)
 		se_size+=(n_tokens-1)*sizeof(struct replace_with);
 	se=pkg_malloc(se_size);
 	if (se==0){
 		LM_ERR("out of memory\n");
 		goto error;
 	}
 	memset((void*)se, 0, sizeof(struct subst_expr));
 	se->re=regex;
 	se->replace_all=replace_all;
 	se->n_escapes=n_tokens;
 	se->max_pmatch=max_pmatch;
 	se->replacement.len=repl_end-repl;
 	if (se->replacement.len<=0){
 		se->replacement.s = NULL;
 	}else{
 		se->replacement.s=pkg_malloc(se->replacement.len);
 		if (se->replacement.s==0){
 			LM_ERR("out of memory\n");
 			goto error;
 		}
 		/* keep a private copy of the raw replacement text */
 		memcpy(se->replacement.s, repl, se->replacement.len);
 	}
 	for (i=0; i<n_tokens; i++)
 		se->replace[i]=tokens[i];
 	LM_DBG("ok, se is %p\n", se);
 	return se;
 
 error:
 	if (se){
 		/* se->re already points to the compiled re, it goes with se */
 		subst_expr_free(se);
 	}else if (regex){
 		regfree (regex);
 		pkg_free(regex);
 	}
 	return 0;
 }
 
 /*
  * Builds the replacement text for one match by walking the tokens of se
  * and copying, in turn, the literal text between them and the expansion
  * of each token.
  *
  * match  - the string regexec() was run on; the pmatch offsets are
  *          relative to it
  * nmatch - number of entries in pmatch
  * pmatch - match offsets filled by regexec()
  * se     - the substitution expression
  * msg    - sip message used to expand \u and the pseudo-variables
  * rpl    - result: rpl->s is pkg_malloc'ed with the proper size (not 0
  *          terminated) and rpl->len is set
  *
  * A match reference that is out of range or did not take part in the
  * match expands to nothing. \u on a reply and a pseudo-variable whose
  * evaluation fails are logged and skipped.
  * The text is assembled in a static buffer, so it has to stay shorter
  * than REPLACE_BUFFER_SIZE-1 bytes.
  *
  * returns 0 on success, <0 on error
  */
 static int replace_build(const char* match, int nmatch, regmatch_t* pmatch,
 					struct subst_expr* se, struct sip_msg* msg, str* rpl)
 {
 	int r;
 	str* uri;
 	pv_value_t sv;
 	char* p;
 	char* dest;
 	char* end;
 	int size;
 	static char rbuf[REPLACE_BUFFER_SIZE];
 
 /* appends (size) bytes from (src) at (dst) and advances (dst); jumps to the
  * error label if the length is negative or the chunk does not fit in rbuf.
  * Nothing is copied for an empty chunk, so (src) may be NULL in that case
  * (memcpy must not be given a NULL pointer, not even for 0 bytes). */
 #define RBUF_APPEND(dst, src, size) \
 	do {														\
 		if ((size) < 0) {										\
 			LM_ERR("Invalid negative length\n");				\
 			goto error;											\
 		}														\
 		if ((dst) - rbuf + (size) >= REPLACE_BUFFER_SIZE - 1) {	\
 			LM_ERR("Buffer too small\n");						\
 			goto error;											\
 		}														\
 		if ((size) > 0) {										\
 			memcpy((dst), (src), (size));						\
 			(dst) += (size);									\
 		}														\
 	} while (0)
 
 	p=se->replacement.s;
 	end=p+se->replacement.len;
 	dest=rbuf;
 
 	for (r=0; r<se->n_escapes; r++){
 		/* copy the literal text between the previous token and this one */
 		size=se->replacement.s+se->replace[r].offset-p;
 		RBUF_APPEND(dest, p, size);
 		/* skip the literal text and the token itself */
 		p+=size+se->replace[r].size;
 		switch(se->replace[r].type){
 			case REPLACE_NMATCH:
 				if ((se->replace[r].u.nmatch<nmatch)&&(
 						pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
 						/* do the replace */
 						size=pmatch[se->replace[r].u.nmatch].rm_eo-
 								pmatch[se->replace[r].u.nmatch].rm_so;
 						RBUF_APPEND(dest, 
 									match+pmatch[se->replace[r].u.nmatch].rm_so,
 									size);
 				}
 				break;
 			case REPLACE_CHAR:
 				RBUF_APPEND(dest, &se->replace[r].u.c, 1);
 				break;
 			case REPLACE_URI:
 				if (msg->first_line.type!=SIP_REQUEST){
 					LM_CRIT("uri substitution on a reply\n");
 					break; /* ignore, we can continue */
 				}
 				/* the rewritten uri takes precedence over the original */
 				uri= (msg->new_uri.s)?(&msg->new_uri):
 					(&msg->first_line.u.request.uri);
 				RBUF_APPEND(dest, uri->s, uri->len);
 				break;
 			case REPLACE_SPEC:
 				if(pv_get_spec_value(msg, &se->replace[r].u.spec, &sv)!=0) {
 					LM_ERR("item substitution returned error\n");
 					break; /* ignore, we can continue */
 				}
 				RBUF_APPEND(dest, sv.rs.s, sv.rs.len);
 				break;
 			default:
 				LM_CRIT("unknown type %d\n", se->replace[r].type);
 				/* ignore it */
 				break;
 		}
 	}
 	/* literal text after the last token */
 	RBUF_APPEND(dest, p, end-p);
 	rpl->len = dest - rbuf;
 	if ((rpl->s = pkg_malloc(rpl->len)) == NULL) {
 		LM_ERR("Out of pkg memory\n");
 		goto error;
 	}
 	memcpy(rpl->s, rbuf, rpl->len);
 	return 0;
 error:
 	return -1;
 }
 #undef RBUF_APPEND
 
 
 
 /* WARNING: input must be 0 terminated! */
4e8f053e
 /* returns: 0 if no match or error, or subst result; if count!=0
  *           it will be set to 0 (no match), the number of matches
  *           or -1 (error).
  */
d1b976d9
 struct replace_lst* subst_run(struct subst_expr* se, const char* input,
4e8f053e
 								struct sip_msg* msg, int* count)
8ed93c23
 {
 	struct replace_lst *head;
 	struct replace_lst **crt;
d1b976d9
 	const char *p;
8ed93c23
 	int r;
 	regmatch_t* pmatch;
 	int nmatch;
267af5f1
 	int eflags;
4e8f053e
 	int cnt;
8ed93c23
 	
 	
 	/* init */
 	head=0;
4e8f053e
 	cnt=0;
8ed93c23
 	crt=&head;
 	p=input;
 	nmatch=se->max_pmatch+1;
 	/* no of () referenced + 1 for the whole string: pmatch[0] */
 	pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
 	if (pmatch==0){
af8bbc2e
 		LM_ERR("out of mem\n");
8ed93c23
 		goto error;
 	}
267af5f1
 	eflags=0;
8ed93c23
 	do{
267af5f1
 		r=regexec(se->re, p, nmatch, pmatch, eflags);
508fe14d
 		LM_DBG("running. r=%d\n", r);
8ed93c23
 		/* subst */
cc087c64
 		if (r==0){ /* != REG_NOMATCH */
b23b15b9
 			if (pmatch[0].rm_so==-1) {
af8bbc2e
 				LM_ERR("Unknown offset?\n");
b23b15b9
 				goto error;
 			}
 			if (pmatch[0].rm_so==pmatch[0].rm_eo) {
af8bbc2e
 				LM_ERR("Matched string is empty, invalid regexp?\n");
b23b15b9
 				goto error;
 			}
8ed93c23
 			*crt=pkg_malloc(sizeof(struct replace_lst));
 			if (*crt==0){
af8bbc2e
 				LM_ERR("out of mem\n");
8ed93c23
 				goto error;
 			}
e41bdf13
 			memset(*crt, 0, sizeof(struct replace_lst));
8ed93c23
 			(*crt)->offset=pmatch[0].rm_so+(int)(p-input);
 			(*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
508fe14d
 			LM_DBG("matched (%d, %d): [%.*s]\n",
267af5f1
 					(*crt)->offset, (*crt)->size, 
 					(*crt)->size, input+(*crt)->offset);
8ed93c23
 			/* create subst. string */
 			/* construct the string from replace[] */
 			if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
 				goto error;
 			}
 			crt=&((*crt)->next);
 			p+=pmatch[0].rm_eo;
b23b15b9
 			if (*(p-1) == '\n' || *(p-1) == '\r') eflags&=~REG_NOTBOL;
 			else eflags|=REG_NOTBOL;
4e8f053e
 			cnt++;
8ed93c23
 		}
cc087c64
 	}while((r==0) && se->replace_all);
8ed93c23
 	pkg_free(pmatch);
4e8f053e
 	if (count)*count=cnt;
8ed93c23
 	return head;
 error:
 	if (head) replace_lst_free(head);
 	if (pmatch) pkg_free(pmatch);
4e8f053e
 	if (count) *count=-1;
8ed93c23
 	return 0;
 }
 
 
 
627b66ef
 /* returns the substitution result in a str, input must be 0 term
4e8f053e
  *  0 on no match or malloc error
  *  if count is non zero it will be set to the number of matches, or -1
  *   if error 
  */ 
 str* subst_str(const char *input, struct sip_msg* msg, struct subst_expr* se,
 				int* count)
8ed93c23
 {
 	str* res;
 	struct replace_lst *lst;
 	struct replace_lst* l;
 	int len;
 	int size;
d1b976d9
 	const char* p;
8ed93c23
 	char* dest;
d1b976d9
 	const char* end;
8ed93c23
 	
 	
 	/* compute the len */
 	len=strlen(input);
 	end=input+len;
4e8f053e
 	lst=subst_run(se, input, msg, count);
627b66ef
 	if (lst==0){
508fe14d
 		LM_DBG("no match\n");
627b66ef
 		return 0;
 	}
00aee3fb
 	for (l=lst; l; l=l->next)
8ed93c23
 		len+=(int)(l->rpl.len)-l->size;
 	res=pkg_malloc(sizeof(str));
 	if (res==0){
af8bbc2e
 		LM_ERR("mem. allocation error\n");
8ed93c23
 		goto error;
 	}
627b66ef
 	res->s=pkg_malloc(len+1); /* space for null termination */
8ed93c23
 	if (res->s==0){
af8bbc2e
 		LM_ERR("mem. allocation error (res->s)\n");
8ed93c23
 		goto error;
 	}
ad781d24
 	res->s[len]=0;
8ed93c23
 	res->len=len;
 	
 	/* replace */
 	dest=res->s;
 	p=input;
 	for(l=lst; l; l=l->next){
 		size=l->offset+input-p;
627b66ef
 		memcpy(dest, p, size); /* copy till offset */
 		p+=size + l->size; /* skip l->size bytes */
8ed93c23
 		dest+=size;
 		if (l->rpl.len){
 			memcpy(dest, l->rpl.s, l->rpl.len);
 			dest+=l->rpl.len;
 		}
 	}
 	memcpy(dest, p, end-p);
 	if(lst) replace_lst_free(lst);
 	return res;
 error:
 	if (lst) replace_lst_free(lst);
 	if (res){
 		if (res->s) pkg_free(res->s);
 		pkg_free(res);
 	}
4e8f053e
 	if (count) *count=-1;
8ed93c23
 	return 0;
 }