/* 
 * $Id$
 *
 * regexp and regexp substitutions implementations
 * 
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * This file is part of ser, a free SIP server.
 *
 * ser is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version
 *
 * For a license to use the ser software under conditions
 * other than those described here, or to purchase support for this
 * software, please contact iptel.org by e-mail at the following addresses:
 *    info@iptel.org
 *
 * ser is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License 
 * along with this program; if not, write to the Free Software 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *
 * History:
 * --------
 *   2003-08-04  created by andrei
 *   2004-11-12  minor api extension, added *count (andrei)
 */


#include "dprint.h"
#include "mem/mem.h"
#include "re.h"

#include <string.h>



/* frees a subst_expr: the replacement buffer, the compiled regex (both
 * the regcomp state and the regex_t itself) and the structure;
 * se may be 0, in which case nothing is done */
void subst_expr_free(struct subst_expr* se)
{
	if (se==0) return; /* nothing to free */
	if (se->replacement.s) pkg_free(se->replacement.s);
	if (se->re){
		regfree(se->re);   /* release what regcomp allocated internally */
		pkg_free(se->re);  /* release the regex_t container */
	}
	pkg_free(se);
}



/* releases every node of the list starting at l (l itself included),
 * together with each node's rpl.s buffer when one is set;
 * an empty list (l==0) is a no-op */
void replace_lst_free(struct replace_lst* l)
{
	struct replace_lst* next;
	
	for (; l; l=next){
		/* remember the successor before the node goes away */
		next=l->next;
		if (l->rpl.s) pkg_free(l->rpl.s);
		pkg_free(l);
	}
}



/* parse a /regular expression/replacement/flags into a subst_expr structure
 *
 * The first char of subst is used as the separator; it cannot be '\\'.
 * Escapes recognized in the replacement part:
 *   \\ \n \r \t  - the corresponding single char
 *   \u           - uri substitution (REPLACE_URI)
 *   \0 .. \9     - regexp (sub)match reference, \0 is the whole match
 *   \<other>     - the char itself (a warning is logged unless it is the
 *                  separator)
 * Flags: i - case insensitive (REG_ICASE), s - clear REG_NEWLINE,
 *        g - replace all the matches.
 * At most MAX_REPLACE_WITH-1 escapes are accepted in the replacement.
 *
 * subst->s is temporarily modified (0 terminated at the end of the re
 * part while regcomp runs) and restored before returning.
 *
 * returns: a pkg_malloc'ed subst_expr (to be released with
 *          subst_expr_free()) or 0 on error
 */
struct subst_expr* subst_parser(str* subst)
{
#define MAX_REPLACE_WITH 100
	char c;
	char* end;
	char* p;
	char* re;
	char* re_end;
	char* repl;
	char* repl_end;
	struct replace_with rw[MAX_REPLACE_WITH];
	int rw_no;
	int escape;
	int cflags; /* regcomp flags */
	int replace_all;
	struct subst_expr* se;
	regex_t* regex;
	int max_pmatch;
	int r;
	
	/* init */
	se=0;
	regex=0;
	cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
	replace_all=0;
	if (subst->len<3){
		LOG(L_ERR, "ERROR: subst_parser: expression is too short: %.*s\n",
				subst->len, subst->s);
		goto error;
	}
	
	p=subst->s;
	c=*p; /* separator char */
	if (c=='\\'){
		LOG(L_ERR, "ERROR: subst_parser: invalid separator char <%c>"
				" in %.*s\n", c, subst->len, subst->s);
		goto error;
	}
	p++;
	end=subst->s+subst->len;
	/* find re */
	re=p;
	for (;p<end;p++){
		/* if unescaped sep. char */
		if ((*p==c) && (*(p-1)!='\\')) goto found_re;
	}
	LOG(L_ERR, "ERROR: subst_parser: no separator found: %.*s\n", subst->len, 
			subst->s);
	goto error;
found_re:
	re_end=p;
	p++;
	/* parse replacement */
	repl=p;
	rw_no=0;
	max_pmatch=0;
	escape=0;
	for(;p<end; p++){
		if (escape){
			escape=0;
			/* every escape is 2 chars long ('\\' + current char) and
			 * starts at the backslash, 1 char before p */
			rw[rw_no].size=2;
			rw[rw_no].offset=(p-1)-repl;
			switch (*p){
				/* special char escapes */
				case '\\':
					rw[rw_no].type=REPLACE_CHAR;
					rw[rw_no].u.c='\\';
					break;
				case 'n':
					rw[rw_no].type=REPLACE_CHAR;
					rw[rw_no].u.c='\n';
					break;
				case 'r':
					rw[rw_no].type=REPLACE_CHAR;
					rw[rw_no].u.c='\r';
					break;
				case 't':
					rw[rw_no].type=REPLACE_CHAR;
					rw[rw_no].u.c='\t';
					break;
				/* special sip msg parts escapes */
				case 'u':
					rw[rw_no].type=REPLACE_URI;
					break;
				/* re matches */
				case '0': /* allow 0, too, reference to the whole match */
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					rw[rw_no].type=REPLACE_NMATCH;
					rw[rw_no].u.nmatch=(*p)-'0';/* 0 is the whole matched str*/
					/* remember the highest referenced submatch */
					if (max_pmatch<rw[rw_no].u.nmatch) 
						max_pmatch=rw[rw_no].u.nmatch;
					break;
				default: /* just print current char */
					if (*p!=c){
						LOG(L_WARN, "subst_parser: WARNING: \\%c unknown"
								" escape in %.*s\n", *p, subst->len, subst->s);
					}
					rw[rw_no].type=REPLACE_CHAR;
					rw[rw_no].u.c=*p;
					break;
			}
			rw_no++;
			if (rw_no>=MAX_REPLACE_WITH){
				LOG(L_ERR, "ERROR: subst_parser: too many escapes in the"
							" replace part %.*s\n", subst->len, subst->s);
				goto error;
			}
		}else if (*p=='\\') escape=1;
		else  if (*p==c) goto found_repl;
	}
	LOG(L_ERR, "ERROR: subst_parser: missing separator: %.*s\n", subst->len, 
			subst->s);
	goto error;
found_repl:
	repl_end=p;
	p++;
	/* parse flags */
	for(;p<end; p++){
		switch(*p){
			case 'i':
				cflags|=REG_ICASE;
				break;
			case 's':
				cflags&=(~REG_NEWLINE);
				break;
			case 'g':
				replace_all=1;
				break;
			default:
				LOG(L_ERR, "ERROR: subst_parser: unknown flag %c in %.*s\n",
						*p, subst->len, subst->s);
				goto error;
		}
	}

	/* compile the re */
	if ((regex=pkg_malloc(sizeof(regex_t)))==0){
		LOG(L_ERR, "ERROR: subst_parser: out of memory (re)\n");
		goto error;
	}
	c=*re_end; /* regcomp expects null terminated strings -- save */
	*re_end=0;
	if (regcomp(regex, re, cflags)!=0){
		pkg_free(regex);
		regex=0;
		*re_end=c; /* restore */
		LOG(L_ERR, "ERROR: subst_parser: bad regular expression %.*s in "
				"%.*s\n", (int)(re_end-re), re, subst->len, subst->s);
		goto error;
	}
	*re_end=c; /* restore */
	/* construct the subst_expr structure */
	se=pkg_malloc(sizeof(struct subst_expr)+
					((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
		/* 1 replace_with structure is  already included in subst_expr */
	if (se==0){
		LOG(L_ERR, "ERROR: subst_parser: out of memory (subst_expr)\n");
		goto error;
	}
	memset((void*)se, 0, sizeof(struct subst_expr));
	/* se owns the compiled regex from now on, so that any later failure
	 * releases it through subst_expr_free() instead of leaking it */
	se->re=regex;
	se->replacement.len=repl_end-repl;
	if (se->replacement.len > 0) {
		if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
			LOG(L_ERR, "ERROR: subst_parser: out of memory (replacement)\n");
			goto error;
		}
		/* start copying */
		memcpy(se->replacement.s, repl, se->replacement.len);
	} else {
		se->replacement.s = NULL;
	}
	se->replace_all=replace_all;
	se->n_escapes=rw_no;
	se->max_pmatch=max_pmatch;
	for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
	DBG("subst_parser: ok, se is %p\n", se);
	return se;
	
error:
	if (se){
		/* se->re was set right after se was zeroed => the regex is
		 * released together with se */
		subst_expr_free(se);
	}else if (regex){
		/* compiled, but never attached to a subst_expr */
		regfree(regex);
		pkg_free(regex);
	}
	return 0;
}



static int replace_len(const char* match, int nmatch, regmatch_t* pmatch,
					struct subst_expr* se, struct sip_msg* msg)
{
	int r;
	int len;
	str* uri;
	
	len=se->replacement.len;
	for (r=0; r<se->n_escapes; r++){
		switch(se->replace[r].type){
			case REPLACE_NMATCH:
				len-=se->replace[r].size;
				if ((se->replace[r].u.nmatch<nmatch)&&(
						pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
						/* do the replace */
						len+=pmatch[se->replace[r].u.nmatch].rm_eo-
								pmatch[se->replace[r].u.nmatch].rm_so;
				};
				break;
			case REPLACE_CHAR:
				len-=(se->replace[r].size-1);
				break;
			case REPLACE_URI:
				len-=se->replace[r].size;
				if (msg->first_line.type!=SIP_REQUEST){
					LOG(L_CRIT, "BUG: replace_len: uri substitution on"
								" a reply\n");
					break; /* ignore, we can continue */
				}
				uri= (msg->new_uri.s)?(&msg->new_uri):
					(&msg->first_line.u.request.uri);
				len+=uri->len;
				break;
			default:
				LOG(L_CRIT, "BUG: replace_len: unknown type %d\n", 
						se->replace[r].type);
				/* ignore it */
		}
	}
	return len;
}



/* builds the expanded replacement for one match into rpl:
 * rpl->len is set to the value computed by replace_len() and rpl->s is
 * pkg_malloc'ed with that size (rpl->s is 0 if the result is empty)
 * returns 0 on success, <0 on error (out of memory) */
static int replace_build(const char* match, int nmatch, regmatch_t* pmatch,
					struct subst_expr* se, struct sip_msg* msg, str* rpl)
{
	struct replace_with* esc;
	regmatch_t* sub;
	str* uri;
	char* src;      /* read position in the raw replacement */
	char* src_end;  /* end of the raw replacement */
	char* out;      /* write position in rpl->s */
	int chunk;
	int i;
	
	rpl->len=replace_len(match, nmatch, pmatch, se, msg);
	if (rpl->len==0){
		rpl->s=0; /* empty string */
		return 0;
	}
	rpl->s=pkg_malloc(rpl->len);
	if (rpl->s==0){
		LOG(L_ERR, "ERROR: replace_build: out of mem (rpl)\n");
		return -1;
	}
	src=se->replacement.s;
	src_end=src+se->replacement.len;
	out=rpl->s;
	for (i=0; i<se->n_escapes; i++){
		esc=&se->replace[i];
		/* literal text between the previous escape and this one */
		chunk=se->replacement.s+esc->offset-src;
		memcpy(out, src, chunk);
		out+=chunk;
		/* jump over the literal text and the escape sequence itself */
		src+=chunk+esc->size;
		switch(esc->type){
			case REPLACE_NMATCH:
				/* only referenced submatches that really matched */
				if (esc->u.nmatch<nmatch){
					sub=&pmatch[esc->u.nmatch];
					if (sub->rm_so!=-1){
						chunk=sub->rm_eo-sub->rm_so;
						memcpy(out, match+sub->rm_so, chunk);
						out+=chunk;
					}
				}
				break;
			case REPLACE_CHAR:
				*out++=esc->u.c;
				break;
			case REPLACE_URI:
				if (msg->first_line.type!=SIP_REQUEST){
					LOG(L_CRIT, "BUG: replace_build: uri substitution on"
								" a reply\n");
					break; /* ignore, we can continue */
				}
				/* the rewritten uri, if set, has priority */
				if (msg->new_uri.s)
					uri=&msg->new_uri;
				else
					uri=&msg->first_line.u.request.uri;
				memcpy(out, uri->s, uri->len);
				out+=uri->len;
				break;
			default:
				LOG(L_CRIT, "BUG: replace_build: unknown type %d\n", 
						esc->type);
				/* ignore it */
		}
	}
	/* whatever literal text is left after the last escape */
	memcpy(out, src, src_end-src);
	return 0;
}



/* WARNING: input must be 0 terminated! */
/* runs the regexp from se on input and builds the list of replacements
 * (offset and size of each match inside input + the expanded replacement
 * string); only the first match is used unless se->replace_all is set.
 * An empty match is recorded too; when replacing all the matches the
 * search then restarts one char further (or stops at the end of input),
 * so the loop always makes progress.
 * returns: 0 if no match or error, or subst result; if count!=0
 *           it will be set to 0 (no match), the number of matches
 *           or -1 (error).
 * The returned list is pkg_malloc'ed, free it with replace_lst_free().
 */
struct replace_lst* subst_run(struct subst_expr* se, const char* input,
								struct sip_msg* msg, int* count)
{
	struct replace_lst *head;
	struct replace_lst **crt;
	const char *p;
	int r;
	regmatch_t* pmatch;
	int nmatch;
	int eflags;
	int cnt;
	int at_end; /* set when an empty match was found at the end of input */
	
	
	/* init */
	head=0;
	cnt=0;
	at_end=0;
	crt=&head;
	p=input;
	nmatch=se->max_pmatch+1;
	/* no of () referenced + 1 for the whole string: pmatch[0] */
	pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
	if (pmatch==0){
		LOG(L_ERR, "ERROR: subst_run_ out of mem. (pmatch)\n");
		goto error;
	}
	eflags=0;
	do{
		r=regexec(se->re, p, nmatch, pmatch, eflags);
		DBG("subst_run: running. r=%d\n", r);
		/* subst */
		if (r==0){ /* != REG_NOMATCH */
			/* change eflags, not to match any more at string start */
			eflags|=REG_NOTBOL;
			*crt=pkg_malloc(sizeof(struct replace_lst));
			if (*crt==0){
				LOG(L_ERR, "ERROR: subst_run: out of mem (crt)\n");
				goto error;
			}
			memset(*crt, 0, sizeof(struct replace_lst));
			if (pmatch[0].rm_so==-1){
				LOG(L_ERR, "ERROR: subst_run: unknown offset?\n");
				goto error;
			}
			/* pmatch offsets are relative to p, the list uses offsets
			 * relative to input */
			(*crt)->offset=pmatch[0].rm_so+(int)(p-input);
			(*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
			DBG("subst_run: matched (%d, %d): [%.*s]\n",
					(*crt)->offset, (*crt)->size, 
					(*crt)->size, input+(*crt)->offset);
			/* create subst. string */
			/* construct the string from replace[] */
			if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
				goto error;
			}
			crt=&((*crt)->next);
			cnt++;
			if (pmatch[0].rm_eo!=pmatch[0].rm_so){
				/* continue right after the matched text */
				p+=pmatch[0].rm_eo;
			}else if (p[pmatch[0].rm_eo]!=0){
				/* empty match: restarting from the same position would
				 * return the same empty match forever, so step over one
				 * input char */
				p+=pmatch[0].rm_eo+1;
			}else{
				/* empty match at the end of input, nothing left to scan */
				at_end=1;
			}
		}
	}while((r==0) && se->replace_all && !at_end);
	pkg_free(pmatch);
	if (count)*count=cnt;
	return head;
error:
	if (head) replace_lst_free(head);
	if (pmatch) pkg_free(pmatch);
	if (count) *count=-1;
	return 0;
}



/* applies se to input (which must be 0 terminated) and returns the
 * resulting string in a newly pkg_malloc'ed str (res->s is pkg_malloc'ed
 * too and 0 terminated)
 *  returns 0 on no match or malloc error
 *  if count is non zero it will be set to the number of matches, or -1
 *   if error
 */
str* subst_str(const char *input, struct sip_msg* msg, struct subst_expr* se,
				int* count)
{
	struct replace_lst* matches;
	struct replace_lst* m;
	str* out;
	const char* src;        /* read position in input */
	const char* input_end;
	char* dst;              /* write position in out->s */
	int total;              /* length of the final string */
	int gap;
	
	
	total=strlen(input);
	input_end=input+total;
	matches=subst_run(se, input, msg, count);
	if (matches==0){
		DBG("subst_str: no match\n");
		return 0;
	}
	/* each match swaps m->size input bytes for m->rpl.len new ones */
	m=matches;
	while (m){
		total+=(int)(m->rpl.len)-m->size;
		m=m->next;
	}
	out=pkg_malloc(sizeof(str));
	if (out==0){
		LOG(L_ERR, "ERROR: subst_str: mem. allocation error\n");
		goto error;
	}
	out->s=pkg_malloc(total+1); /* space for null termination */
	if (out->s==0){
		LOG(L_ERR, "ERROR: subst_str: mem. allocation error (res->s)\n");
		goto error;
	}
	out->s[total]=0;
	out->len=total;
	
	/* assemble: untouched input, replacement, untouched input, ... */
	dst=out->s;
	src=input;
	for (m=matches; m; m=m->next){
		/* input bytes between the previous match and this one */
		gap=m->offset+input-src;
		memcpy(dst, src, gap);
		dst+=gap;
		/* skip the gap and the matched (replaced) bytes */
		src+=gap+m->size;
		if (m->rpl.len){
			memcpy(dst, m->rpl.s, m->rpl.len);
			dst+=m->rpl.len;
		}
	}
	/* tail of the input after the last match */
	memcpy(dst, src, input_end-src);
	replace_lst_free(matches);
	return out;
error:
	replace_lst_free(matches);
	if (out){
		if (out->s) pkg_free(out->s);
		pkg_free(out);
	}
	if (count) *count=-1;
	return 0;
}