parser/parse_content.c
ec147e2e
 /*
  * $Id$
  *
  *
c32feee5
  * Copyright (C) 2001-2003 FhG Fokus
ec147e2e
  *
  * This file is part of ser, a free SIP server.
  *
  * ser is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version
  *
  * For a license to use the ser software under conditions
  * other than those described here, or to purchase support for this
  * software, please contact iptel.org by e-mail at the following addresses:
  *    info@iptel.org
  *
  * ser is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License 
  * along with this program; if not, write to the Free Software 
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
2f43c732
  *
  * History:
  * 2003-08-04 parse_content_type_hdr separates type from subtype inside
  * the mime type (bogdan)
  * 2003-08-04 CPL subtype added (bogdan)
  * 2003-08-05 parse_accept_hdr function added (bogdan)
ec147e2e
  */
 
 
 #include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <unistd.h>
2f43c732
 #include "../mem/mem.h"
ec147e2e
 #include "../dprint.h"
 #include "../str.h"
 #include "../ut.h"
 #include "parse_content.h"
 
2f43c732
 
 /*
  * True if _c_ may appear inside a mime type/subtype name: letters, digits
  * and the punctuation '-', '+', '.', '_'.
  * The argument is converted to unsigned char before being handed to the
  * ctype function, so negative char values are safe.
  * NOTE: _c_ is evaluated more than once - do not pass expressions with
  * side effects.
  */
 #define is_mime_char(_c_) \
 	(isalnum((unsigned char)(_c_)) || (_c_)=='-' || (_c_)=='+' || \
 	 (_c_)=='.' || (_c_)=='_')
2f43c732
 /*
  * Compares an input char _c_ with a reference char _cs_.
  * Letters are compared case-insensitively by forcing bit 0x20 on _c_, so
  * _cs_ must be given in lower case; every other char must match exactly.
  * Evaluates to 1 on match, 0 otherwise.
  * The argument is converted to unsigned char before being handed to the
  * ctype function, so negative char values are safe.
  * NOTE: both arguments are evaluated more than once.
  */
 #define is_char_equal(_c_,_cs_) \
 	( isalpha((unsigned char)(_c_)) ? \
 		(((_c_)|0x20)==(_cs_)) : ((_c_)==(_cs_)) )
2f43c732
 
 
695ff13f
 /*
  * Node of the type's tree; this tree contains all the known types;
  */
ec147e2e
 /*
  * One node of a character tree stored in a flat array (see type_tree and
  * subtype_tree). Field order matters: the tables below use positional
  * initializers {c, final, nr_sons, next}.
  */
 typedef struct type_node_s {
695ff13f
 	char c;                      /* char contained by this node; the input
 	                              * is compared to it with is_char_equal() */
2f43c732
 	unsigned char final;         /* says what mime type/subtype was detected
 	                              * if string ends at this node */
695ff13f
 	unsigned char nr_sons;       /* the number of sub-nodes; when non-zero
 	                              * decode_mime_type() continues with the
 	                              * array element right after this one */
 	int next;                    /* array index of the next sibling node,
 	                              * -1 if there is none */
ec147e2e
 }type_node_t;
 
 
2f43c732
 /*
  * Tree of the known mime types, walked by decode_mime_type().
  * Each entry is {c, final, nr_sons, next}:
  *  - the first son of an entry (if nr_sons is non-zero) is the entry that
  *    immediately follows it in the array;
  *  - "next" is the array index of the next sibling, -1 ends the list;
  *  - "final" is the type reported when the input stops on that entry.
  * The indentation mirrors the depth in the tree; the numbers in the trailing
  * comments are array indexes referenced by "next" fields - they must be
  * kept in sync whenever entries are added or removed.
  */
 static type_node_t type_tree[] = {
4d329576
 	{'t',TYPE_UNKNOWN,1,4}, /* 0 */
2f43c732
 		{'e',TYPE_UNKNOWN,1,-1},
 			{'x',TYPE_UNKNOWN,1,-1},
 				{'t',TYPE_TEXT,0,-1}, /* "text" */
4d329576
 	{'m',TYPE_UNKNOWN,2,19}, /* 4 */
 		{'e',TYPE_UNKNOWN,1,11}, /* 5 */
2f43c732
 			{'s',TYPE_UNKNOWN,1,-1},
 				{'s',TYPE_UNKNOWN,1,-1},
 					{'a',TYPE_UNKNOWN,1,-1},
 						{'g',TYPE_UNKNOWN,1,-1},
 							{'e',TYPE_MESSAGE,0,-1}, /* "message" */
4d329576
 		{'u',TYPE_UNKNOWN,1,-1}, /* 11 */
 			{'l',TYPE_UNKNOWN,1,-1},
 				{'t',TYPE_UNKNOWN,1,-1},
 					{'i',TYPE_UNKNOWN,1,-1},
 						{'p',TYPE_UNKNOWN,1,-1},
 							{'a',TYPE_UNKNOWN,1,-1},
 								{'r',TYPE_UNKNOWN,1,-1},
 									{'t',TYPE_MULTIPART,0,-1}, /* "multipart" */
 	{'a',TYPE_UNKNOWN,1,-1}, /* 19 */
2f43c732
 		{'p',TYPE_UNKNOWN,1,-1},
 			{'p',TYPE_UNKNOWN,1,-1},
 				{'l',TYPE_UNKNOWN,1,-1},
 					{'i',TYPE_UNKNOWN,1,-1},
 						{'c',TYPE_UNKNOWN,1,-1},
 							{'a',TYPE_UNKNOWN,1,-1},
 								{'t',TYPE_UNKNOWN,1,-1},
 									{'i',TYPE_UNKNOWN,1,-1},
 										{'o',TYPE_UNKNOWN,1,-1},
ac892f2a
 											{'n',TYPE_APPLICATION,0,-1}, /* "application" */
2f43c732
 	};
 
 /*
  * Tree of the known mime subtypes, walked by decode_mime_type().
  * Same layout as type_tree: each entry is {c, final, nr_sons, next}, the
  * first son of an entry is the array element right after it, "next" is the
  * array index of the next sibling (-1 = none) and "final" is the subtype
  * reported when the input stops on that entry.
  * The numbers in the trailing comments are array indexes referenced by
  * "next" fields; they must be kept in sync when the table is edited.
  * NOTE(review): several intermediate entries use TYPE_UNKNOWN instead of
  * SUBTYPE_UNKNOWN; presumably both constants have the same value - confirm
  * in parse_content.h.
  */
 static type_node_t subtype_tree[] = {
ac892f2a
 	{'p',SUBTYPE_UNKNOWN,2,13},
 		{'l',SUBTYPE_UNKNOWN,1,5},
 			{'a',SUBTYPE_UNKNOWN,1,-1},
 				{'i',SUBTYPE_UNKNOWN,1,-1},
 					{'n',SUBTYPE_PLAIN,0,-1}, /* "plain" */
4d329576
 		{'i',SUBTYPE_UNKNOWN,1,-1}, /* 5 */
ac892f2a
 			{'d',SUBTYPE_UNKNOWN,1,-1},
 				{'f',SUBTYPE_UNKNOWN,1,-1},
4d329576
 					{'+',TYPE_UNKNOWN,1,-1},
 						{'x',TYPE_UNKNOWN,1,-1},
 							{'m',TYPE_UNKNOWN,1,-1},
 								{'l',SUBTYPE_PIDFXML,0,-1}, /* "pidf+xml" */
ac892f2a
 									/* duplicate entry (index 12): never reached, because the
 									 * entry above has no sons, but it has to stay - removing it
 									 * would shift all the indexes used by the "next" fields */
 									{'l',SUBTYPE_PIDFXML,0,-1},
4d329576
 	{'s',SUBTYPE_UNKNOWN,1,16}, /* 13 */
ac892f2a
 		{'d',SUBTYPE_UNKNOWN,1,-1},
 			{'p',SUBTYPE_SDP,0,-1}, /* "sdp" */
4526254b
 	{'c',SUBTYPE_UNKNOWN,1,34}, /* 16 */
ac892f2a
 		{'p',SUBTYPE_UNKNOWN,2,-1},
4526254b
 			{'i',SUBTYPE_UNKNOWN,1,29},
 				{'m',SUBTYPE_CPIM,1,-1}, /* "cpim": final entry that also has a son */
 					{'-',SUBTYPE_UNKNOWN,1,-1},
 						{'p',SUBTYPE_UNKNOWN,1,-1},
 							{'i',SUBTYPE_UNKNOWN,1,-1},
 								{'d',SUBTYPE_UNKNOWN,1,-1},
 									{'f',SUBTYPE_UNKNOWN,1,-1},
 										{'+',SUBTYPE_UNKNOWN,1,-1},
 											{'x',SUBTYPE_UNKNOWN,1,-1},
 												{'m',SUBTYPE_UNKNOWN,1,-1},
 													{'l',SUBTYPE_CPIM_PIDFXML,0,-1}, /* "cpim-pidf+xml" */
 			{'l',SUBTYPE_UNKNOWN,1,-1}, /* 29 */
ac892f2a
 				{'+',TYPE_UNKNOWN,1,-1},
 					{'x',TYPE_UNKNOWN,1,-1},
 						{'m',TYPE_UNKNOWN,1,-1},
 							{'l',SUBTYPE_CPLXML,0,-1}, /* "cpl+xml" */
4526254b
 	{'r',SUBTYPE_UNKNOWN,2,48}, /* 34 */
 		{'l',SUBTYPE_UNKNOWN,1,42},/* 35 */
ac892f2a
 			{'m',SUBTYPE_UNKNOWN,1,-1},
 				{'i',SUBTYPE_UNKNOWN,1,-1},
6885a474
 					{'+',TYPE_UNKNOWN,1,-1},
 						{'x',TYPE_UNKNOWN,1,-1},
 							{'m',TYPE_UNKNOWN,1,-1},
ac892f2a
 								{'l',SUBTYPE_RLMIXML,0,-1}, /* "rlmi+xml" */
4526254b
 		{'e',SUBTYPE_UNKNOWN,1,-1}, /* 42 */
ac892f2a
 			{'l',SUBTYPE_UNKNOWN,1,-1},
 				{'a',SUBTYPE_UNKNOWN,1,-1},
 					{'t',SUBTYPE_UNKNOWN,1,-1},
 						{'e',SUBTYPE_UNKNOWN,1,-1},
 							{'d',SUBTYPE_RELATED,0,-1}, /* "related" */
4526254b
 	{'l',SUBTYPE_UNKNOWN,1,57}, /* 48 */
ac892f2a
 		{'p',SUBTYPE_UNKNOWN,1,-1},
 			{'i',SUBTYPE_UNKNOWN,1,-1},
 				{'d',SUBTYPE_UNKNOWN,1,-1},
 					{'f',SUBTYPE_UNKNOWN,1,-1},
4d329576
 						{'+',SUBTYPE_UNKNOWN,1,-1},
 							{'x',SUBTYPE_UNKNOWN,1,-1},
 								{'m',SUBTYPE_UNKNOWN,1,-1},
 									{'l',SUBTYPE_LPIDFXML,0,-1}, /* "lpidf+xml" */
4526254b
 	{'w',SUBTYPE_UNKNOWN,1,72}, /* 57 */
ac892f2a
 		{'a',SUBTYPE_UNKNOWN,1,-1},
 			{'t',SUBTYPE_UNKNOWN,1,-1},
 				{'c',SUBTYPE_UNKNOWN,1,-1},
 					{'h',SUBTYPE_UNKNOWN,1,-1},
 						{'e',SUBTYPE_UNKNOWN,1,-1},
 							{'r',SUBTYPE_UNKNOWN,1,-1},
 								{'i',TYPE_UNKNOWN,1,-1},
 									{'n',TYPE_UNKNOWN,1,-1},
 										{'f',TYPE_UNKNOWN,1,-1},
 											{'o',TYPE_UNKNOWN,1,-1},
 												{'+',TYPE_UNKNOWN,1,-1},
 													{'x',TYPE_UNKNOWN,1,-1},
 														{'m',TYPE_UNKNOWN,1,-1},
 															{'l',SUBTYPE_WATCHERINFOXML,0,-1}, /* "watcherinfo+xml" */
4526254b
 	{'x',SUBTYPE_UNKNOWN,2,94}, /* 72 */
 		{'p',SUBTYPE_UNKNOWN,1,81}, /* 73 */
ac892f2a
 			{'i',SUBTYPE_UNKNOWN,1,-1},
 				{'d',SUBTYPE_UNKNOWN,1,-1},
 					{'f',SUBTYPE_UNKNOWN,1,-1},
4d329576
 						{'+',SUBTYPE_UNKNOWN,1,-1},
 							{'x',SUBTYPE_UNKNOWN,1,-1},
 								{'m',SUBTYPE_UNKNOWN,1,-1},
 									{'l',SUBTYPE_XPIDFXML,0,-1}, /* "xpidf+xml" */
4526254b
 		{'m',SUBTYPE_UNKNOWN,1,-1}, /* 81 */
4d329576
 			{'l',SUBTYPE_UNKNOWN,1,-1},
 				{'+',SUBTYPE_UNKNOWN,1,-1},
 					{'m',SUBTYPE_UNKNOWN,1,-1},
 						{'s',SUBTYPE_UNKNOWN,1,-1},
 							{'r',SUBTYPE_UNKNOWN,1,-1},
 								{'t',SUBTYPE_UNKNOWN,1,-1},
 									{'c',SUBTYPE_UNKNOWN,1,-1},
 										{'.',SUBTYPE_UNKNOWN,1,-1},
 											{'p',SUBTYPE_UNKNOWN,1,-1},
 												{'i',SUBTYPE_UNKNOWN,1,-1}, 
 													{'d',SUBTYPE_UNKNOWN,1,-1},
 														{'f',SUBTYPE_XML_MSRTC_PIDF,0,-1}, /* "xml+msrtc.pidf" */
4526254b
 	{'e',SUBTYPE_UNKNOWN,1,-1}, /* 94 */
ac892f2a
 		{'x',SUBTYPE_UNKNOWN,1,-1},
 			{'t',SUBTYPE_UNKNOWN,1,-1},
 				{'e',SUBTYPE_UNKNOWN,1,-1},
 					{'r',SUBTYPE_UNKNOWN,1,-1},
 						{'n',SUBTYPE_UNKNOWN,1,-1},
4d329576
 							{'a',SUBTYPE_UNKNOWN,1,-1},
 								{'l',SUBTYPE_UNKNOWN,1,-1},
 									{'-',SUBTYPE_UNKNOWN,1,-1},
 										{'b',SUBTYPE_UNKNOWN,1,-1},
 											{'o',SUBTYPE_UNKNOWN,1,-1},
 												{'d',SUBTYPE_UNKNOWN,1,-1},
 													{'y',SUBTYPE_EXTERNAL_BODY,0,-1}, /* "external-body" */
ac892f2a
 
2f43c732
 	};
 
695ff13f
 
 
ec147e2e
 /*
  * Parses the value of a Content-Length header.
  *
  * buffer - first char of the header body
  * end    - first char after the buffer; nothing at or after "end" is read
  * length - filled with the parsed value on success, untouched on error
  *
  * Accepted format: optional white space (SP, HT or a line folded with
  * LF followed by SP/HT), one or more decimal digits, optional white space,
  * then the end of the header (LF or CR LF).
  *
  * Returns a pointer to the first char after the terminating LF, or 0 if
  * the body is malformed or the number does not fit into an int (an error
  * is logged in both cases).
  */
 char* parse_content_length( char* buffer, char* end, int* length)
 {
 	int number;
 	int digit;
 	int size;
 	char *p;
 	char near;

 	p = buffer;
 	/* search the beginning of the number; the look-ahead used to detect a
 	 * folded line is done only if the next char is still inside the buffer */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && p+1<end && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 	if (p>=end)
 		goto error;

 	/* parse the number, refusing values that would overflow an int */
 	size = 0;
 	number = 0;
 	while (p<end && *p>='0' && *p<='9') {
 		digit = (*p)-'0';
 		if (number > (INT_MAX-digit)/10)
 			goto error;
 		number = number*10 + digit;
 		size++;
 		p++;
 	}
 	/* at least one digit is required and the header terminator must
 	 * still follow */
 	if (p>=end || size==0)
 		goto error;

 	/* now we should have only spaces at the end */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && p+1<end && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 	if (p>=end)
 		goto error;

 	/* does the header end properly? accept LF or CR LF */
 	if (*p=='\n') {
 		p++;
 	} else if (*p=='\r' && p+1<end && *(p+1)=='\n') {
 		p += 2;
 	} else {
 		goto error;
 	}

 	*length = number;
 	return p;
 error:
 	/* never dereference p when it reached the end of the buffer */
 	near = (p<end) ? *p : 0;
 	LOG(L_ERR,"ERROR:parse_content_length: parse error near char [%d][%c]\n",
 		near, near ? near : ' ');
 	return 0;
 }
 
 
 
2f43c732
 char* decode_mime_type(char *start, char *end, unsigned int *mime_type)
ec147e2e
 {
 	int node;
695ff13f
 	char *mark;
2f43c732
 	char *p;
695ff13f
 
2f43c732
 	p = start;
ec147e2e
 
 	/* search the begining of the type */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 	if (p==end)
 		goto error;
 
 	/* parse the type */
2f43c732
 	if (*p=='*') {
 		*mime_type = TYPE_ALL<<16;
ec147e2e
 		p++;
2f43c732
 	} else {
 		node = 0;
 		mark = p;
 		while (p<end && is_mime_char(*p)  ) {
 			while ( node!=-1 && !is_char_equal(*p,type_tree[node].c) ){
 				node = type_tree[node].next;
 			}
 			if (node!=-1 && type_tree[node].nr_sons)
4526254b
 				node++; 
 				/* ? increment only for (p < end - 1), 
 				 * otherwise will not work for final nodes with children */
2f43c732
 			p++;
 		}
 		if (p==end || mark==p)
 			goto error;
 		if (node!=-1)
 			*mime_type = type_tree[node].final<<16;
 		else
 			*mime_type = TYPE_UNKNOWN<<16;
ec147e2e
 	}
 
 	/* search the '/' separator */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 	if ( p==end || *(p++)!='/')
 		goto error;
 
 	/* search the begining of the sub-type */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 	if (p==end)
 		goto error;
 
 	/* parse the sub-type */
2f43c732
 	if (*p=='*') {
 		*mime_type |= SUBTYPE_ALL;
ec147e2e
 		p++;
2f43c732
 	} else {
 		node = 0;
 		mark = p;
 		while (p<end && is_mime_char(*p) ) {
 			while(node!=-1 && !is_char_equal(*p,subtype_tree[node].c) )
 				node = subtype_tree[node].next;
4526254b
 			if (node!=-1 && subtype_tree[node].nr_sons && (p < end - 1))
2f43c732
 				node++;
 			p++;
 		}
 		if (p==mark)
 			goto error;
 		if (node!=-1)
 			*mime_type |= subtype_tree[node].final;
 		else
 			*mime_type |= SUBTYPE_UNKNOWN;
ec147e2e
 	}
 
 	/* now its possible to have some spaces */
 	while ( p<end && (*p==' ' || *p=='\t' ||
 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
 		p++;
 
2f43c732
 	/* if there are params, ignore them!! -> eat everything to
 	 * the end or to the first ',' */
 	if ( p<end && *p==';' )
 		for(p++; p<end && *p!=','; p++);
 
 	/* is this the correct end? */
 	if (p!=end && *p!=',' )
 		goto error;
 
 	/* check the format of the decoded mime */
 	if ((*mime_type)>>16==TYPE_ALL && ((*mime_type)&0x00ff)!=SUBTYPE_ALL) {
4d329576
 		LOG(L_ERR,"ERROR:decode_mime_type: invalid mime format found "
256c108b
 			" <*/submime> in [%.*s]!!\n", (int)(end-start),start);
2f43c732
 		return 0;
 	}
 
 	return p;
 error:
4d329576
 	LOG(L_ERR,"ERROR:decode_mime_type: parse error near in [%.*s] char"
256c108b
 		"[%d][%c] offset=%d\n", (int)(end-start),start,*p,*p,(int)(p-start));
2f43c732
 	return 0;
 }
 
 
 
 /* returns: > 0 mime found
  *          = 0 hdr not found
  *          =-1 error */
 int parse_content_type_hdr( struct sip_msg *msg )
 {
 	char *end;
 	char *ret;
1e3474c8
 	unsigned int  mime;
2f43c732
 
 	/* is the header already found? */
 	if ( msg->content_type==0 ) {
 		/* if not, found it */
5c28a534
 		if ( parse_headers(msg, HDR_CONTENTTYPE_F, 0)==-1)
2f43c732
 			goto error;
 		if ( msg->content_type==0 ) {
 			DBG("DEBUG:parse_content_type_hdr: missing Content-Type"
 				"header\n");
 			return 0;
 		}
 	}
 
 	/* maybe the header is already parsed! */
 	if ( msg->content_type->parsed!=0)
 		return get_content_type(msg);
 
 	/* it seams we have to parse it! :-( */
 	end = msg->content_type->body.s + msg->content_type->body.len;
 	ret = decode_mime_type(msg->content_type->body.s, end , &mime);
 	if (ret==0)
ec147e2e
 		goto error;
2f43c732
 	if (ret!=end) {
c32feee5
 		LOG(L_ERR,"ERROR:parse_content_type_hdr: CONTENT_TYPE hdr contains "
2f43c732
 			"more then one mime type :-(!\n");
 		goto error;
 	}
 	if ((mime&0x00ff)==SUBTYPE_ALL || (mime>>16)==TYPE_ALL) {
 		LOG(L_ERR,"ERROR:parse_content_type_hdr: invalid mime with wildcard "
 			"'*' in Content-Type hdr!\n");
 		goto error;
 	}
ec147e2e
 
1e3474c8
 	msg->content_type->parsed = (void*)(unsigned long)mime;
695ff13f
 	return mime;
2f43c732
 
ec147e2e
 error:
695ff13f
 	return -1;
ec147e2e
 }
 
 
 
2f43c732
 /* returns: > 0 ok
  *          = 0 hdr not found
  *          = -1 error */
 int parse_accept_hdr( struct sip_msg *msg )
 {
1e3474c8
 	static unsigned int mimes[MAX_MIMES_NR];
2f43c732
 	int nr_mimes;
1e3474c8
 	unsigned int mime;
2f43c732
 	char *end;
 	char *ret;
 
 	/* is the header already found? */
 	if ( msg->accept==0 ) {
 		/* if not, found it */
5c28a534
 		if ( parse_headers(msg, HDR_ACCEPT_F, 0)==-1)
2f43c732
 			goto error;
 		if ( msg->accept==0 ) {
 			DBG("DEBUG:parse_accept_hdr: missing Accept header\n");
 			return 0;
 		}
 	}
 
 	/* maybe the header is already parsed! */
 	if ( msg->accept->parsed!=0)
 		return 1;
 
 	/* it seams we have to parse it! :-( */
 	ret = msg->accept->body.s;
 	end = ret + msg->accept->body.len;
 	nr_mimes = 0;
 	while (1){
 		ret = decode_mime_type(ret, end , &mime);
 		if (ret==0)
 			goto error;
 		/* a new mime was found  -> put it into array */
 		if (nr_mimes==MAX_MIMES_NR) {
 			LOG(L_ERR,"ERROR:parse_accept_hdr: Accept hdr contains more than"
c32feee5
 				" %d mime type -> buffer overflow!!\n",MAX_MIMES_NR);
2f43c732
 			goto error;
 		}
 		mimes[nr_mimes++] = mime;
 		/* is another mime following? */
 		if (ret==end )
 			break;
 		/* parse the mime separator ',' */
 		if (*ret!=',' || ret+1==end) {
 			LOG(L_ERR,"ERROR:parse_accept_hdr: parse error between mimes at "
 				"char <%x> (offset=%d) in <%.*s>!\n",
256c108b
 				*ret, (int)(ret-msg->accept->body.s),
 				msg->accept->body.len, msg->accept->body.s);
2f43c732
 			goto error;
 		}
 		/* skip the ',' */
 		ret++;
 	}
 
 	/* copy and link the mime buffer into the message */
 	msg->accept->parsed = (void*)pkg_malloc((nr_mimes+1)*sizeof(int));
 	if (msg->accept->parsed==0) {
 		LOG(L_ERR,"ERROR:parse_accept_hdr: no more pkg memory\n");
 		goto error;
 	}
 	memcpy(msg->accept->parsed,mimes,nr_mimes*sizeof(int));
 	/* make the buffer null terminated */
 	((int*)msg->accept->parsed)[nr_mimes] = 0;
 
 	return 1;
 error:
 	return -1;
 }