Browse code

modules/tm, modules_k/pua: Fix for concurrency issue in PUA module

- It is quite possible for the mandatory NOTIFY request sent by a
presence server on establishment of a SUBSCRIBE dialog to
overtake (either on the wire or within Kamailio) the 2xx response
to the SUBSCRIBE. When this happens, Kamailio outputs an error
message and does not update the rls_presentity table.

- The change to the tm module is to make t_request_outside take the
same arguments (and exhibit mostly the same behaviour) as t_request.
This is safe to do as a search of the codebase has shown that
t_request_outside was not actually used anywhere. The difference
between t_request and t_request_outside is that t_request frees the
dialog structure it creates, whereas t_request_outside leaves the
dialog structure in place so that the caller can use it to find things
like the Call-ID and local tag generated for the request.

The hash table implementation in pua has been modified to enable
temporary dialogs to be found (new function
get_temporary_dialog()). A temporary dialog contains the minimal
information that was available when the SUBSCRIBE request was sent.
Temporary dialogs are replaced with proper ones when a 2xx response
is received. The delete_htable() function has been updated so that
it can delete both full and temporary dialogs.

pua.c has been modified to fix a bug in db_update() - n_query_cols
was being incorrectly decremented in a certain case within a double
loop. db_update() has also been changed to cope with needing to
insert temporary (and therefore not fully filled in) dialogs into
the database.

send_subscribe.c has been modified to create temporary dialogs
whenever an initial SUBSCRIBE is sent. The SUBSCRIBE callback
function searches for and removes any temporary dialogs relating
to the transaction before it returns. In normal (non-error-handling)
behaviour, temporary dialogs are not removed until after
a full dialog has been created and added to the hash table.

pd authored on 11/08/2011 16:28:19
Showing 6 changed files
... ...
@@ -707,7 +707,7 @@ int req_within(uac_req_t *uac_r)
707 707
  * Send an initial request that will start a dialog
708 708
  * WARNING: writes uac_r->dialog
709 709
  */
710
-int req_outside(uac_req_t *uac_r, str* to, str* from)
710
+int req_outside(uac_req_t *uac_r, str* ruri, str* to, str* from, str *next_hop)
711 711
 {
712 712
 	str callid, fromtag;
713 713
 
... ...
@@ -721,6 +721,15 @@ int req_outside(uac_req_t *uac_r, str* to, str* from)
721 721
 		goto err;
722 722
 	}
723 723
 
724
+	if (ruri) {
725
+		uac_r->dialog->rem_target.s = ruri->s;
726
+		uac_r->dialog->rem_target.len = ruri->len;
727
+		/* hooks will be set from w_calculate_hooks */
728
+	}
729
+
730
+	if (next_hop) uac_r->dialog->dst_uri = *next_hop;
731
+	w_calculate_hooks(uac_r->dialog);
732
+
724 733
 	return t_uac(uac_r);
725 734
 
726 735
  err:
... ...
@@ -83,7 +83,7 @@ extern int goto_on_local_req;
83 83
  * Function prototypes
84 84
  */
85 85
 typedef int (*reqwith_t)(uac_req_t *uac_r);
86
-typedef int (*reqout_t)(uac_req_t *uac_r, str* to, str* from);
86
+typedef int (*reqout_t)(uac_req_t *uac_r, str* ruri, str* to, str* from, str *next_hop);
87 87
 typedef int (*req_t)(uac_req_t *uac_r, str* ruri, str* to, str* from, str *next_hop);
88 88
 typedef int (*t_uac_t)(uac_req_t *uac_r);
89 89
 typedef int (*t_uac_with_ids_t)(uac_req_t *uac_r,
... ...
@@ -128,7 +128,7 @@ int req_within(uac_req_t *uac_r);
128 128
 /*
129 129
  * Send an initial request that will start a dialog
130 130
  */
131
-int req_outside(uac_req_t *uac_r, str* to, str* from);
131
+int req_outside(uac_req_t *uac_r, str* ruri, str* to, str* from, str* next_hop);
132 132
 
133 133
 
134 134
 #ifdef WITH_AS_SUPPORT
... ...
@@ -244,28 +244,31 @@ void insert_htable(ua_pres_t* presentity)
244 244
 
245 245
 }
246 246
 
247
+/* This function used to perform a search to find the hash table
248
+   entry that matches the presentity it is passed.  However,
249
+   everywhere it is used it is passed a pointer to the correct
250
+   hash table entry already...  so let's just delete that */
247 251
 void delete_htable(ua_pres_t* presentity, unsigned int hash_code)
248 252
 { 
249
-	ua_pres_t* p= NULL, *q= NULL;
253
+	ua_pres_t *q = NULL;
250 254
 
251
-	p= search_htable(presentity, hash_code);
252
-	if(p== NULL)
255
+	if (presentity == NULL)
253 256
 		return;
254 257
 
255
-	q=HashT->p_records[hash_code].entity;
258
+	q = HashT->p_records[hash_code].entity;
256 259
 
257
-	while(q->next!=p)
258
-		q= q->next;
259
-	q->next=p->next;
260
+	while (q->next != presentity)
261
+		q = q->next;
262
+	q->next = presentity->next;
260 263
 	
261
-	if(p->etag.s)
262
-		shm_free(p->etag.s);
264
+	if(presentity->etag.s)
265
+		shm_free(presentity->etag.s);
263 266
 	else
264
-		if(p->remote_contact.s)
265
-			shm_free(p->remote_contact.s);
267
+		if(presentity->remote_contact.s)
268
+			shm_free(presentity->remote_contact.s);
266 269
 
267
-	shm_free(p);
268
-	p= NULL;
270
+	shm_free(presentity);
271
+	presentity = NULL;
269 272
 
270 273
 }
271 274
 	
... ...
@@ -323,7 +326,7 @@ ua_pres_t* get_dialog(ua_pres_t* dialog, unsigned int hash_code)
323 326
 			if((p->pres_uri->len== dialog->pres_uri->len) &&
324 327
 				(strncmp(p->pres_uri->s, dialog->pres_uri->s,p->pres_uri->len)==0)&&
325 328
 				(p->watcher_uri->len== dialog->watcher_uri->len) &&
326
- 	    		(strncmp(p->watcher_uri->s,dialog->watcher_uri->s,p->watcher_uri->len )==0)&&
329
+				(strncmp(p->watcher_uri->s,dialog->watcher_uri->s,p->watcher_uri->len )==0)&&
327 330
 				(strncmp(p->call_id.s, dialog->call_id.s, p->call_id.len)== 0) &&
328 331
 				(strncmp(p->to_tag.s, dialog->to_tag.s, p->to_tag.len)== 0) &&
329 332
 				(strncmp(p->from_tag.s, dialog->from_tag.s, p->from_tag.len)== 0) )
... ...
@@ -338,6 +341,39 @@ ua_pres_t* get_dialog(ua_pres_t* dialog, unsigned int hash_code)
338 341
 	return p;
339 342
 }
340 343
 
344
+/* must lock the record line before calling this function*/
345
+ua_pres_t* get_temporary_dialog(ua_pres_t* dialog, unsigned int hash_code)
346
+{
347
+	ua_pres_t* p= NULL, *L;
348
+	LM_DBG("core_hash= %u\n", hash_code);
349
+
350
+	L= HashT->p_records[hash_code].entity;
351
+	for(p= L->next; p; p=p->next)
352
+	{
353
+		LM_DBG("pres_uri= %.*s\twatcher_uri=%.*s\n\t"
354
+				"callid= %.*s\tfrom_tag= %.*s\n",
355
+			p->pres_uri->len, p->pres_uri->s, p->watcher_uri->len,
356
+			p->watcher_uri->s,p->call_id.len, p->call_id.s,
357
+			p->from_tag.len, p->from_tag.s);
358
+
359
+		if((p->pres_uri->len== dialog->pres_uri->len) &&
360
+			(strncmp(p->pres_uri->s, dialog->pres_uri->s,p->pres_uri->len)==0)&&
361
+			(p->watcher_uri->len== dialog->watcher_uri->len) &&
362
+			(strncmp(p->watcher_uri->s,dialog->watcher_uri->s,p->watcher_uri->len )==0)&&
363
+			(p->call_id.len == dialog->call_id.len) &&
364
+			(strncmp(p->call_id.s, dialog->call_id.s, p->call_id.len)== 0) &&
365
+			(p->from_tag.len == dialog->from_tag.len) &&
366
+			(strncmp(p->from_tag.s, dialog->from_tag.s, p->from_tag.len)== 0) &&
367
+			p->to_tag.len == 0)
368
+			{
369
+				LM_DBG("FOUND temporary dialog\n");
370
+				break;
371
+			}
372
+	}
373
+
374
+	return p;
375
+}
376
+
341 377
 int get_record_id(ua_pres_t* dialog, str** rec_id)
342 378
 {
343 379
 	unsigned int hash_code;
... ...
@@ -352,9 +388,14 @@ int get_record_id(ua_pres_t* dialog, str** rec_id)
352 388
 	rec= get_dialog(dialog, hash_code);
353 389
 	if(rec== NULL)
354 390
 	{
355
-		LM_DBG("Record not found\n");
356
-		lock_release(&HashT->p_records[hash_code].lock);
357
-		return 0;
391
+		LM_DBG("Record not found - looking for temporary\n");
392
+		rec = get_temporary_dialog(dialog, hash_code);
393
+		if (rec == NULL)
394
+		{
395
+			LM_DBG("Temporary record not found\n");
396
+			lock_release(&HashT->p_records[hash_code].lock);
397
+			return 0;
398
+		}
358 399
 	}
359 400
 	id= (str*)pkg_malloc(sizeof(str));
360 401
 	if(id== NULL)
... ...
@@ -125,6 +125,7 @@ void destroy_htable(void);
125 125
 int is_dialog(ua_pres_t* dialog);
126 126
 
127 127
 ua_pres_t* get_dialog(ua_pres_t* dialog, unsigned int hash_code);
128
+ua_pres_t* get_temporary_dialog(ua_pres_t* dialog, unsigned int hash_code);
128 129
 
129 130
 int get_record_id(ua_pres_t* dialog, str** rec_id);
130 131
 typedef int (*get_record_id_t)(ua_pres_t* dialog, str** rec_id);
... ...
@@ -749,14 +749,14 @@ static void db_update(unsigned int ticks,void *param)
749 749
 	db_key_t db_cols[5];
750 750
 	db_val_t q_vals[20], db_vals[5];
751 751
 	db_op_t  db_ops[1] ;
752
-	int n_query_cols= 0, n_query_update= 0;
752
+	int n_query_cols= 0, n_query_update= 0, n_actual_query_cols= 0;
753 753
 	int n_update_cols= 0;
754 754
 	int i;
755 755
 	int puri_col,pid_col,expires_col,flag_col,etag_col,tuple_col,event_col;
756 756
 	int watcher_col,callid_col,totag_col,fromtag_col,record_route_col,cseq_col;
757 757
 	int no_lock= 0, contact_col, desired_expires_col, extra_headers_col;
758 758
 	int remote_contact_col, version_col;
759
-	
759
+
760 760
 	if(ticks== 0 && param == NULL)
761 761
 		no_lock= 1;
762 762
 
... ...
@@ -765,7 +765,7 @@ static void db_update(unsigned int ticks,void *param)
765 765
 	q_vals[puri_col].type = DB1_STR;
766 766
 	q_vals[puri_col].nul = 0;
767 767
 	n_query_cols++;
768
-	
768
+
769 769
 	q_cols[pid_col= n_query_cols] = &str_pres_id_col;	
770 770
 	q_vals[pid_col].type = DB1_STR;
771 771
 	q_vals[pid_col].nul = 0;
... ...
@@ -1003,21 +1003,43 @@ static void db_update(unsigned int ticks,void *param)
1003 1003
 					q_vals[puri_col].val.str_val = *(p->pres_uri);
1004 1004
 					q_vals[pid_col].val.str_val = p->id;
1005 1005
 					q_vals[flag_col].val.int_val = p->flag;
1006
-					if((p->watcher_uri))
1007
-						q_vals[watcher_col].val.str_val = *(p->watcher_uri);
1008
-					else
1009
-						memset(& q_vals[watcher_col].val.str_val ,0, sizeof(str));
1010
-					q_vals[tuple_col].val.str_val = p->tuple_id;
1011
-					q_vals[etag_col].val.str_val = p->etag;
1012 1006
 					q_vals[callid_col].val.str_val = p->call_id;
1013
-					q_vals[totag_col].val.str_val = p->to_tag;
1014 1007
 					q_vals[fromtag_col].val.str_val = p->from_tag;
1015 1008
 					q_vals[cseq_col].val.int_val= p->cseq;
1016 1009
 					q_vals[expires_col].val.int_val = p->expires;
1017 1010
 					q_vals[desired_expires_col].val.int_val = p->desired_expires;
1018 1011
 					q_vals[event_col].val.int_val = p->event;
1019 1012
 					q_vals[version_col].val.int_val = p->version;
1020
-					
1013
+
1014
+					if((p->watcher_uri))
1015
+						q_vals[watcher_col].val.str_val = *(p->watcher_uri);
1016
+					else
1017
+						memset(& q_vals[watcher_col].val.str_val ,0, sizeof(str));
1018
+
1019
+					if(p->tuple_id.s == NULL)
1020
+					{
1021
+						q_vals[tuple_col].val.str_val.s="";
1022
+						q_vals[tuple_col].val.str_val.len=0;
1023
+					}
1024
+					else
1025
+						q_vals[tuple_col].val.str_val = p->tuple_id;
1026
+
1027
+					if(p->etag.s == NULL)
1028
+					{
1029
+						q_vals[etag_col].val.str_val.s="";
1030
+						q_vals[etag_col].val.str_val.len=0;
1031
+					}
1032
+					else
1033
+						q_vals[etag_col].val.str_val = p->etag;
1034
+
1035
+					if (p->to_tag.s == NULL)
1036
+					{
1037
+						q_vals[totag_col].val.str_val.s="";
1038
+						q_vals[totag_col].val.str_val.len=0;
1039
+					}
1040
+					else
1041
+						q_vals[totag_col].val.str_val = p->to_tag;
1042
+
1021 1043
 					if(p->record_route.s== NULL)
1022 1044
 					{
1023 1045
 						q_vals[record_route_col].val.str_val.s= "";
... ...
@@ -1025,8 +1047,15 @@ static void db_update(unsigned int ticks,void *param)
1025 1047
 					}
1026 1048
 					else
1027 1049
 						q_vals[record_route_col].val.str_val = p->record_route;
1028
-					
1029
-					q_vals[contact_col].val.str_val = p->contact;
1050
+
1051
+					if(p->contact.s == NULL)
1052
+					{
1053
+						q_vals[contact_col].val.str_val.s = "";
1054
+						q_vals[contact_col].val.str_val.len = 0;
1055
+					}
1056
+					else
1057
+						q_vals[contact_col].val.str_val = p->contact;
1058
+
1030 1059
 					if(p->remote_contact.s)
1031 1060
 					{
1032 1061
 						q_vals[remote_contact_col].val.str_val = p->remote_contact;
... ...
@@ -1039,11 +1068,14 @@ static void db_update(unsigned int ticks,void *param)
1039 1068
 					}
1040 1069
 
1041 1070
 					if(p->extra_headers)
1071
+					{
1072
+						n_actual_query_cols = n_query_cols;
1042 1073
 						q_vals[extra_headers_col].val.str_val = *(p->extra_headers);
1074
+					}
1043 1075
 					else
1044
-						n_query_cols--;
1076
+						n_actual_query_cols = n_query_cols - 1;
1045 1077
 						
1046
-					if(pua_dbf.insert(pua_db, q_cols, q_vals,n_query_cols )<0)
1078
+					if(pua_dbf.insert(pua_db, q_cols, q_vals,n_actual_query_cols )<0)
1047 1079
 					{
1048 1080
 						LM_ERR("while inserting in db table pua\n");
1049 1081
 						if(!no_lock)
... ...
@@ -344,7 +344,6 @@ void subs_cback_func(struct cell *t, int cb_type, struct tmcb_params *ps)
344 344
 		hentity->call_id=  msg->callid->body;
345 345
 		hentity->to_tag= pto->tag_value;
346 346
 		hentity->from_tag= pfrom->tag_value;
347
-	
348 347
 	}
349 348
 
350 349
 	/* extract the other necesary information for inserting a new record */		
... ...
@@ -608,6 +607,12 @@ done:
608 607
 		run_pua_callbacks( hentity, msg);
609 608
 	}
610 609
 error:	
610
+	lock_get(&HashT->p_records[hash_code].lock);
611
+	presentity = get_temporary_dialog(hentity, hash_code);
612
+	if (presentity!=NULL)
613
+		delete_htable(presentity, hash_code);
614
+	lock_release(&HashT->p_records[hash_code].lock);
615
+
611 616
 	if(hentity)
612 617
 	{	
613 618
 		shm_free(hentity);
... ...
@@ -858,6 +863,7 @@ int send_subscribe(subs_info_t* subs)
858 863
 	
859 864
 	if(presentity== NULL )
860 865
 	{
866
+		int size;
861 867
 insert:
862 868
 		lock_release(&HashT->p_records[hash_code].lock); 
863 869
 		if(subs->flag & UPDATE_TYPE)
... ...
@@ -887,7 +893,7 @@ insert:
887 893
 
888 894
 		set_uac_req(&uac_r, &met, str_hdr, 0, 0, TMCB_LOCAL_COMPLETED,
889 895
 				subs_cback_func, (void*)hentity);
890
-		result= tmb.t_request
896
+		result= tmb.t_request_outside
891 897
 			(&uac_r,						  /* Type of the message */
892 898
 		subs->remote_target?subs->remote_target:subs->pres_uri,/* Request-URI*/
893 899
 			subs->pres_uri,				  /* To */
... ...
@@ -897,9 +903,74 @@ insert:
897 903
 		if(result< 0)
898 904
 		{
899 905
 			LM_ERR("while sending request with t_request\n");
906
+			if (uac_r.dialog != NULL)
907
+			{
908
+				uac_r.dialog->rem_target.s = 0;
909
+				uac_r.dialog->dst_uri.s = 0;
910
+				tmb.free_dlg(uac_r.dialog);
911
+				uac_r.dialog = 0;
912
+			}
900 913
 			shm_free(hentity);
901 914
 			goto  done;
902 915
 		}
916
+
917
+		/* Now create a temporary hash table entry.
918
+		   This is needed to deal with the race-hazard when NOTIFYs
919
+		   arrive before the 2xx response to the SUBSCRIBE. */
920
+		size = sizeof(ua_pres_t)+ 2 * sizeof(str) + (
921
+			subs->pres_uri->len +
922
+			subs->watcher_uri->len +
923
+			uac_r.dialog->id.loc_tag.len +
924
+			uac_r.dialog->id.call_id.len +
925
+			subs->id.len) * sizeof(char);
926
+
927
+		presentity= (ua_pres_t*)shm_malloc(size);
928
+		if(presentity== NULL)
929
+		{
930
+			LM_ERR("no more share memory\n");
931
+			goto done;
932
+		}
933
+		memset(presentity, 0, size);
934
+		size= sizeof(ua_pres_t);
935
+
936
+		presentity->pres_uri = (str *) ((char *) presentity + size);
937
+		size += sizeof(str);
938
+		presentity->pres_uri->s= (char *) presentity + size;
939
+		memcpy(presentity->pres_uri->s, subs->pres_uri->s, subs->pres_uri->len);
940
+		presentity->pres_uri->len= subs->pres_uri->len;
941
+		size+= subs->pres_uri->len;
942
+
943
+		presentity->watcher_uri= (str *) ((char *) presentity + size);
944
+		size += sizeof(str);
945
+		presentity->watcher_uri->s= (char *) presentity + size;
946
+		memcpy(presentity->watcher_uri->s, subs->watcher_uri->s, subs->watcher_uri->len);
947
+		presentity->watcher_uri->len = subs->watcher_uri->len;
948
+		size += subs->watcher_uri->len;
949
+
950
+		presentity->call_id.s = (char *) presentity + size;
951
+		memcpy(presentity->call_id.s, uac_r.dialog->id.call_id.s, uac_r.dialog->id.call_id.len);
952
+		presentity->call_id.len = uac_r.dialog->id.call_id.len;
953
+		size += uac_r.dialog->id.call_id.len;
954
+
955
+		presentity->from_tag.s = (char *) presentity + size;
956
+		memcpy(presentity->from_tag.s, uac_r.dialog->id.loc_tag.s, uac_r.dialog->id.loc_tag.len);
957
+		presentity->from_tag.len= uac_r.dialog->id.loc_tag.len;
958
+		size += uac_r.dialog->id.loc_tag.len;
959
+
960
+		presentity->id.s = (char *) presentity+ size;
961
+		memcpy(presentity->id.s, subs->id.s, subs->id.len);
962
+		presentity->id.len = subs->id.len;
963
+		size += subs->id.len;
964
+
965
+		/* Set the temporary record expiry for 2 * 64T1 seconds from now */
966
+		presentity->expires= (int)time(NULL) + 64;
967
+
968
+		insert_htable(presentity);
969
+
970
+		uac_r.dialog->rem_target.s = 0;
971
+		uac_r.dialog->dst_uri.s = 0;
972
+		tmb.free_dlg(uac_r.dialog);
973
+		uac_r.dialog = 0;
903 974
 	}
904 975
 	else
905 976
 	{