Browse code

- Fixed the following race condition:

Process_A sends a 487 over looback to the same ser instance
(this process did not set up the retransmission timer yet because
of the kernel context switch, see below)

-- kernel switches context now to process_B

Process_B wakes up to process the 487 and it sends an ACK
immediately (over loopback again)

-- kernel switches context again now to process_C

Process_C wakes up to process the ACK, it tries to find the original
transaction (the one which sent 487) and tries to remove the
transaction from the retransmission timer list (which has not been
set yet in Process_A)

The process finishes now.

-- kernel switches context back to Process_A

Now the process prints info that 487 was relayed and sets up
the retransmission timer

The process finishes

<after approximately 500ms>

Process_D (timer) wakes up and generates a 487 retransmission although
an ACK has already been received and processed

Jan Janak authored on 01/06/2005 15:14:51
Showing 2 changed files
... ...
@@ -1099,6 +1099,13 @@ enum rps relay_reply( struct cell *t, struct sip_msg *p_msg, int branch,
1099 1099
 
1100 1100
 	UNLOCK_REPLIES( t );
1101 1101
 
1102
+	     /* Set retransmission timer before the reply is sent out to avoid
1103
+	      * race conditions
1104
+	      */
1105
+	if (reply_status == RPS_COMPLETED) {
1106
+		set_final_timer(t);
1107
+	}
1108
+
1102 1109
 	/* send it now (from the private buffer) */
1103 1110
 	if (relay >= 0) {
1104 1111
 		SEND_PR_BUFFER( uas_rb, buf, res_len );
... ...
@@ -1270,14 +1277,14 @@ int reply_received( struct sip_msg  *p_msg )
1270 1270
 		 * on_reply processing, which may take very long, like if it
1271 1271
 		 * is attempted to establish a TCP connection to a fail-over dst */
1272 1272
 		
1273
-	if (t->flags & T_IS_INVITE_FLAG) {
1273
+	if (is_invite(t)) {
1274 1274
 		if (msg_status >= 300) {
1275 1275
 			ack = build_ack(p_msg, t, branch, &ack_len);
1276 1276
 			if (ack) {
1277 1277
 				SEND_PR_BUFFER(&uac->request, ack, ack_len);
1278 1278
 				shm_free(ack);
1279 1279
 			}
1280
-		} else if ((t->flags & T_IS_LOCAL_FLAG) && msg_status >= 200) {
1280
+		} else if (is_local(t) && msg_status >= 200) {
1281 1281
 			ack = build_local_ack(p_msg, t, branch, &ack_len, &next_hop);
1282 1282
 			if (ack) {
1283 1283
 				if (send_local_ack(p_msg, &next_hop, ack, ack_len) < 0) {
... ...
@@ -1304,25 +1311,33 @@ int reply_received( struct sip_msg  *p_msg )
1304 1304
 	LOCK_REPLIES( t );
1305 1305
 	if ( is_local(t) ) {
1306 1306
 		reply_status=local_reply( t, p_msg, branch, msg_status, &cancel_bitmap );
1307
+		if (reply_status == RPS_COMPLETED) {
1308
+			     /* no more UAC FR/RETR (if I received a 2xx, there may
1309
+			      * be still pending branches ...
1310
+			      */
1311
+			cleanup_uac_timers( t );	
1312
+			if (is_invite(t)) cancel_uacs( t, cancel_bitmap );
1313
+			     /* FR for negative INVITES, WAIT anything else */
1314
+			put_on_wait(t);
1315
+		}
1307 1316
 	} else {
1308 1317
 		reply_status=relay_reply( t, p_msg, branch, msg_status, 
1309 1318
 			&cancel_bitmap );
1319
+		if (reply_status == RPS_COMPLETED) {
1320
+			     /* no more UAC FR/RETR (if I received a 2xx, there may
1321
+				be still pending branches ...
1322
+			     */
1323
+			cleanup_uac_timers( t );	
1324
+			if (is_invite(t)) cancel_uacs( t, cancel_bitmap );
1325
+			     /* FR for negative INVITES, WAIT anything else */
1326
+			     /* set_final_timer(t) */
1327
+		}
1328
+
1310 1329
 	}
1311 1330
 
1312 1331
 	if (reply_status==RPS_ERROR)
1313 1332
 		goto done;
1314 1333
 
1315
-	/* clean-up the transaction when transaction completed */
1316
-	if (reply_status==RPS_COMPLETED) {
1317
-		/* no more UAC FR/RETR (if I received a 2xx, there may
1318
-		   be still pending branches ...
1319
-		*/
1320
-		cleanup_uac_timers( t );	
1321
-		if (is_invite(t)) cancel_uacs( t, cancel_bitmap );
1322
-		/* FR for negative INVITES, WAIT anything else */
1323
-		set_final_timer(  t );
1324
-	} 
1325
-
1326 1334
 	/* update FR/RETR timers on provisional replies */
1327 1335
 	if (msg_status<200 && ( restart_fr_on_each_reply ||
1328 1336
 				( (last_uac_status<msg_status) &&
... ...
@@ -246,30 +246,39 @@ static void fake_reply(struct cell *t, int branch, int code )
246 246
 	cancel_bitmap=do_cancel_branch ? 1<<branch : 0;
247 247
 	if ( is_local(t) ) {
248 248
 		reply_status=local_reply( t, FAKED_REPLY, branch, 
249
-			code, &cancel_bitmap );
249
+					  code, &cancel_bitmap );
250
+		if (reply_status == RPS_COMPLETED) {
251
+			put_on_wait(t);
252
+		}
250 253
 	} else {
251 254
 		reply_status=relay_reply( t, FAKED_REPLY, branch, code,
252
-			&cancel_bitmap );
255
+					  &cancel_bitmap );
256
+
257
+#ifdef 0
258
+		if (reply_status==RPS_COMPLETED) {
259
+			     /* don't need to cleanup uac_timers -- they were cleaned
260
+				branch by branch and this last branch's timers are
261
+				reset now too
262
+			     */
263
+			     /* don't need to issue cancels -- local cancels have been
264
+				issued branch by branch and this last branch was
265
+				canceled now too
266
+			     */
267
+			     /* then the only thing to do now is to put the transaction
268
+				on FR/wait state 
269
+			     */
270
+			     /*
271
+			       set_final_timer(  t );
272
+			     */
273
+		}
274
+#endif
275
+
253 276
 	}
254 277
 	/* now when out-of-lock do the cancel I/O */
255 278
 	if (do_cancel_branch) cancel_branch(t, branch );
256 279
 	/* it's cleaned up on error; if no error occurred and transaction
257 280
 	   completed regularly, I have to clean-up myself
258 281
 	*/
259
-	if (reply_status==RPS_COMPLETED) {
260
-		/* don't need to cleanup uac_timers -- they were cleaned
261
-		   branch by branch and this last branch's timers are
262
-		   reset now too
263
-		*/
264
-		/* don't need to issue cancels -- local cancels have been
265
-		   issued branch by branch and this last branch was
266
-		   canceled now too
267
-		*/
268
-		/* then the only thing to do now is to put the transaction
269
-		   on FR/wait state 
270
-		*/
271
-		set_final_timer(  t );
272
-	}
273 282
 }
274 283
 
275 284