Browse code

- deadlock on no-detach (-DD) start-up error shutdown fixed (closes SER-210)

Andrei Pelinescu-Onciul authored on 18/01/2007 20:01:37
Showing 2 changed files
... ...
@@ -67,7 +67,7 @@ MAIN_NAME=ser
67 67
 VERSION = 0
68 68
 PATCHLEVEL = 10
69 69
 SUBLEVEL =   99
70
-EXTRAVERSION = -dev64
70
+EXTRAVERSION = -dev65
71 71
 
72 72
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
73 73
 			$(SUBLEVEL) )
... ...
@@ -68,6 +68,8 @@
68 68
  *               defined; improved exit kill timeout (andrei)
69 69
  *              init_childs(PROC_MAIN) before starting tcp_main, to allow
70 70
  *               tcp usage for module started processes (andrei)
71
+ * 2007-01-18  children shutdown procedure moved into shutdown_children;
72
+*               safer shutdown on start-up error (andrei)
71 73
  */
72 74
 
73 75
 
... ...
@@ -480,15 +482,20 @@ static void kill_all_children(int signum)
480 480
 
481 481
 	if (own_pgid) kill(0, signum);
482 482
 	else if (pt){
483
-		lock_get(process_lock);
483
+		 /* lock processes table only if this is a child process
484
+		  * (only main can add processes, so from main is safe not to lock
485
+		  *  and moreover it avoids the lock-holding suicidal children problem)
486
+		  */
487
+		if (!is_main) lock_get(process_lock); 
484 488
 		for (r=1; r<*process_count; r++){
489
+			if (r==process_no) continue; /* try not to be suicidal */
485 490
 			if (pt[r].pid) {
486 491
 				kill(pt[r].pid, signum);
487 492
 			}
488 493
 			else LOG(L_CRIT, "BUG: killing: %s > %d no pid!!!\n",
489 494
 							pt[r].desc, pt[r].pid);
490 495
 		}
491
-		lock_release(process_lock);
496
+		if (!is_main) lock_release(process_lock);
492 497
 	}
493 498
 }
494 499
 
... ...
@@ -517,6 +524,25 @@ static void sig_alarm_abort(int signo)
517 517
 
518 518
 
519 519
 
520
+static void shutdown_children(int sig, int show_status)
521
+{
522
+	kill_all_children(sig);
523
+	if (set_sig_h(SIGALRM, sig_alarm_kill) == SIG_ERR ) {
524
+		LOG(L_ERR, "ERROR: shutdown: could not install SIGALARM handler\n");
525
+		/* continue, the process will die anyway if no
526
+		 * alarm is installed which is exactly what we want */
527
+	}
528
+	alarm(ser_kill_timeout);
529
+	while((wait(0) > 0) || (errno==EINTR)); /* wait for all the 
530
+											   children to terminate*/
531
+	set_sig_h(SIGALRM, sig_alarm_abort);
532
+	cleanup(show_status); /* cleanup & show status*/
533
+	alarm(0);
534
+	set_sig_h(SIGALRM, SIG_IGN);
535
+}
536
+
537
+
538
+
520 539
 void handle_sigs()
521 540
 {
522 541
 	pid_t	chld;
... ...
@@ -537,20 +563,8 @@ void handle_sigs()
537 537
 				DBG("INT received, program terminates\n");
538 538
 			else
539 539
 				DBG("SIGTERM received, program terminates\n");
540
-
541
-			/* first of all, kill the children also */
542
-			kill_all_children(SIGTERM);
543
-			if (set_sig_h(SIGALRM, sig_alarm_kill) == SIG_ERR ) {
544
-				LOG(L_ERR, "ERROR: could not install SIGALARM handler\n");
545
-				/* continue, the process will die anyway if no
546
-				 * alarm is installed which is exactly what we want */
547
-			}
548
-			alarm(ser_kill_timeout);
549
-			/* Wait for all the children to die */
550
-			while((wait(0) > 0) || (errno==EINTR));
551
-			set_sig_h(SIGALRM, sig_alarm_abort);
552
-			cleanup(1); /* cleanup & show status*/
553
-			alarm(0);
540
+			/* shutdown/kill all the children */
541
+			shutdown_children(SIGTERM, 1);
554 542
 			dprint("Thank you for flying " NAME "\n");
555 543
 			exit(0);
556 544
 			break;
... ...
@@ -595,19 +609,7 @@ void handle_sigs()
595 595
 			LOG(L_INFO, "INFO: terminating due to SIGCHLD\n");
596 596
 #endif
597 597
 			/* exit */
598
-			kill_all_children(SIGTERM);
599
-			if (set_sig_h(SIGALRM, sig_alarm_kill) == SIG_ERR ) {
600
-				LOG(L_ERR, "ERROR: could not install SIGALARM handler\n");
601
-				/* continue, the process will die anyway if no
602
-				 * alarm is installed which is exactly what we want */
603
-			}
604
-			alarm(ser_kill_timeout);
605
-			while((wait(0) > 0) || (errno==EINTR)); /* wait for all the 
606
-													   children to terminate*/
607
-			set_sig_h(SIGALRM, sig_alarm_abort);
608
-			cleanup(1); /* cleanup & show status*/
609
-			alarm(0);
610
-			set_sig_h(SIGALRM, SIG_IGN);
598
+			shutdown_children(SIGTERM, 1);
611 599
 			DBG("terminating due to SIGCHLD\n");
612 600
 			exit(0);
613 601
 			break;
... ...
@@ -1081,8 +1083,8 @@ int main_loop()
1081 1081
 #endif
1082 1082
 
1083 1083
 	for(;;){
1084
-			pause();
1085 1084
 			handle_sigs();
1085
+			pause();
1086 1086
 	}
1087 1087
 
1088 1088
 
... ...
@@ -1578,16 +1580,13 @@ try_again:
1578 1578
 
1579 1579
 	ret=main_loop();
1580 1580
 	/*kill everything*/
1581
-	kill_all_children(SIGTERM);
1582
-	/*clean-up*/
1583
-	cleanup(0);
1581
+	if (is_main) shutdown_children(SIGTERM, 0);
1582
+	/* else terminate process */
1584 1583
 	return ret;
1585 1584
 
1586 1585
 error:
1587 1586
 	/*kill everything*/
1588
-	kill_all_children(SIGTERM);
1589
-	/*clean-up*/
1590
-	cleanup(0);
1587
+	if (is_main) shutdown_children(SIGTERM, 0);
1591 1588
 	return -1;
1592 1589
 
1593 1590
 }