Browse code

- log messages in the children's signal handlers are turned off if NO_SIG_DEBUG is defined (safer, but not the default for now) - new config parameter: exit_timeout (how much time ser will wait for its shutdown to complete; when it expires, ser will kill everything) - added the missing timeout to the SIGTERM-triggered shutdown

Andrei Pelinescu-Onciul authored on 25/10/2006 16:44:21
Showing 8 changed files
... ...
@@ -67,7 +67,7 @@ MAIN_NAME=ser
67 67
 VERSION = 0
68 68
 PATCHLEVEL = 10
69 69
 SUBLEVEL =   99
70
-EXTRAVERSION = -dev53-tm_fixes
70
+EXTRAVERSION = -dev54-tm_fixes
71 71
 
72 72
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
73 73
 			$(SUBLEVEL) )
... ...
@@ -353,6 +353,9 @@ endif
353 353
 #		Don't forget to set PROFILE (see below)
354 354
 # -DUSE_STUN
355 355
 #		compiles in stun support
356
+# -DNO_SIG_DEBUG
357
+#        turns off debugging messages in signal handlers (logging from a
358
+#         signal handler might be unsafe)
356 359
 
357 360
 # Sometimes a correct non-quoted $OS is needed. HACK: gcc translates a known OS name to a number ('linux'), so an underscore is added
358 361
 
... ...
@@ -383,7 +386,8 @@ DEFS+= $(extra_defs) \
383 383
 	 #-DUSE_SHM_MEM \
384 384
 	 #-DSTATS \
385 385
 	 #-DNO_LOG \
386
-	 #-DPROFILING
386
+	 #-DPROFILING \
387
+	 #-DNO_SIG_DEBUG
387 388
 
388 389
 #PROFILE=  -pg	# set this if you want profiling
389 390
 				# you may also want to set -DPROFILING
... ...
@@ -177,9 +177,14 @@ core:
177 177
    are available (see tm docs)
178 178
 - avps directly accessible from script with %avp_name (variable style)
179 179
 new config variables:
180
-   stun_refresh_interval = number in milisecond (default 0); value for attribute
181
-       REFRESH INTERVAL
182
-   stun_allow_stun = 0 | 1 (off | on - defaul 1); use STUN or not if compiled
180
+   exit_timeout = seconds - how much time ser will wait for all the shutdown
181
+       procedures to complete. If this time is exceeded, all the remaining
182
+       processes are immediately killed and ser exits immediately (it might
183
+       also generate a core dump if the cleanup part takes too long).
184
+       Default: 60 s. Use 0 to disable.
185
+   stun_refresh_interval = number in millisecond (default 0); value for 
186
+      attribute REFRESH INTERVAL
187
+   stun_allow_stun = 0 | 1 (off | on - default 1); use STUN or not if compiled
183 188
    stun_allow_fp = 0 | 1 (off | on - default 1); use FINGERPRINT attribute
184 189
    use_dns_cache = on | off  (default on)  
185 190
    use_dns_failover = on | off (default off)
... ...
@@ -290,6 +290,7 @@ OPEN_FD_LIMIT		"open_files_limit"
290 290
 MCAST_LOOPBACK		"mcast_loopback"
291 291
 MCAST_TTL		"mcast_ttl"
292 292
 TOS			"tos"
293
+KILL_TIMEOUT	"exit_timeout"|"ser_kill_timeout"
293 294
 
294 295
 /* stun config variables */
295 296
 STUN_REFRESH_INTERVAL "stun_refresh_interval"
... ...
@@ -532,6 +533,8 @@ EAT_ABLE	[\ \t\b\r]
532 532
 									return MCAST_TTL; }
533 533
 <INITIAL>{TOS}			{	count(); yylval.strval=yytext;
534 534
 									return TOS; }
535
+<INITIAL>{KILL_TIMEOUT}			{	count(); yylval.strval=yytext;
536
+									return KILL_TIMEOUT; }
535 537
 <INITIAL>{LOADMODULE}	{ count(); yylval.strval=yytext; return LOADMODULE; }
536 538
 <INITIAL>{MODPARAM}     { count(); yylval.strval=yytext; return MODPARAM; }
537 539
 
... ...
@@ -320,6 +320,7 @@ static struct socket_id* mk_listen_id(char*, int, int);
320 320
 %token MCAST_LOOPBACK
321 321
 %token MCAST_TTL
322 322
 %token TOS
323
+%token KILL_TIMEOUT
323 324
 
324 325
 %token FLAGS_DECL
325 326
 %token AVPFLAGS_DECL
... ...
@@ -869,6 +870,8 @@ assign_stm:
869 869
 	| MCAST_TTL EQUAL error { yyerror("number expected"); }
870 870
 	| TOS EQUAL NUMBER { tos=$3; }
871 871
 	| TOS EQUAL error { yyerror("number expected"); }
872
+	| KILL_TIMEOUT EQUAL NUMBER { ser_kill_timeout=$3; }
873
+	| KILL_TIMEOUT EQUAL error { yyerror("number expected"); }
872 874
 	| STUN_REFRESH_INTERVAL EQUAL NUMBER { 
873 875
 		#ifdef USE_STUN
874 876
 			stun_refresh_interval=$3;
... ...
@@ -180,7 +180,7 @@
180 180
 								+ 1 /*sep*/ + 8 /*int2hex*/ + \
181 181
 								1 /*extra space, needed by t_calc_branch*/)
182 182
 
183
-
183
+#define DEFAULT_SER_KILL_TIMEOUT 60 /* seconds */
184 184
 
185 185
 /* maximum path length */
186 186
 #define PATH_MAX_GUESS	1024
... ...
@@ -50,9 +50,15 @@ extern int log_facility;
50 50
 extern volatile int dprint_crit; /* protection against "simultaneous"
51 51
 									printing from signal handlers */
52 52
 
53
+#ifdef NO_SIG_DEBUG
54
+#define DPRINT_NON_CRIT		(1)
55
+#define DPRINT_CRIT_ENTER
56
+#define DPRINT_CRIT_EXIT
57
+#else
53 58
 #define DPRINT_NON_CRIT		(dprint_crit==0)
54 59
 #define DPRINT_CRIT_ENTER	(dprint_crit++)
55 60
 #define DPRINT_CRIT_EXIT	(dprint_crit--)
61
+#endif
56 62
 
57 63
 #define DPRINT_LEV	1
58 64
 /* priority at which we log */
... ...
@@ -166,6 +166,9 @@ extern str default_global_address;
166 166
 /* pre-ser ports */
167 167
 extern str default_global_port;
168 168
 
169
+/* how much time to allow for shutdown, before killing everything */
170
+extern int ser_kill_timeout; /* declaration only; defined elsewhere with its default value */
171
+
169 172
 /* core dump and file limits */
170 173
 extern int disable_core_dump;
171 174
 extern int open_files_limit;
... ...
@@ -63,7 +63,9 @@
63 63
  *  2005-07-25  use sigaction for setting the signal handlers (andrei)
64 64
  *  2006-07-13  added dns cache/failover init. (andrei)
65 65
  *  2006-10-13  added global variables stun_refresh_interval, stun_allow_stun
66
- *              and stun_allow_fp (vlada)
66
+ *               and stun_allow_fp (vlada)
67
+ *  2006-10-25  don't log messages from signal handlers if NO_SIG_DEBUG is
68
+ *               defined; improved exit kill timeout (andrei)
67 69
  */
68 70
 
69 71
 
... ...
@@ -146,6 +148,13 @@
146 146
 #endif
147 147
 #include "version.h"
148 148
 
149
+/* define SIG_DEBUG by default */
150
+#ifdef NO_SIG_DEBUG
151
+#undef SIG_DEBUG
152
+#else
153
+#define SIG_DEBUG
154
+#endif
155
+
149 156
 static char id[]="@(#) $Id$";
150 157
 static char* version=SER_FULL_VERSION;
151 158
 static char* flags=SER_COMPILE_FLAGS;
... ...
@@ -369,6 +378,9 @@ struct host_alias* aliases=0; /* name aliases list */
369 369
 /* Parameter to child_init */
370 370
 int child_rank = 0;
371 371
 
372
+/* how much to wait for children to terminate, before taking extreme measures*/
373
+int ser_kill_timeout=DEFAULT_SER_KILL_TIMEOUT;
374
+
372 375
 /* process_bm_t process_bit = 0; */
373 376
 #ifdef ROUTE_SRV
374 377
 #endif
... ...
@@ -523,11 +535,17 @@ void handle_sigs()
523 523
 
524 524
 			/* first of all, kill the children also */
525 525
 			kill_all_children(SIGTERM);
526
-
527
-			     /* Wait for all the children to die */
528
-			while(wait(0) > 0);
529
-
526
+			if (set_sig_h(SIGALRM, sig_alarm_kill) == SIG_ERR ) {
527
+				LOG(L_ERR, "ERROR: could not install SIGALARM handler\n");
528
+				/* continue, the process will die anyway if no
529
+				 * alarm is installed which is exactly what we want */
530
+			}
531
+			alarm(ser_kill_timeout);
532
+			/* Wait for all the children to die */
533
+			while((wait(0) > 0) || (errno==EINTR));
534
+			set_sig_h(SIGALRM, sig_alarm_abort);
530 535
 			cleanup(1); /* cleanup & show status*/
536
+			alarm(0);
531 537
 			dprint("Thank you for flying " NAME "\n");
532 538
 			exit(0);
533 539
 			break;
... ...
@@ -578,8 +596,9 @@ void handle_sigs()
578 578
 				/* continue, the process will die anyway if no
579 579
 				 * alarm is installed which is exactly what we want */
580 580
 			}
581
-			alarm(60); /* 1 minute close timeout */
582
-			while(wait(0) > 0); /* wait for all the children to terminate*/
581
+			alarm(ser_kill_timeout);
582
+			while((wait(0) > 0) || (errno==EINTR)); /* wait for all the 
583
+													   children to terminate*/
583 584
 			set_sig_h(SIGALRM, sig_alarm_abort);
584 585
 			cleanup(1); /* cleanup & show status*/
585 586
 			alarm(0);
... ...
@@ -621,16 +640,20 @@ static void sig_usr(int signo)
621 621
 		/* process the important signals */
622 622
 		switch(signo){
623 623
 			case SIGPIPE:
624
+#ifdef SIG_DEBUG /* signal unsafe stuff follows */
624 625
 					LOG(L_INFO, "INFO: signal %d received\n", signo);
626
+#endif
625 627
 				break;
626 628
 			case SIGINT:
627 629
 			case SIGTERM:
630
+#ifdef SIG_DEBUG /* signal unsafe stuff follows */
628 631
 					LOG(L_INFO, "INFO: signal %d received\n", signo);
629 632
 					/* print memory stats for non-main too */
630 633
 					#ifdef PKG_MALLOC
631 634
 					LOG(memlog, "Memory status (pkg):\n");
632 635
 					pkg_status();
633 636
 					#endif
637
+#endif
634 638
 					exit(0);
635 639
 					break;
636 640
 			case SIGUSR1:
... ...
@@ -642,11 +665,14 @@ static void sig_usr(int signo)
642 642
 					break;
643 643
 			case SIGCHLD:
644 644
 #ifndef 			STOP_JIRIS_CHANGES
645
+#ifdef SIG_DEBUG /* signal unsafe stuff follows */
645 646
 					DBG("SIGCHLD received: "
646 647
 						"we do not worry about grand-children\n");
648
+#endif
647 649
 #else
648 650
 					exit(0); /* terminate if one child died */
649 651
 #endif
652
+					break;
650 653
 		}
651 654
 	}
652 655
 }