... | ... |
@@ -49,7 +49,11 @@ |
49 | 49 |
* added support for increasing the open files limit (andrei) |
50 | 50 |
* 2004-04-28 sock_{user,group,uid,gid,mode} added |
51 | 51 |
* user2uid() & user2gid() added (andrei) |
52 |
- * |
|
52 |
+ * 2004-09-11 added timeout on children shutdown and final cleanup |
|
53 |
+ * (if it takes more than 60s => something is definitely wrong |
|
54 |
+ * => kill all or abort) (andrei) |
|
55 |
+ * force a shm_unlock before cleaning-up, in case we have a |
|
56 |
+ * crashed child which still holds the lock (andrei) |
|
53 | 57 |
*/ |
54 | 58 |
|
55 | 59 |
|
... | ... |
@@ -419,6 +423,9 @@ char* pgid_file = 0; |
419 | 423 |
void cleanup(show_status) |
420 | 424 |
{ |
421 | 425 |
/*clean-up*/ |
426 |
+ shm_unlock(); /* hack: force-unlock the shared memory lock in case |
|
427 |
some process crashed and left it locked; this will |
|
428 |
allow an almost graceful shutdown */ |
|
422 | 429 |
destroy_modules(); |
423 | 430 |
#ifdef USE_TCP |
424 | 431 |
destroy_tcp(); |
... | ... |
@@ -451,7 +458,6 @@ void cleanup(show_status) |
451 | 458 |
} |
452 | 459 |
|
453 | 460 |
|
454 |
- |
|
455 | 461 |
/* tries to send a signal to all our processes |
456 | 462 |
* if daemonized is ok to send the signal to all the process group, |
457 | 463 |
* however if not daemonized we might end up sending the signal also |
... | ... |
@@ -475,6 +481,29 @@ static void kill_all_children(int signum) |
475 | 481 |
|
476 | 482 |
|
477 | 483 |
|
484 |
+/* if this handler is called, a critical timeout has occurred while |
|
485 |
+ * waiting for the children to finish => we should kill everything and exit */ |
|
486 |
+static void sig_alarm_kill(int signo) |
|
487 |
+{ |
|
488 |
+ kill_all_children(SIGKILL); /* this will kill the whole group |
|
489 |
+ including "this" process; |
|
490 |
+ for debugging replace with SIGABRT |
|
491 |
+ (but warning: it might generate lots |
|
492 |
+ of cores) */ |
|
493 |
+} |
|
494 |
+ |
|
495 |
+ |
|
496 |
+/* like sig_alarm_kill, but the timeout has occurred when cleaning up |
|
497 |
+ * => try to leave a core for future diagnostics */ |
|
498 |
+static void sig_alarm_abort(int signo) |
|
499 |
+{ |
|
500 |
+ /* LOG is not signal safe, but who cares, we are abort-ing anyway :-) */ |
|
501 |
+ LOG(L_CRIT, "BUG: shutdown timeout triggered, dying..."); |
|
502 |
+ abort(); |
|
503 |
+} |
|
504 |
+ |
|
505 |
+ |
|
506 |
+ |
|
478 | 507 |
void handle_sigs() |
479 | 508 |
{ |
480 | 509 |
pid_t chld; |
... | ... |
@@ -548,8 +577,17 @@ void handle_sigs() |
548 | 577 |
#endif |
549 | 578 |
/* exit */ |
550 | 579 |
kill_all_children(SIGTERM); |
580 |
+ if (signal(SIGALRM, sig_alarm_kill) == SIG_ERR ) { |
|
581 |
+ LOG(L_ERR, "ERROR: could not install SIGALARM handler\n"); |
|
582 |
+ /* continue, the process will die anyway if no |
|
583 |
+ * alarm is installed which is exactly what we want */ |
|
584 |
+ } |
|
585 |
+ alarm(60); /* 1 minute close timeout */ |
|
551 | 586 |
while(wait(0) > 0); /* wait for all the children to terminate*/ |
587 |
+ signal(SIGALRM, sig_alarm_abort); |
|
552 | 588 |
cleanup(1); /* cleanup & show status*/ |
589 |
+ alarm(0); |
|
590 |
+ signal(SIGALRM, SIG_IGN); |
|
553 | 591 |
DBG("terminating due to SIGCHLD\n"); |
554 | 592 |
exit(0); |
555 | 593 |
break; |