Browse code

- tcp fixes: tcpconn_timeout(); the "expire now" timeout (a forced expire now sets the timeout to the current raw ticks instead of 0); switched to "raw" ticks (the conversion to seconds was plagued by truncation errors => in some corner cases one could end up with tcp connections that never expire)
- if tcp_con_lifetime is set to a negative value, use the maximum possible value instead

Andrei Pelinescu-Onciul authored on 04/11/2006 15:55:08
Showing 2 changed files
... ...
@@ -67,7 +67,7 @@ MAIN_NAME=ser
67 67
 VERSION = 0
68 68
 PATCHLEVEL = 10
69 69
 SUBLEVEL =   99
70
-EXTRAVERSION = -dev56-tm_fixes
70
+EXTRAVERSION = -dev57-tm_fixes
71 71
 
72 72
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
73 73
 			$(SUBLEVEL) )
... ...
@@ -71,6 +71,8 @@
71 71
  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72 72
  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
73 73
  *               (andrei)
74
+ *  2006-11-04  switched to raw ticks (to fix conversion errors which could
75
+ *               result in inf. lifetime) (andrei)
74 76
  */
75 77
 
76 78
 
... ...
@@ -147,11 +149,15 @@
147 147
 #define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)  /* 2 s */
148 148
 #endif
149 149
 
150
+/* maximum accepted lifetime (maximum possible is  ~ MAXINT/2) */
151
+#define MAX_TCP_CON_LIFETIME	(1U<<(sizeof(ticks_t)*8-1))
152
+/* minimum interval tcpconn_timeout() is allowed to run, in ticks */
153
+#define TCPCONN_TIMEOUT_MIN_RUN S_TO_TICKS(1)  /* once per s */
150 154
 
151 155
 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
152 156
 				F_TCPCONN, F_TCPCHILD, F_PROC };
153 157
 
154
-
158
+static int is_tcp_main=0;
155 159
 
156 160
 int tcp_accept_aliases=0; /* by default don't accept aliases */
157 161
 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
... ...
@@ -441,7 +447,7 @@ struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
441 441
 	{
442 442
 		c->type=PROTO_TCP;
443 443
 		c->rcv.proto=PROTO_TCP;
444
-		c->timeout=get_ticks()+tcp_con_lifetime;
444
+		c->timeout=get_ticks_raw()+tcp_con_lifetime;
445 445
 	}
446 446
 	c->flags|=F_CONN_REMOVED;
447 447
 	
... ...
@@ -522,6 +528,8 @@ error:
522 522
 
523 523
 
524 524
 
525
+/* adds a tcp connection to the tcpconn hashes
526
+ * Note: it's called _only_ from the tcp_main process */
525 527
 struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
526 528
 {
527 529
 
... ...
@@ -627,14 +635,14 @@ struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port)
627 627
 
628 628
 /* _tcpconn_find with locks and timeout */
629 629
 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
630
-									int timeout)
630
+									ticks_t timeout)
631 631
 {
632 632
 	struct tcp_connection* c;
633 633
 	TCPCONN_LOCK;
634 634
 	c=_tcpconn_find(id, ip, port);
635 635
 	if (c){ 
636 636
 			atomic_inc(&c->refcnt);
637
-			c->timeout=get_ticks()+timeout;
637
+			c->timeout=get_ticks_raw()+timeout;
638 638
 	}
639 639
 	TCPCONN_UNLOCK;
640 640
 	return c;
... ...
@@ -818,7 +826,7 @@ send_it:
818 818
 		LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
819 819
 		/* error on the connection , mark it as bad and set 0 timeout */
820 820
 		c->state=S_CONN_BAD;
821
-		c->timeout=0;
821
+		c->timeout=get_ticks_raw();
822 822
 		/* tell "main" it should drop this (optional it will t/o anyway?)*/
823 823
 		response[0]=(long)c;
824 824
 		response[1]=CONN_ERROR;
... ...
@@ -937,7 +945,9 @@ error:
937 937
 
938 938
 
939 939
 
940
-/* used internally by tcp_main_loop() */
940
+/* used internally by tcp_main_loop()
941
+ * tries to destroy a tcp connection (if it cannot it will force a timeout)
942
+ * Note: it's called _only_ from the tcp_main process */
941 943
 static void tcpconn_destroy(struct tcp_connection* tcpconn)
942 944
 {
943 945
 	int fd;
... ...
@@ -957,7 +967,7 @@ static void tcpconn_destroy(struct tcp_connection* tcpconn)
957 957
 		(*tcp_connections_no)--;
958 958
 	}else{
959 959
 		/* force timeout */
960
-		tcpconn->timeout=0;
960
+		tcpconn->timeout=get_ticks_raw();
961 961
 		tcpconn->state=S_CONN_BAD;
962 962
 		DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
963 963
 				tcpconn, tcpconn->flags);
... ...
@@ -1182,7 +1192,7 @@ inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1182 1182
 				break;
1183 1183
 			}
1184 1184
 			/* update the timeout*/
1185
-			tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1185
+			tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1186 1186
 			tcpconn_put(tcpconn);
1187 1187
 			/* must be after the de-ref*/
1188 1188
 			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
... ...
@@ -1322,7 +1332,7 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
1322 1322
 			/* add tcpconn to the list*/
1323 1323
 			tcpconn_add(tcpconn);
1324 1324
 			/* update the timeout*/
1325
-			tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1325
+			tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1326 1326
 			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1327 1327
 			tcpconn->flags&=~F_CONN_REMOVED;
1328 1328
 			break;
... ...
@@ -1519,16 +1529,6 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1519 1519
 	if (send2child(tcpconn)<0){
1520 1520
 		LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1521 1521
 		tcpconn_destroy(tcpconn);
1522
-#if 0
1523
-		TCPCONN_LOCK;
1524
-		tcpconn->refcnt--;
1525
-		if (tcpconn->refcnt==0){
1526
-			fd=tcpconn->s;
1527
-			_tcpconn_rm(tcpconn);
1528
-			close(fd);
1529
-		}else tcpconn->timeout=0; /* force expire*/
1530
-		TCPCONN_UNLOCK;
1531
-#endif
1532 1522
 	}
1533 1523
 	return 0; /* we are not interested in possibly queued io events, 
1534 1524
 				 the fd was either passed to a child, or closed */
... ...
@@ -1584,18 +1584,22 @@ error:
1584 1584
 
1585 1585
 /* very inefficient for now - FIXME
1586 1586
  * keep in sync with tcpconn_destroy, the "delete" part should be
1587
- * the same except for io_watch_del..*/
1587
+ * the same except for io_watch_del..
1588
+ * Note: this function is called only from the tcp_main process with 1 
1589
+ * exception: on shutdown it's called also by the main ser process via
1590
+ * cleanup() => with the ser shutdown exception, it cannot execute in parallel
1591
+ * with tcpconn_add() or tcpconn_destroy()*/
1588 1592
 static inline void tcpconn_timeout(int force)
1589 1593
 {
1590
-	static int prev_ticks=0;
1594
+	static ticks_t prev_ticks=0;
1591 1595
 	struct tcp_connection *c, *next;
1592
-	unsigned int ticks;
1596
+	ticks_t ticks;
1593 1597
 	unsigned h;
1594 1598
 	int fd;
1595 1599
 	
1596 1600
 	
1597
-	ticks=get_ticks();
1598
-	if ((ticks==prev_ticks) && !force) return;
1601
+	ticks=get_ticks_raw();
1602
+	if (((ticks-prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN) && !force) return;
1599 1603
 	prev_ticks=ticks;
1600 1604
 	TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1601 1605
 	for(h=0; h<TCP_ID_HASH_SIZE; h++){
... ...
@@ -1603,21 +1607,27 @@ static inline void tcpconn_timeout(int force)
1603 1603
 		while(c){
1604 1604
 			next=c->id_next;
1605 1605
 			if (force ||((atomic_get(&c->refcnt)==0) &&
1606
-						((int)(ticks-c->timeout)>=0))){
1606
+						((s_ticks_t)(ticks-c->timeout)>=0))){
1607 1607
 				if (!force)
1608 1608
 					DBG("tcpconn_timeout: timeout for hash=%d - %p"
1609 1609
 							" (%d > %d)\n", h, c, ticks, c->timeout);
1610
-				fd=c->s;
1610
+				if (c->s>0 && is_tcp_main){
1611
+					/* we cannot close or remove the fd if we are not in the
1612
+					 * tcp main proc.*/
1613
+					fd=c->s;
1614
+					if (!(c->flags & F_CONN_REMOVED)){
1615
+						io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1616
+						c->flags|=F_CONN_REMOVED;
1617
+					}
1618
+				}else{
1619
+					fd=-1;
1620
+				}
1611 1621
 #ifdef USE_TLS
1612 1622
 				if (c->type==PROTO_TLS)
1613 1623
 					tls_close(c, fd);
1614 1624
 #endif
1615 1625
 				_tcpconn_rm(c);
1616
-				if ((fd>0)&&(atomic_get(&c->refcnt)==0)) {
1617
-					if (!(c->flags & F_CONN_REMOVED)){
1618
-						io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1619
-						c->flags|=F_CONN_REMOVED;
1620
-					}
1626
+				if (fd>0) {
1621 1627
 					close(fd);
1622 1628
 				}
1623 1629
 				(*tcp_connections_no)--;
... ...
@@ -1637,6 +1647,8 @@ void tcp_main_loop()
1637 1637
 	struct socket_info* si;
1638 1638
 	int r;
1639 1639
 	
1640
+	is_tcp_main=1; /* mark this process as tcp main */
1641
+	
1640 1642
 	/* init send fd queues (here because we want mem. alloc only in the tcp
1641 1643
 	 *  process */
1642 1644
 #ifdef SEND_FD_QUEUE
... ...
@@ -1648,12 +1660,11 @@ void tcp_main_loop()
1648 1648
 	/* init io_wait (here because we want the memory allocated only in
1649 1649
 	 * the tcp_main process) */
1650 1650
 	
1651
-	/* FIXME: TODO: make tcp_max_fd_no a config param */
1652 1651
 	if  (init_io_wait(&io_h, tcp_max_fd_no, tcp_poll_method)<0)
1653 1652
 		goto error;
1654 1653
 	/* init: start watching all the fds*/
1655 1654
 	
1656
-	/* add all the sockets we listens on for connections */
1655
+	/* add all the sockets we listen on for connections */
1657 1656
 	for (si=tcp_listen; si; si=si->next){
1658 1657
 		if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1659 1658
 			if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
... ...
@@ -1869,11 +1880,28 @@ int init_tcp()
1869 1869
 			TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
1870 1870
 	
1871 1871
 	/* fix config variables */
1872
-	/* they can have only positive values due the config parser so we can
1873
-	 * ignore most of them */
1872
+	if (tcp_connect_timeout<0)
1873
+		tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
1874
+	if (tcp_send_timeout<0)
1875
+		tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
1876
+	if (tcp_con_lifetime<0){
1877
+		/* set to max value (~ 1/2 MAX_INT) */
1878
+		tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
1879
+	}else{
1880
+		if ((unsigned)tcp_con_lifetime > 
1881
+				(unsigned)TICKS_TO_S(MAX_TCP_CON_LIFETIME)){
1882
+			LOG(L_WARN, "init_tcp: tcp_con_lifetime too big (%u s), "
1883
+					" the maximum value is %u\n", tcp_con_lifetime,
1884
+					TICKS_TO_S(MAX_TCP_CON_LIFETIME));
1885
+			tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
1886
+		}else{
1887
+			tcp_con_lifetime=S_TO_TICKS(tcp_con_lifetime);
1888
+		}
1889
+	}
1890
+	
1874 1891
 		poll_err=check_poll_method(tcp_poll_method);
1875 1892
 	
1876
-	/* set an appropiate poll method */
1893
+	/* set an appropriate poll method */
1877 1894
 	if (poll_err || (tcp_poll_method==0)){
1878 1895
 		tcp_poll_method=choose_poll_method();
1879 1896
 		if (poll_err){