Browse code

tcp: fix dispatching closed connections to tcp readers

Under very heavy load it is possible that send2child() might try
to send an already closed connection/fd to a tcp reader.
This can happen only if the tcp connection is watched for read
(POLLIN) by tcp_main (and not by a tcp reader), the connection
becomes available for reading (either new data received, EOF or
RST) and tcp_main chooses a specific tcp reader to send the
connection to while in the same time the same tcp reader tries to
send on the same connection, fails for some reason (no more space
for buffering, EOF, RST a.s.o) and sends a close command back to
tcp_main. Because send2child() executes first any pending commands
from the choosen tcp_reader, this might lead to closing the fd
before attempting to send it to the tcp_reader.
Under normal circumstances the impact is only an extra syscall and
some log messages, however it is possible (but highly unlikely)
that after sending the close command the tcp_reader opens a new
connection for sending and sends its fd back to tcp_main. This new
fd might get the same number as the freshly closed fd and
send2child might send the wrong (fd, tcp connection) pair.

Andrei Pelinescu-Onciul authored on 07/07/2010 09:59:30
Showing 1 changed files
... ...
@@ -2891,6 +2891,7 @@ close_again:
2891 2891
 					su2a(&tcpconn->rcv.src_su, sizeof(tcpconn->rcv.src_su)),
2892 2892
 					fd, tcpconn->flags, strerror(errno), errno);
2893 2893
 	}
2894
+	tcpconn->s=-1;
2894 2895
 }
2895 2896
 
2896 2897
 
... ...
@@ -3836,10 +3837,20 @@ inline static int send2child(struct tcp_connection* tcpconn)
3836 3837
 	 * send a release command, but the master fills its socket buffer
3837 3838
 	 * with new connection commands => deadlock) */
3838 3839
 	/* answer tcp_send requests first */
3839
-	while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
3840
+	while(unlikely((tcpconn->state != S_CONN_BAD) &&
3841
+					(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0)));
3840 3842
 	/* process tcp readers requests */
3841
-	while(handle_tcp_child(&tcp_children[idx], -1)>0);
3842
-		
3843
+	while(unlikely((tcpconn->state != S_CONN_BAD &&
3844
+					(handle_tcp_child(&tcp_children[idx], -1)>0))));
3845
+	
3846
+	/* the above possible pending requests might have included a
3847
+	   command to close this tcpconn (e.g. CONN_ERROR, CONN_EOF).
3848
+	   In this case the fd is already closed here (and possible
3849
+	   even replaced by another one with the same number) so it
3850
+	   must not be sent to a reader anymore */
3851
+	if (unlikely(tcpconn->state == S_CONN_BAD ||
3852
+					(tcpconn->flags & F_CONN_FD_CLOSED)))
3853
+		return -1;
3843 3854
 #ifdef SEND_FD_QUEUE
3844 3855
 	/* if queue full, try to queue the io */
3845 3856
 	if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
... ...
@@ -3961,8 +3972,6 @@ static inline int handle_new_connect(struct socket_info* si)
3961 3972
 		DBG("handle_new_connect: new connection from %s: %p %d flags: %04x\n",
3962 3973
 			su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags);
3963 3974
 		if(unlikely(send2child(tcpconn)<0)){
3964
-			LOG(L_ERR,"ERROR: handle_new_connect: no children "
3965
-					"available\n");
3966 3975
 			tcpconn->flags&=~F_CONN_READER;
3967 3976
 			tcpconn_put(tcpconn);
3968 3977
 			tcpconn_try_unhash(tcpconn);
... ...
@@ -4142,7 +4151,6 @@ send_to_child:
4142 4151
 		tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
4143 4152
 		tcpconn_ref(tcpconn); /* refcnt ++ */
4144 4153
 		if (unlikely(send2child(tcpconn)<0)){
4145
-			LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
4146 4154
 			tcpconn->flags&=~F_CONN_READER;
4147 4155
 #ifdef TCP_ASYNC
4148 4156
 			if (tcpconn->flags & F_CONN_WRITE_W){