Browse code

tcp: fix dispatching closed connections to tcp readers

Under very heavy load it is possible that send2child() might try
to send an already closed connection/fd to a tcp reader.
This can happen only if the tcp connection is watched for read
(POLLIN) by tcp_main (and not by a tcp reader), the connection
becomes available for reading (either new data received, EOF or
RST) and tcp_main chooses a specific tcp reader to send the
connection to while in the same time the same tcp reader tries to
send on the same connection, fails for some reason (no more space
for buffering, EOF, RST a.s.o) and sends a close command back to
tcp_main. Because send2child() executes first any pending commands
from the choosen tcp_reader, this might lead to closing the fd
before attempting to send it to the tcp_reader.
Under normal circumstances the impact is only an extra syscall and
some log messages, however it is possible (but highly unlikely)
that after sending the close command the tcp_reader opens a new
connection for sending and sends its fd back to tcp_main. This new
fd might get the same number as the freshly closed fd and
send2child might send the wrong (fd, tcp connection) pair.

Andrei Pelinescu-Onciul authored on 07/07/2010 09:59:30
Showing 1 changed files
... ...
@@ -2891,6 +2891,7 @@ close_again:
2891 2891
 					su2a(&tcpconn->rcv.src_su, sizeof(tcpconn->rcv.src_su)),
2892 2892
 					fd, tcpconn->flags, strerror(errno), errno);
2893 2893
 	}
2894
+	tcpconn->s=-1;
2894 2895
 }
2895 2896
 
2896 2897
 
... ...
@@ -3836,10 +3837,20 @@ inline static int send2child(struct tcp_connection* tcpconn)
3836 3836
 	 * send a release command, but the master fills its socket buffer
3837 3837
 	 * with new connection commands => deadlock) */
3838 3838
 	/* answer tcp_send requests first */
3839
-	while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
3839
+	while(unlikely((tcpconn->state != S_CONN_BAD) &&
3840
+					(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0)));
3840 3841
 	/* process tcp readers requests */
3841
-	while(handle_tcp_child(&tcp_children[idx], -1)>0);
3842
-		
3842
+	while(unlikely((tcpconn->state != S_CONN_BAD &&
3843
+					(handle_tcp_child(&tcp_children[idx], -1)>0))));
3844
+	
3845
+	/* the above possible pending requests might have included a
3846
+	   command to close this tcpconn (e.g. CONN_ERROR, CONN_EOF).
3847
+	   In this case the fd is already closed here (and possible
3848
+	   even replaced by another one with the same number) so it
3849
+	   must not be sent to a reader anymore */
3850
+	if (unlikely(tcpconn->state == S_CONN_BAD ||
3851
+					(tcpconn->flags & F_CONN_FD_CLOSED)))
3852
+		return -1;
3843 3853
 #ifdef SEND_FD_QUEUE
3844 3854
 	/* if queue full, try to queue the io */
3845 3855
 	if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
... ...
@@ -3961,8 +3972,6 @@ static inline int handle_new_connect(struct socket_info* si)
3961 3961
 		DBG("handle_new_connect: new connection from %s: %p %d flags: %04x\n",
3962 3962
 			su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags);
3963 3963
 		if(unlikely(send2child(tcpconn)<0)){
3964
-			LOG(L_ERR,"ERROR: handle_new_connect: no children "
3965
-					"available\n");
3966 3964
 			tcpconn->flags&=~F_CONN_READER;
3967 3965
 			tcpconn_put(tcpconn);
3968 3966
 			tcpconn_try_unhash(tcpconn);
... ...
@@ -4142,7 +4151,6 @@ send_to_child:
4142 4142
 		tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
4143 4143
 		tcpconn_ref(tcpconn); /* refcnt ++ */
4144 4144
 		if (unlikely(send2child(tcpconn)<0)){
4145
-			LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
4146 4145
 			tcpconn->flags&=~F_CONN_READER;
4147 4146
 #ifdef TCP_ASYNC
4148 4147
 			if (tcpconn->flags & F_CONN_WRITE_W){