Browse code

tcp: verbose and safer close()

- added tcp_safe_close(): a wrapper over close().
- repeat every close() on EINTR (not only the one called from
tcp_main). In general according to posix the state of the file
descriptor after an interrupted close() is undefined. However in
the ser case it's always safe to retry the close (there are no
other threads that might open new fds while the close() is
retried).
- ignore "expected" close() errors. On *BSDs (for example)
close()-ing a socket will return the last error
(e.g it can return ECONNRESET, EPIPE a.s.o.).
Note that this is not true on linux.
- on close failure be more verbose if there is enough information
for a useful error message (the corresp. tcp connection
structure is known and can still be used).

Andrei Pelinescu-Onciul authored on 16/07/2010 13:14:11
Showing 1 changed files
... ...
@@ -214,7 +214,6 @@
214 214
 #endif
215 215
 #define MAX_SEND_FD_QUEUE_SIZE	tcp_main_max_fd_no
216 216
 #define SEND_FD_QUEUE_SIZE		128  /* initial size */
217
-#define MAX_SEND_FD_RETRIES		96	 /* FIXME: not used for now */
218 217
 #define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)  /* 2 s */
219 218
 #endif
220 219
 
... ...
@@ -480,6 +479,42 @@ error:
480 480
 
481 481
 
482 482
 
483
+/** close a socket, handling errno.
484
+ * On EINTR, repeat the close().
485
+ * Filter expected errors (return success if close() failed because
486
+ * EPIPE, ECONNRST a.s.o). Note that this happens on *BSDs (on linux close()
487
+ * does not fail for socket level errors).
488
+ * @param s - open valid socket.
489
+ * @return - 0 on success, < 0 on error (whatever close() returns). On error
490
+ *           errno is set.
491
+ */
492
+static int tcp_safe_close(int s)
493
+{
494
+	int ret;
495
+retry:
496
+	if (unlikely((ret = close(s)) < 0 )) {
497
+		switch(errno) {
498
+			case EINTR:
499
+				goto retry;
500
+			case EPIPE:
501
+			case ENOTCONN:
502
+			case ECONNRESET:
503
+			case ECONNREFUSED:
504
+			case ENETUNREACH:
505
+			case EHOSTUNREACH:
506
+				/* on *BSD we really get these errors at close() time 
507
+				   => ignore them */
508
+				ret = 0;
509
+				break;
510
+			default:
511
+				break;
512
+		}
513
+	}
514
+	return ret;
515
+}
516
+
517
+
518
+
483 519
 /* blocking connect on a non-blocking fd; it will timeout after
484 520
  * tcp_connect_timeout 
485 521
  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
... ...
@@ -1201,7 +1236,7 @@ find_socket:
1201 1201
 	*res_local_addr=*from;
1202 1202
 	return s;
1203 1203
 error:
1204
-	if (s!=-1) close(s);
1204
+	if (s!=-1) tcp_safe_close(s);
1205 1205
 	return -1;
1206 1206
 }
1207 1207
 
... ...
@@ -1240,9 +1275,8 @@ struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
1240 1240
 	}
1241 1241
 	tcpconn_set_send_flags(con, *send_flags);
1242 1242
 	return con;
1243
-	/*FIXME: set sock idx! */
1244 1243
 error:
1245
-	if (s!=-1) close(s); /* close the opened socket */
1244
+	if (s!=-1) tcp_safe_close(s); /* close the opened socket */
1246 1245
 	return 0;
1247 1246
 }
1248 1247
 
... ...
@@ -1702,7 +1736,7 @@ inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
1702 1702
 	
1703 1703
 	h=c->id%TCP_FD_CACHE_SIZE;
1704 1704
 	if (likely(fd_cache[h].fd>0))
1705
-		close(fd_cache[h].fd);
1705
+		tcp_safe_close(fd_cache[h].fd);
1706 1706
 	fd_cache[h].fd=fd;
1707 1707
 	fd_cache[h].id=c->id;
1708 1708
 	fd_cache[h].con=c;
... ...
@@ -2072,14 +2106,14 @@ redo_tls_encode:
2072 2072
 			   again on exit */
2073 2073
 			if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2074 2074
 				/* on error or eof, close fd */
2075
-				close(fd);
2075
+				tcp_safe_close(fd);
2076 2076
 			} else if (response[1] == CONN_QUEUED_WRITE) {
2077 2077
 #ifdef TCP_FD_CACHE
2078 2078
 				if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2079 2079
 					tcp_fd_cache_add(c, fd);
2080 2080
 				} else
2081 2081
 #endif /* TCP_FD_CACHE */
2082
-					close(fd);
2082
+					tcp_safe_close(fd);
2083 2083
 			} else {
2084 2084
 				BUG("unexpected tcpconn_do_send() return & response:"
2085 2085
 						" %d, %ld\n", n, response[1]);
... ...
@@ -2091,7 +2125,7 @@ redo_tls_encode:
2091 2091
 			tcp_fd_cache_add(c, fd);
2092 2092
 		}else
2093 2093
 #endif /* TCP_FD_CACHE */
2094
-			close(fd);
2094
+			tcp_safe_close(fd);
2095 2095
 	/* here we can have only commands that _do_ _not_ dec refcnt.
2096 2096
 	   (CONN_EOF, CON_ERROR, CON_QUEUED_WRITE are all treated above) */
2097 2097
 		goto release_c;
... ...
@@ -2107,7 +2141,11 @@ conn_wait_success:
2107 2107
 		tcp_fd_cache_add(c, fd);
2108 2108
 	} else
2109 2109
 #endif /* TCP_FD_CACHE */
2110
-		close(fd);
2110
+		if (unlikely (tcp_safe_close(fd) < 0))
2111
+			LOG(L_ERR, "closing temporary send fd for %p: %s: "
2112
+					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
2113
+					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2114
+					fd, c->flags, strerror(errno), errno);
2111 2115
 	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2112 2116
 	return n;
2113 2117
 conn_wait_error:
... ...
@@ -2122,7 +2160,13 @@ conn_wait_close:
2122 2122
 	c->state=S_CONN_BAD;
2123 2123
 	/* we are here only if we opened a new fd (and not reused a cached or
2124 2124
 	   a reader one) => if the connect was successful close the fd */
2125
-	if (fd>=0) close(fd);
2125
+	if (fd>=0) {
2126
+		if (unlikely(tcp_safe_close(fd) < 0 ))
2127
+			LOG(L_ERR, "closing temporary send fd for %p: %s: "
2128
+					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
2129
+					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2130
+					fd, c->flags, strerror(errno), errno);
2131
+	}
2126 2132
 	TCPCONN_LOCK;
2127 2133
 		if (c->flags & F_CONN_HASHED){
2128 2134
 			/* if some other parallel tcp_send did send CONN_ERROR to
... ...
@@ -2356,17 +2400,17 @@ error:
2356 2356
 			if (unlikely(fd_cache_e)){
2357 2357
 				tcp_fd_cache_rm(fd_cache_e);
2358 2358
 				fd_cache_e = 0;
2359
-				close(fd);
2359
+				tcp_safe_close(fd);
2360 2360
 			}else
2361 2361
 #endif /* TCP_FD_CACHE */
2362
-				if (do_close_fd) close(fd);
2362
+				if (do_close_fd) tcp_safe_close(fd);
2363 2363
 		} else if (response[1] == CONN_QUEUED_WRITE) {
2364 2364
 #ifdef TCP_FD_CACHE
2365 2365
 			if (unlikely((fd_cache_e==0) && use_fd_cache)){
2366 2366
 				tcp_fd_cache_add(c, fd);
2367 2367
 			}else
2368 2368
 #endif /* TCP_FD_CACHE */
2369
-				if (do_close_fd) close(fd);
2369
+				if (do_close_fd) tcp_safe_close(fd);
2370 2370
 		} else {
2371 2371
 			BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
2372 2372
 					n, response[1]);
... ...
@@ -2379,7 +2423,13 @@ end:
2379 2379
 		tcp_fd_cache_add(c, fd);
2380 2380
 	}else
2381 2381
 #endif /* TCP_FD_CACHE */
2382
-	if (do_close_fd) close(fd);
2382
+	if (do_close_fd) {
2383
+		if (unlikely(tcp_safe_close(fd) < 0))
2384
+			LOG(L_ERR, "closing temporary send fd for %p: %s: "
2385
+					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
2386
+					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2387
+					fd, c->flags, strerror(errno), errno);
2388
+	}
2383 2389
 	/* here we can have only commands that _do_ _not_ dec refcnt.
2384 2390
 	   (CONN_EOF, CON_ERROR, CON_QUEUED_WRITE are all treated above) */
2385 2391
 release_c:
... ...
@@ -2859,7 +2909,7 @@ int tcp_init(struct socket_info* sock_info)
2859 2859
 	return 0;
2860 2860
 error:
2861 2861
 	if (sock_info->socket!=-1){
2862
-		close(sock_info->socket);
2862
+		tcp_safe_close(sock_info->socket);
2863 2863
 		sock_info->socket=-1;
2864 2864
 	}
2865 2865
 	return -1;
... ...
@@ -2882,15 +2932,11 @@ inline static void tcpconn_close_main_fd(struct tcp_connection* tcpconn)
2882 2882
 #ifdef TCP_FD_CACHE
2883 2883
 	if (likely(cfg_get(tcp, tcp_cfg, fd_cache))) shutdown(fd, SHUT_RDWR);
2884 2884
 #endif /* TCP_FD_CACHE */
2885
-close_again:
2886
-	if (unlikely(close(fd)<0)){
2887
-		if (errno==EINTR)
2888
-			goto close_again;
2885
+	if (unlikely(tcp_safe_close(fd)<0))
2889 2886
 		LOG(L_ERR, "ERROR: tcpconn_close_main_fd(%p): %s "
2890 2887
 					"close(%d) failed (flags 0x%x): %s (%d)\n", tcpconn,
2891 2888
 					su2a(&tcpconn->rcv.src_su, sizeof(tcpconn->rcv.src_su)),
2892 2889
 					fd, tcpconn->flags, strerror(errno), errno);
2893
-	}
2894 2890
 	tcpconn->s=-1;
2895 2891
 }
2896 2892
 
... ...
@@ -3917,13 +3963,13 @@ static inline int handle_new_connect(struct socket_info* si)
3917 3917
 		LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
3918 3918
 					*tcp_connections_no,
3919 3919
 					cfg_get(tcp, tcp_cfg, max_connections));
3920
-		close(new_sock);
3920
+		tcp_safe_close(new_sock);
3921 3921
 		TCP_STATS_LOCAL_REJECT();
3922 3922
 		return 1; /* success, because the accept was succesfull */
3923 3923
 	}
3924 3924
 	if (unlikely(init_sock_opt_accept(new_sock)<0)){
3925 3925
 		LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
3926
-		close(new_sock);
3926
+		tcp_safe_close(new_sock);
3927 3927
 		return 1; /* success, because the accept was succesfull */
3928 3928
 	}
3929 3929
 	(*tcp_connections_no)++;
... ...
@@ -3981,7 +4027,7 @@ static inline int handle_new_connect(struct socket_info* si)
3981 3981
 	}else{ /*tcpconn==0 */
3982 3982
 		LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
3983 3983
 				"closing socket\n");
3984
-		close(new_sock);
3984
+		tcp_safe_close(new_sock);
3985 3985
 		(*tcp_connections_no)--;
3986 3986
 	}
3987 3987
 	return 1; /* accept() was succesfull */
... ...
@@ -4363,7 +4409,7 @@ static inline void tcpconn_destroy_all()
4363 4363
 					if (likely(cfg_get(tcp, tcp_cfg, fd_cache)))
4364 4364
 						shutdown(fd, SHUT_RDWR);
4365 4365
 #endif /* TCP_FD_CACHE */
4366
-					close(fd);
4366
+					tcp_safe_close(fd);
4367 4367
 				}
4368 4368
 				(*tcp_connections_no)--;
4369 4369
 			c=next;