Browse code

tcp: blacklist at tcp level if possible

- automatically blacklist destinations if connecting to them fails
(BLST_ERR_CONNECT) or sending fails (BLST_ERR_SEND), either due to a
protocol error (RST, protocol-level timeout, and so on) or because
of a ser-level send/connect timeout.
Note: in these cases the sip_msg parameter passed to the blacklist will
be null (since in general the message triggering the error is not
known), so if you register a blacklist callback you should make
sure it works with null sip_msgs too.

- if a connection is in the connect-pending state (S_CONN_CONNECT)
and something is read on it, move it into the established state
(S_CONN_OK). This can happen only in tcp async mode.

- fix the direct transition from S_CONN_PENDING to S_CONN_OK (it should
go through S_CONN_CONNECT first)

Andrei Pelinescu-Onciul authored on 26/02/2009 23:13:22
Showing 2 changed files
... ...
@@ -97,6 +97,7 @@
97 97
  *               POLLHUP (andrei)
98 98
  *              on write error check if there's still data in the socket 
99 99
  *               read buffer and process it first (andrei)
100
+ *  2009-02-26  direct blacklist support (andrei)
100 101
  */
101 102
 
102 103
 
... ...
@@ -161,7 +162,10 @@
161 162
 #else
162 163
 #include "tls_hooks_init.h"
163 164
 #include "tls_hooks.h"
164
-#endif
165
+#endif /* CORE_TLS*/
166
+#ifdef USE_DST_BLACKLIST
167
+#include "dst_blacklist.h"
168
+#endif /* USE_DST_BLACKLIST */
165 169
 
166 170
 #include "tcp_info.h"
167 171
 #include "tcp_options.h"
... ...
@@ -481,7 +485,8 @@ error:
481 485
  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
482 486
  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
483 487
  */
484
-static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
488
+static int tcp_blocking_connect(int fd, int type,
489
+								const struct sockaddr *servaddr,
485 490
 								socklen_t addrlen)
486 491
 {
487 492
 	int n;
... ...
@@ -511,6 +516,19 @@ again:
511 516
 			else goto error_timeout;
512 517
 		}
513 518
 		if (errno!=EINPROGRESS && errno!=EALREADY){
519
+#ifdef USE_DST_BLACKLIST
520
+			if (cfg_get(core, core_cfg, use_dst_blacklist))
521
+				switch(errno){
522
+					case ECONNREFUSED:
523
+					case ENETUNREACH:
524
+					case ETIMEDOUT:
525
+					case ECONNRESET:
526
+					case EHOSTUNREACH:
527
+						dst_blacklist_su(BLST_ERR_CONNECT, type,
528
+										 (union sockaddr_union*)servaddr, 0);
529
+						break;
530
+				}
531
+#endif /* USE_DST_BLACKLIST */
514 532
 			LOG(L_ERR, "ERROR: tcp_blocking_connect %s: (%d) %s\n",
515 533
 					su2a((union sockaddr_union*)servaddr, addrlen),
516 534
 					errno, strerror(errno));
... ...
@@ -574,6 +592,11 @@ again:
574 592
 	}
575 593
 error_timeout:
576 594
 	/* timeout */
595
+#ifdef USE_DST_BLACKLIST
596
+	if (cfg_get(core, core_cfg, use_dst_blacklist))
597
+		dst_blacklist_su(BLST_ERR_CONNECT, type,
598
+							(union sockaddr_union*)servaddr, 0);
599
+#endif /* USE_DST_BLACKLIST */
577 600
 	LOG(L_ERR, "ERROR: tcp_blocking_connect %s: timeout %d s elapsed "
578 601
 				"from %d s\n", su2a((union sockaddr_union*)servaddr, addrlen),
579 602
 				elapsed, tcp_connect_timeout);
... ...
@@ -619,6 +642,15 @@ inline static int _wbufq_add(struct  tcp_connection* c, char* data,
619 642
 					" (%d, total %d, last write %d s ago)\n",
620 643
 					size, q->queued, *tcp_total_wq,
621 644
 					TICKS_TO_S(t-q->wr_timeout-tcp_options.tcp_wq_timeout));
645
+#ifdef USE_DST_BLACKLIST
646
+		if (q->first && TICKS_LT(q->wr_timeout, t) &&
647
+				cfg_get(core, core_cfg, use_dst_blacklist)){
648
+			ERR("blacklisting, state=%d\n", c->state);
649
+			dst_blacklist_su((c->state==S_CONN_CONNECT)?  BLST_ERR_CONNECT:
650
+									BLST_ERR_SEND,
651
+								c->rcv.proto, &c->rcv.src_su, 0);
652
+		}
653
+#endif /* USE_DST_BLACKLIST */
622 654
 		goto error;
623 655
 	}
624 656
 	
... ...
@@ -682,7 +714,7 @@ inline static int _wbufq_insert(struct  tcp_connection* c, char* data,
682 714
 		return _wbufq_add(c, data, size);
683 715
 	
684 716
 	if (unlikely((*tcp_total_wq+size)>tcp_options.tcp_wq_max)){
685
-		LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full or timeout"
717
+		LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full"
686 718
 					" (%d, total %d, last write %d s ago)\n",
687 719
 					size, q->queued, *tcp_total_wq,
688 720
 					TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
... ...
@@ -784,11 +816,24 @@ inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
784 816
 				break;
785 817
 			}
786 818
 			q->wr_timeout=t+tcp_options.tcp_wq_timeout;
787
-			c->state=S_CONN_OK;
788 819
 		}else{
789 820
 			if (n<0){
790 821
 				/* EINTR is handled inside _tcpconn_write_nb */
791 822
 				if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
823
+#ifdef USE_DST_BLACKLIST
824
+					if (cfg_get(core, core_cfg, use_dst_blacklist))
825
+						switch(errno){
826
+							case ENETUNREACH:
827
+							case ECONNRESET:
828
+							/*case EHOSTUNREACH: -- not posix */
829
+								dst_blacklist_su((c->state==S_CONN_CONNECT)?
830
+														BLST_ERR_CONNECT:
831
+														BLST_ERR_SEND,
832
+														c->rcv.proto,
833
+														&c->rcv.src_su, 0);
834
+								break;
835
+						}
836
+#endif /* USE_DST_BLACKLIST */
792 837
 					ret=-1;
793 838
 					LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
794 839
 						strerror(errno), errno);
... ...
@@ -994,6 +1039,19 @@ again:
994 1039
 			if (likely(errno==EINPROGRESS))
995 1040
 				*state=S_CONN_CONNECT;
996 1041
 			else if (errno!=EALREADY){
1042
+#ifdef USE_DST_BLACKLIST
1043
+				if (cfg_get(core, core_cfg, use_dst_blacklist))
1044
+					switch(errno){
1045
+						case ECONNREFUSED:
1046
+						case ENETUNREACH:
1047
+						case ETIMEDOUT:
1048
+						case ECONNRESET:
1049
+						case EHOSTUNREACH:
1050
+							dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1051
+												0);
1052
+							break;
1053
+				}
1054
+#endif /* USE_DST_BLACKLIST */
997 1055
 				LOG(L_ERR, "ERROR: tcp_do_connect: connect %s: (%d) %s\n",
998 1056
 							su2a(server, sizeof(*server)),
999 1057
 							errno, strerror(errno));
... ...
@@ -1002,7 +1060,8 @@ again:
1002 1060
 		}
1003 1061
 	}else{
1004 1062
 #endif /* TCP_BUF_WRITE */
1005
-		if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
1063
+		if (tcp_blocking_connect(s, type, &server->s,
1064
+									sockaddru_len(*server))<0){
1006 1065
 			LOG(L_ERR, "ERROR: tcp_do_connect: tcp_blocking_connect %s"
1007 1066
 						" failed\n", su2a(server, sizeof(*server)));
1008 1067
 			goto error;
... ...
@@ -1656,6 +1715,8 @@ no_id:
1656 1715
 				/* do connect and if src ip or port changed, update the 
1657 1716
 				 * aliases */
1658 1717
 				if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
1718
+					/* tcpconn_finish_connect will automatically blacklist
1719
+					   on error => no need to do it here */
1659 1720
 					LOG(L_ERR, "ERROR: tcp_send %s: tcpconn_finish_connect(%p)"
1660 1721
 							" failed\n", su2a(&dst->to, sizeof(dst->to)),
1661 1722
 								c);
... ...
@@ -1703,6 +1764,18 @@ no_id:
1703 1764
 						n=len;
1704 1765
 						goto end;
1705 1766
 					}
1767
+#ifdef USE_DST_BLACKLIST
1768
+					if (cfg_get(core, core_cfg, use_dst_blacklist))
1769
+						switch(errno){
1770
+							case ENETUNREACH:
1771
+							case ECONNRESET:
1772
+							/*case EHOSTUNREACH: -- not posix */
1773
+								/* if first write failed it's most likely a
1774
+								   connect error */
1775
+								dst_blacklist_add( BLST_ERR_CONNECT, dst, 0);
1776
+								break;
1777
+						}
1778
+#endif /* USE_DST_BLACKLIST */
1706 1779
 					/* error: destroy it directly */
1707 1780
 					LOG(L_ERR, "ERROR: tcp_send %s: connect & send "
1708 1781
 										" for %p failed:" " %s (%d)\n",
... ...
@@ -1899,6 +1972,20 @@ send_it:
1899 1972
 			lock_release(&c->write_lock);
1900 1973
 		}
1901 1974
 #endif /* TCP_BUF_WRITE */
1975
+#ifdef USE_DST_BLACKLIST
1976
+		if (cfg_get(core, core_cfg, use_dst_blacklist))
1977
+			switch(errno){
1978
+				case ENETUNREACH:
1979
+				case ECONNRESET:
1980
+				/*case EHOSTUNREACH: -- not posix */
1981
+					dst_blacklist_su((c->state==S_CONN_CONNECT)?
1982
+											BLST_ERR_CONNECT:
1983
+											BLST_ERR_SEND,
1984
+										c->rcv.proto,
1985
+										&c->rcv.src_su, 0);
1986
+					break;
1987
+			}
1988
+#endif /* USE_DST_BLACKLIST */
1902 1989
 		LOG(L_ERR, "ERROR: tcp_send: failed to send on %p (%s:%d->%s): %s (%d)"
1903 1990
 					"\n", c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
1904 1991
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
... ...
@@ -2579,6 +2666,14 @@ inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
2579 2666
 							"refcnt= %d\n", tcpconn,
2580 2667
 							atomic_get(&tcpconn->refcnt));
2581 2668
 					/* timeout */
2669
+#ifdef USE_DST_BLACKLIST
2670
+					if (cfg_get(core, core_cfg, use_dst_blacklist))
2671
+						dst_blacklist_su((tcpconn->state==S_CONN_CONNECT)?
2672
+													BLST_ERR_CONNECT:
2673
+													BLST_ERR_SEND,
2674
+													tcpconn->rcv.proto,
2675
+													&tcpconn->rcv.src_su, 0);
2676
+#endif /* USE_DST_BLACKLIST */
2582 2677
 					if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
2583 2678
 						io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2584 2679
 						tcpconn->flags&=~F_CONN_WRITE_W;
... ...
@@ -2893,7 +2988,6 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
2893 2988
 				tcpconn_put_destroy(tcpconn);
2894 2989
 				break;
2895 2990
 			}
2896
-			tcpconn->state=S_CONN_OK;
2897 2991
 			(*tcp_connections_no)++;
2898 2992
 			tcpconn->s=fd;
2899 2993
 			/* update the timeout*/
... ...
@@ -2905,6 +2999,7 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
2905 2999
 								tcp_con_lifetime, t);
2906 3000
 			tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD;
2907 3001
 			if (unlikely(cmd==CONN_NEW_COMPLETE)){
3002
+				tcpconn->state=S_CONN_OK;
2908 3003
 				/* check if needs to be watched for write */
2909 3004
 				lock_get(&tcpconn->write_lock);
2910 3005
 					/* if queue non empty watch it for write */
... ...
@@ -2914,6 +3009,10 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
2914 3009
 									(F_CONN_WRITE_W|F_CONN_WANTS_WR);
2915 3010
 			}else{
2916 3011
 				/* CONN_NEW_PENDING_WRITE */
3012
+				/* we don't know if we successfully sent anything, but
3013
+				   for sure we haven't sent all what we wanted, so consider
3014
+				   the connection in "connecting" state */
3015
+				tcpconn->state=S_CONN_CONNECT;
2917 3016
 				/* no need to check, we have something queued for write */
2918 3017
 				flags=POLLOUT;
2919 3018
 				tcpconn->flags|=(F_CONN_WRITE_W|F_CONN_WANTS_WR);
... ...
@@ -3351,6 +3450,15 @@ static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
3351 3450
 		else
3352 3451
 			return (ticks_t)(c->timeout - t);
3353 3452
 	}
3453
+#ifdef USE_DST_BLACKLIST
3454
+	/* if time out due to write, add it to the blacklist */
3455
+	if (tcp_options.tcp_buf_write && _wbufq_non_empty(c) &&
3456
+			TICKS_GE(t, c->wbuf_q.wr_timeout) &&
3457
+			cfg_get(core, core_cfg, use_dst_blacklist))
3458
+		dst_blacklist_su((c->state==S_CONN_CONNECT)?  BLST_ERR_CONNECT:
3459
+										BLST_ERR_SEND,
3460
+								c->rcv.proto, &c->rcv.src_su, 0);
3461
+#endif /* USE_DST_BLACKLIST */
3354 3462
 #else /* ! TCP_BUF_WRITE */
3355 3463
 	if (TICKS_LT(t, c->timeout)){
3356 3464
 		/* timeout extended, exit */
... ...
@@ -40,6 +40,7 @@
40 40
  * 2007-11-26  improved tcp timers: switched to local_timer (andrei)
41 41
  * 2008-02-04  optimizations: handle POLLRDHUP (if supported), detect short
42 42
  *              reads (sock. buffer empty) (andrei)
43
+ * 2009-02-26  direct blacklist support (andrei)
43 44
  */
44 45
 
45 46
 #ifdef USE_TCP
... ...
@@ -72,7 +73,10 @@
72 73
 #include "tls/tls_server.h"
73 74
 #else
74 75
 #include "tls_hooks.h"
75
-#endif
76
+#endif /* CORE_TLS */
77
+#ifdef USE_DST_BLACKLIST
78
+#include "dst_blacklist.h"
79
+#endif /* USE_DST_BLACKLIST */
76 80
 
77 81
 #define HANDLE_IO_INLINE
78 82
 #include "io_wait.h"
... ...
@@ -142,6 +146,19 @@ again:
142 146
 				bytes_read=0; /* nothing has been read */
143 147
 			}else if (errno == EINTR) goto again;
144 148
 			else{
149
+#ifdef USE_DST_BLACKLIST
150
+				if (cfg_get(core, core_cfg, use_dst_blacklist))
151
+					switch(errno){
152
+						case ECONNRESET:
153
+						case ETIMEDOUT:
154
+							dst_blacklist_su((c->state==S_CONN_CONNECT)?
155
+													BLST_ERR_CONNECT:
156
+													BLST_ERR_SEND,
157
+													c->rcv.proto,
158
+													&c->rcv.src_su, 0);
159
+							break;
160
+					}
161
+#endif /* USE_DST_BLACKLIST */
145 162
 				LOG(L_ERR, "ERROR: tcp_read: error reading: %s (%d)\n",
146 163
 							strerror(errno), errno);
147 164
 				r->error=TCP_READ_ERROR;
... ...
@@ -152,10 +169,16 @@ again:
152 169
 			c->state=S_CONN_EOF;
153 170
 			*flags|=RD_CONN_EOF;
154 171
 			DBG("tcp_read: EOF on %p, FD %d\n", c, fd);
172
+		}else{
173
+			if (unlikely(c->state==S_CONN_CONNECT))
174
+				c->state=S_CONN_OK;
155 175
 		}
156 176
 		/* short read */
157 177
 		*flags|=RD_CONN_SHORT_READ;
158
-	} /* else normal full read */
178
+	}else{ /* else normal full read */
179
+		if (unlikely(c->state==S_CONN_CONNECT))
180
+			c->state=S_CONN_OK;
181
+	}
159 182
 #ifdef EXTRA_DEBUG
160 183
 	DBG("tcp_read: read %d bytes:\n%.*s\n", bytes_read, bytes_read, r->pos);
161 184
 #endif