Browse code

- advanced tcp options support: - support for deferring tcp accepts until some data is received (linux & freebsd), default off. See NEWS: tcp_defer_accept. - support for delaying the final ACK from the 3-way handshake until some data is sent (the ACK will come with the 1st data segment). Default on when supported (linux only). See NEWS: tcp_delayed_ack. - support for limiting the number of retransmitted SYNs (linux only, see NEWS: tcp_syncnt) - support for limiting the lifetime of orphaned sockets in FIN_WAIT2 (linux only, see NEWS: tcp_linger2) - keepalive support, see NEWS: tcp_keepalive (default on), tcp_keepidle, tcp_keepintvl and tcp_keepcnt

- the FD cache can now be turned off from ser.cfg (see NEWS: tcp_fd_cache)

Andrei Pelinescu-Onciul authored on 28/11/2007 19:20:47
Showing 8 changed files
... ...
@@ -8,6 +8,13 @@ $Id$
8 8
 2.1.0 changes
9 9
 
10 10
 modules:
11
+ - auth      - added extra authentication checks support, to protect
12
+               against various replay attacks.
13
+             - params:
14
+                       - auth_extra_checks - flags specifying which extra
15
+                          message part/parts will be checked for change before
16
+                          allowing nonce reuse. See the auth module docs for
17
+                          more information (modules/auth/README).
11 18
  - blst      - new module containing script blacklist manipulations functions
12 19
                (the source of a message can be blacklisted, removed from the
13 20
                 blacklist or checked for presence in the blacklist).
... ...
@@ -95,6 +102,8 @@ modules:
95 95
                         - t_set_retr(t1, t2) - changes the retransmissions
96 96
                            intervals on the fly, on a per transaction basis.
97 97
 core:
98
+             - tcp improvements (better tcp timers, send fd cache, special
99
+                options support)
98 100
              - dns naptr support (see dns_try_naptr and dns_<proto>_pref)
99 101
              - dns srv based load balancing support (see dns_srv_lb)
100 102
              - support for locking ser's pages in memory, pre-mapping
... ...
@@ -107,6 +116,37 @@ core:
107 107
                between the short name and long name in cache as CNAME record
108 108
 
109 109
 new config variables:
110
+  tcp_fd_cache = yes | no (default yes) - if enabled FDs used for sending
111
+     will be cached inside the process calling tcp_send (performance increase
112
+     for sending over tcp at the cost of slightly slower connection closing and
113
+     extra FDs kept open)
114
+  tcp_defer_accept =  yes | no (default no) on freebsd  / number of seconds
115
+        before timeout on linux (default disabled) - tcp accepts will be 
116
+        delayed until some data is received (improves performance on proxies
117
+        with lots of opened tcp connections). See linux tcp(7) TCP_DEFER_ACCEPT
118
+        or freebsd accf_data(9). For now linux and freebsd only.
119
+        WARNING: the  linux TCP_DEFER_ACCEPT is buggy (<=2.6.23) and doesn't 
120
+         work exactly as expected (if no data is received it will retransmit 
121
+         syn acks for ~ 190 s, irrespective of the set timeout and then it will
122
+         silently drop the connection without sending a RST or FIN). Try to 
123
+         use it together with tcp_syncnt (this way the number of retrans.
124
+          SYNACKs can be limited => the timeout can be controlled in some way).
125
+  tcp_delayed_ack  = yes | no (default yes when supported) - initial ACK for
126
+        opened connections will be delayed and sent with the first data
127
+        segment (see linux tcp(7) TCP_QUICKACK). For now linux only.
128
+  tcp_syncnt = number of syn retr. (default not set) - number of SYN 
129
+        retransmissions before aborting a connect attempt (see linux tcp(7)
130
+        TCP_SYNCNT). Linux only.
131
+  tcp_linger2 = seconds (not set by default) - lifetime of orphaned sockets
132
+        in FIN_WAIT2 state (overrides tcp_fin_timeout, see linux tcp(7)
133
+        TCP_LINGER2). Linux only.
134
+  tcp_keepalive = yes | no (default yes) - enables keepalive for tcp.
135
+  tcp_keepidle  = seconds (not set by default) - time before starting to send
136
+         keepalives, if the connection is idle. Linux only.
137
+  tcp_keepintvl = seconds (not set by default) - time interval between 
138
+         keepalive probes, when the previous probe failed. Linux only.
139
+  tcp_keepcnt = number (not set by default) - number of keepalives sent before
140
+         dropping the connection. Linux only.
110 141
   pmtu_discovery = 0 | 1 (default 0) - set DF bit in outbound IP if enabled
111 142
   dns_srv_lb = yes | no (default no) - enable dns srv weight based load 
112 143
     balancing (see doc/dns.txt)
... ...
@@ -72,6 +72,8 @@
72 72
  *  2007-09-10  introduced phone2tel option which allows NOT to consider
73 73
  *              user=phone URIs as TEL URIs (jiri)
74 74
  *  2007-10-10  added DNS_SEARCH_FMATCH (mma)
75
+ *  2007-11-28  added TCP_OPT_{FD_CACHE, DEFER_ACCEPT, DELAYED_ACK, SYNCNT,
76
+ *              LINGER2, KEEPALIVE, KEEPIDLE, KEEPINTVL, KEEPCNT} (andrei)
75 77
 */
76 78
 
77 79
 
... ...
@@ -290,6 +292,15 @@ TCP_POLL_METHOD		"tcp_poll_method"
290 290
 TCP_MAX_CONNECTIONS	"tcp_max_connections"
291 291
 TCP_SOURCE_IPV4		"tcp_source_ipv4"
292 292
 TCP_SOURCE_IPV6		"tcp_source_ipv6"
293
+TCP_OPT_FD_CACHE	"tcp_fd_cache"
294
+TCP_OPT_DEFER_ACCEPT "tcp_defer_accept"
295
+TCP_OPT_DELAYED_ACK	"tcp_delayed_ack"
296
+TCP_OPT_SYNCNT		"tcp_syncnt"
297
+TCP_OPT_LINGER2		"tcp_linger2"
298
+TCP_OPT_KEEPALIVE	"tcp_keepalive"
299
+TCP_OPT_KEEPIDLE	"tcp_keepidle"
300
+TCP_OPT_KEEPINTVL	"tcp_keepintvl"
301
+TCP_OPT_KEEPCNT		"tcp_keepcnt"
293 302
 DISABLE_TLS		"disable_tls"|"tls_disable"
294 303
 ENABLE_TLS		"enable_tls"|"tls_enable"
295 304
 TLSLOG			"tlslog"|"tls_log"
... ...
@@ -548,6 +559,24 @@ EAT_ABLE	[\ \t\b\r]
548 548
 									return TCP_SOURCE_IPV4; }
549 549
 <INITIAL>{TCP_SOURCE_IPV6}		{ count(); yylval.strval=yytext;
550 550
 									return TCP_SOURCE_IPV6; }
551
+<INITIAL>{TCP_OPT_FD_CACHE}		{ count(); yylval.strval=yytext;
552
+									return TCP_OPT_FD_CACHE; }
553
+<INITIAL>{TCP_OPT_DEFER_ACCEPT}	{ count(); yylval.strval=yytext;
554
+									return TCP_OPT_DEFER_ACCEPT; }
555
+<INITIAL>{TCP_OPT_DELAYED_ACK}	{ count(); yylval.strval=yytext;
556
+									return TCP_OPT_DELAYED_ACK; }
557
+<INITIAL>{TCP_OPT_SYNCNT}		{ count(); yylval.strval=yytext;
558
+									return TCP_OPT_SYNCNT; }
559
+<INITIAL>{TCP_OPT_LINGER2}		{ count(); yylval.strval=yytext;
560
+									return TCP_OPT_LINGER2; }
561
+<INITIAL>{TCP_OPT_KEEPALIVE}	{ count(); yylval.strval=yytext;
562
+									return TCP_OPT_KEEPALIVE; }
563
+<INITIAL>{TCP_OPT_KEEPIDLE}		{ count(); yylval.strval=yytext;
564
+									return TCP_OPT_KEEPIDLE; }
565
+<INITIAL>{TCP_OPT_KEEPINTVL}	{ count(); yylval.strval=yytext;
566
+									return TCP_OPT_KEEPINTVL; }
567
+<INITIAL>{TCP_OPT_KEEPCNT}	{ count(); yylval.strval=yytext;
568
+									return TCP_OPT_KEEPCNT; }
551 569
 <INITIAL>{DISABLE_TLS}	{ count(); yylval.strval=yytext; return DISABLE_TLS; }
552 570
 <INITIAL>{ENABLE_TLS}	{ count(); yylval.strval=yytext; return ENABLE_TLS; }
553 571
 <INITIAL>{TLSLOG}		{ count(); yylval.strval=yytext; return TLSLOG; }
... ...
@@ -85,6 +85,8 @@
85 85
  * 2007-09-10  introduced phone2tel option which allows NOT to consider
86 86
  *             user=phone URIs as TEL URIs (jiri)
87 87
  * 2007-10-10  added DNS_SEARCH_FMATCH (mma)
88
+ * 2007-11-28  added TCP_OPT_{FD_CACHE, DEFER_ACCEPT, DELAYED_ACK, SYNCNT,
89
+ *              LINGER2, KEEPALIVE, KEEPIDLE, KEEPINTVL, KEEPCNT} (andrei)
88 90
 */
89 91
 
90 92
 %{
... ...
@@ -112,6 +114,7 @@
112 112
 #include "select.h"
113 113
 #include "flags.h"
114 114
 #include "tcp_init.h"
115
+#include "tcp_options.h"
115 116
 
116 117
 #include "config.h"
117 118
 #ifdef CORE_TLS
... ...
@@ -330,6 +333,15 @@ static struct socket_id* mk_listen_id(char*, int, int);
330 330
 %token TCP_MAX_CONNECTIONS
331 331
 %token TCP_SOURCE_IPV4
332 332
 %token TCP_SOURCE_IPV6
333
+%token TCP_OPT_FD_CACHE
334
+%token TCP_OPT_DEFER_ACCEPT
335
+%token TCP_OPT_DELAYED_ACK
336
+%token TCP_OPT_SYNCNT
337
+%token TCP_OPT_LINGER2
338
+%token TCP_OPT_KEEPALIVE
339
+%token TCP_OPT_KEEPIDLE
340
+%token TCP_OPT_KEEPINTVL
341
+%token TCP_OPT_KEEPCNT
333 342
 %token DISABLE_TLS
334 343
 %token ENABLE_TLS
335 344
 %token TLSLOG
... ...
@@ -783,6 +795,78 @@ assign_stm:
783 783
 		pkg_free($3);
784 784
 	}
785 785
 	| TCP_SOURCE_IPV6 EQUAL error { yyerror("IPv6 address expected"); }
786
+	| TCP_OPT_FD_CACHE EQUAL NUMBER {
787
+		#ifdef USE_TCP
788
+			tcp_options.fd_cache=$3;
789
+		#else
790
+			warn("tcp support not compiled in");
791
+		#endif
792
+	}
793
+	| TCP_OPT_FD_CACHE EQUAL error { yyerror("boolean value expected"); }
794
+	| TCP_OPT_DEFER_ACCEPT EQUAL NUMBER {
795
+		#ifdef USE_TCP
796
+			tcp_options.defer_accept=$3;
797
+		#else
798
+			warn("tcp support not compiled in");
799
+		#endif
800
+	}
801
+	| TCP_OPT_DEFER_ACCEPT EQUAL error { yyerror("boolean value expected"); }
802
+	| TCP_OPT_DELAYED_ACK EQUAL NUMBER {
803
+		#ifdef USE_TCP
804
+			tcp_options.delayed_ack=$3;
805
+		#else
806
+			warn("tcp support not compiled in");
807
+		#endif
808
+	}
809
+	| TCP_OPT_DELAYED_ACK EQUAL error { yyerror("boolean value expected"); }
810
+	| TCP_OPT_SYNCNT EQUAL NUMBER {
811
+		#ifdef USE_TCP
812
+			tcp_options.syncnt=$3;
813
+		#else
814
+			warn("tcp support not compiled in");
815
+		#endif
816
+	}
817
+	| TCP_OPT_SYNCNT EQUAL error { yyerror("number expected"); }
818
+	| TCP_OPT_LINGER2 EQUAL NUMBER {
819
+		#ifdef USE_TCP
820
+			tcp_options.linger2=$3;
821
+		#else
822
+			warn("tcp support not compiled in");
823
+		#endif
824
+	}
825
+	| TCP_OPT_LINGER2 EQUAL error { yyerror("number expected"); }
826
+	| TCP_OPT_KEEPALIVE EQUAL NUMBER {
827
+		#ifdef USE_TCP
828
+			tcp_options.keepalive=$3;
829
+		#else
830
+			warn("tcp support not compiled in");
831
+		#endif
832
+	}
833
+	| TCP_OPT_KEEPALIVE EQUAL error { yyerror("boolean value expected");}
834
+	| TCP_OPT_KEEPIDLE EQUAL NUMBER {
835
+		#ifdef USE_TCP
836
+			tcp_options.keepidle=$3;
837
+		#else
838
+			warn("tcp support not compiled in");
839
+		#endif
840
+	}
841
+	| TCP_OPT_KEEPIDLE EQUAL error { yyerror("number expected"); }
842
+	| TCP_OPT_KEEPINTVL EQUAL NUMBER {
843
+		#ifdef USE_TCP
844
+			tcp_options.keepintvl=$3;
845
+		#else
846
+			warn("tcp support not compiled in");
847
+		#endif
848
+	}
849
+	| TCP_OPT_KEEPINTVL EQUAL error { yyerror("number expected"); }
850
+	| TCP_OPT_KEEPCNT EQUAL NUMBER {
851
+		#ifdef USE_TCP
852
+			tcp_options.keepcnt=$3;
853
+		#else
854
+			warn("tcp support not compiled in");
855
+		#endif
856
+	}
857
+	| TCP_OPT_KEEPCNT EQUAL error { yyerror("number expected"); }
786 858
 	| DISABLE_TLS EQUAL NUMBER {
787 859
 		#ifdef USE_TLS
788 860
 			tls_disable=$3;
... ...
@@ -37,6 +37,7 @@
37 37
 #include "pt.h"
38 38
 #include "ut.h"
39 39
 #include "tcp_info.h"
40
+#include "tcp_options.h"
40 41
 #include "core_cmd.h"
41 42
 
42 43
 #ifdef USE_DNS_CACHE
... ...
@@ -470,12 +471,11 @@ all:
470 470
 			}
471 471
 		}
472 472
 		rpc->add(c, "{", &handle);
473
-		rpc->struct_add(handle, "ddddddd",
473
+		rpc->struct_add(handle, "dddddd",
474 474
 			"pool  ", i,
475 475
 			"frags ", (unsigned int)frags,
476 476
 			"t. misses", (unsigned int)misses,
477 477
 			"mem   ", (unsigned int)mem,
478
-			"bitmap", (unsigned int)shm_block->pool[i].bitmap,
479 478
 			"missed", (unsigned int)shm_block->pool[i].missed,
480 479
 			"hits",   (unsigned int)shm_block->pool[i].hits
481 480
 		);
... ...
@@ -490,7 +490,7 @@ all:
490 490
 		main_b_frags+=shm_block->free_hash[r].no;
491 491
 	}
492 492
 	rpc->add(c, "{", &handle);
493
-	rpc->struct_add(handle, "dddddddddddddd",
493
+	rpc->struct_add(handle, "ddddddddddddd",
494 494
 		"max_frags      ", (unsigned int)max_frags,
495 495
 		"max_frags_pool ", max_frags_pool,
496 496
 		"max_frags_hash", max_frags_hash,
... ...
@@ -503,8 +503,7 @@ all:
503 503
 		"in_pools_frags ", (unsigned int)pool_frags,
504 504
 		"main_s_frags   ", (unsigned int)main_s_frags,
505 505
 		"main_b_frags   ", (unsigned int)main_b_frags,
506
-		"main_frags     ", (unsigned int)(main_b_frags+main_s_frags),
507
-		"main_bitmap    ", (unsigned int)shm_block->bitmap
506
+		"main_frags     ", (unsigned int)(main_b_frags+main_s_frags)
508 507
 	);
509 508
 }
510 509
 
... ...
@@ -546,6 +545,43 @@ static void core_tcpinfo(rpc_t* rpc, void* c)
546 546
 #endif
547 547
 }
548 548
 
549
+
550
+
551
+static const char* core_tcp_options_doc[] = {
552
+	"Returns active tcp options.",    /* Documentation string */
553
+	0                                 /* Method signature(s) */
554
+};
555
+/* RPC handler: replies with a struct of the values copied by tcp_options_get(), or with a 500 fault if tcp is disabled or not compiled in */
556
+static void core_tcp_options(rpc_t* rpc, void* c)
557
+{
558
+#ifdef USE_TCP
559
+	void *handle; /* declared under USE_TCP: it is used only by the branch below */
560
+	struct tcp_cfg_options t;
561
+
562
+	if (!tcp_disable){
563
+		tcp_options_get(&t);
564
+		rpc->add(c, "{", &handle);
565
+		rpc->struct_add(handle, "ddddddddd",
566
+			"fd_cache",		t.fd_cache,
567
+			"defer_accept",	t.defer_accept,
568
+			"delayed_ack",	t.delayed_ack,
569
+			"syncnt",		t.syncnt,
570
+			"linger2",		t.linger2,
571
+			"keepalive",	t.keepalive,
572
+			"keepidle",		t.keepidle,
573
+			"keepintvl",	t.keepintvl,
574
+			"keepcnt",		t.keepcnt
575
+		);
576
+	}else{
577
+		rpc->fault(c, 500, "tcp support disabled");
578
+	}
579
+#else
580
+	rpc->fault(c, 500, "tcp support not compiled");
581
+#endif
582
+}
583
+
584
+
585
+
549 586
 /*
550 587
  * RPC Methods exported by this module
551 588
  */
... ...
@@ -564,7 +600,8 @@ rpc_export_t core_rpc_methods[] = {
564 564
 #if defined(SF_MALLOC) || defined(LL_MALLOC)
565 565
 	{"core.sfmalloc",          core_sfmalloc,          core_sfmalloc_doc,   0},
566 566
 #endif
567
-	{"core.tcp_info",          core_tcpinfo,           core_tcpinfo_doc,          0	},
567
+	{"core.tcp_info",          core_tcpinfo,           core_tcpinfo_doc,    0},
568
+	{"core.tcp_options",       core_tcp_options,       core_tcp_options_doc,0},
568 569
 #ifdef USE_DNS_CACHE
569 570
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,     0	},
570 571
 	{"dns.debug",          dns_cache_debug,           dns_cache_debug_doc,        0	},
... ...
@@ -42,5 +42,4 @@ struct tcp_gen_info{
42 42
 
43 43
 void tcp_get_info(struct tcp_gen_info* ti);
44 44
 
45
-
46 45
 #endif
... ...
@@ -85,6 +85,8 @@
85 85
  *               io_watch_add-ing its fd - it's safer this way (andrei)
86 86
  *  2007-11-26  improved tcp timers: switched to local_timer (andrei)
87 87
  *  2007-11-27  added send fd cache and reader fd reuse (andrei)
88
+ *  2007-11-28  added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
89
+ *               KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
88 90
  */
89 91
 
90 92
 
... ...
@@ -142,6 +144,7 @@
142 142
 #endif
143 143
 
144 144
 #include "tcp_info.h"
145
+#include "tcp_options.h"
145 146
 
146 147
 #define local_malloc pkg_malloc
147 148
 #define local_free   pkg_free
... ...
@@ -178,8 +181,6 @@ enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
178 178
 				F_TCPCONN, F_TCPCHILD, F_PROC };
179 179
 
180 180
 
181
-#define TCP_FD_CACHE
182
-
183 181
 #ifdef TCP_FD_CACHE
184 182
 
185 183
 #define TCP_FD_CACHE_SIZE 8
... ...
@@ -270,6 +271,56 @@ int tcp_set_src_addr(struct ip_addr* ip)
270 270
 
271 271
 
272 272
 
273
+static inline int init_sock_keepalive(int s)
274
+{
275
+	int optval;
276
+	/* enables SO_KEEPALIVE on s and applies the non-zero keepintvl/keepidle/keepcnt tcp_options; returns -1 only if SO_KEEPALIVE cannot be enabled, 0 otherwise */
277
+#ifdef HAVE_SO_KEEPALIVE
278
+	if (tcp_options.keepalive){
279
+		optval=1;
280
+		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &optval,
281
+						sizeof(optval))<0){
282
+			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
283
+						" SO_KEEPALIVE: %s\n", strerror(errno));
284
+			return -1; /* the only failure reported to the caller; the tuning options below are skipped */
285
+		}
286
+	}
287
+#endif
288
+#ifdef HAVE_TCP_KEEPINTVL
289
+	if (tcp_options.keepintvl){ /* 0 means not set: the socket option is left untouched */
290
+		optval=tcp_options.keepintvl;
291
+		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &optval,
292
+						sizeof(optval))<0){
293
+			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
294
+						" keepalive probes interval: %s\n", strerror(errno));
295
+		}
296
+	}
297
+#endif
298
+#ifdef HAVE_TCP_KEEPIDLE
299
+	if (tcp_options.keepidle){
300
+		optval=tcp_options.keepidle;
301
+		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &optval,
302
+						sizeof(optval))<0){
303
+			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
304
+						" keepalive idle interval: %s\n", strerror(errno));
305
+		}
306
+	}
307
+#endif
308
+#ifdef HAVE_TCP_KEEPCNT
309
+	if (tcp_options.keepcnt){
310
+		optval=tcp_options.keepcnt;
311
+		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &optval,
312
+						sizeof(optval))<0){
313
+			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
314
+						" maximum keepalive count: %s\n", strerror(errno));
315
+		}
316
+	}
317
+#endif
318
+	return 0; /* failures of the tuning options above are only logged */
319
+}
320
+
321
+
322
+
273 323
 /* set all socket/fd options for new sockets (e.g. before connect): 
274 324
  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
275 325
  *
... ...
@@ -303,6 +354,37 @@ static int init_sock_opt(int s)
303 303
 		/* continue, not critical */
304 304
 	}
305 305
 #endif /* !TCP_DONT_REUSEADDR */
306
+#ifdef HAVE_TCP_SYNCNT
307
+	if (tcp_options.syncnt){
308
+		optval=tcp_options.syncnt;
309
+		if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
310
+						sizeof(optval))<0){
311
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
312
+						" maximum SYN retr. count: %s\n", strerror(errno));
313
+		}
314
+	}
315
+#endif
316
+#ifdef HAVE_TCP_LINGER2
317
+	if (tcp_options.linger2){
318
+		optval=tcp_options.linger2;
319
+		if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
320
+						sizeof(optval))<0){
321
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
322
+						" maximum LINGER2 timeout: %s\n", strerror(errno));
323
+		}
324
+	}
325
+#endif
326
+#ifdef HAVE_TCP_QUICKACK
327
+	if (tcp_options.delayed_ack){
328
+		optval=0; /* reset quick ack => delayed ack */
329
+		if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
330
+						sizeof(optval))<0){
331
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
332
+						" TCP_QUICKACK: %s\n", strerror(errno));
333
+		}
334
+	}
335
+#endif /* HAVE_TCP_QUICKACK */
336
+	init_sock_keepalive(s);
306 337
 	
307 338
 	/* non-blocking */
308 339
 	flags=fcntl(s, F_GETFL);
... ...
@@ -1130,7 +1212,8 @@ get_fd:
1130 1130
 			fd=c->fd;
1131 1131
 			do_close_fd=0; /* don't close the fd on exit, it's in use */
1132 1132
 #ifdef TCP_FD_CACHE
1133
-		}else if (likely((fd_cache_e=tcp_fd_cache_get(c))!=0)){
1133
+		}else if (likely(tcp_options.fd_cache && 
1134
+							((fd_cache_e=tcp_fd_cache_get(c))!=0))){
1134 1135
 			fd=fd_cache_e->fd;
1135 1136
 			do_close_fd=0;
1136 1137
 			DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
... ...
@@ -1213,7 +1296,7 @@ send_it:
1213 1213
 	}
1214 1214
 end:
1215 1215
 #ifdef TCP_FD_CACHE
1216
-	if (unlikely(fd_cache_e==0)){
1216
+	if (unlikely((fd_cache_e==0) && tcp_options.fd_cache)){
1217 1217
 		tcp_fd_cache_add(c, fd);
1218 1218
 	}else
1219 1219
 #endif /* TCP_FD_CACHE */
... ...
@@ -1229,6 +1312,9 @@ int tcp_init(struct socket_info* sock_info)
1229 1229
 {
1230 1230
 	union sockaddr_union* addr;
1231 1231
 	int optval;
1232
+#ifdef HAVE_TCP_ACCEPT_FILTER
1233
+	struct accept_filter_arg afa;
1234
+#endif /* HAVE_TCP_ACCEPT_FILTER */
1232 1235
 #ifdef DISABLE_NAGLE
1233 1236
 	int flag;
1234 1237
 	struct protoent* pe;
... ...
@@ -1291,6 +1377,52 @@ int tcp_init(struct socket_info* sock_info)
1291 1291
 		LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
1292 1292
 		/* continue since this is not critical */
1293 1293
 	}
1294
+#ifdef HAVE_TCP_DEFER_ACCEPT
1295
+	/* linux only */
1296
+	if (tcp_options.defer_accept){
1297
+		optval=tcp_options.defer_accept;
1298
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
1299
+					(void*)&optval, sizeof(optval)) ==-1){
1300
+			LOG(L_WARN, "WARNING: tcp_init: setsockopt TCP_DEFER_ACCEPT %s\n",
1301
+						strerror(errno));
1302
+		/* continue since this is not critical */
1303
+		}
1304
+	}
1305
+#endif /* HAVE_TCP_DEFER_ACCEPT */
1306
+#ifdef HAVE_TCP_SYNCNT
1307
+	if (tcp_options.syncnt){
1308
+		optval=tcp_options.syncnt;
1309
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
1310
+						sizeof(optval))<0){
1311
+			LOG(L_WARN, "WARNING: tcp_init: failed to set"
1312
+						" maximum SYN retr. count: %s\n", strerror(errno));
1313
+		}
1314
+	}
1315
+#endif
1316
+#ifdef HAVE_TCP_ACCEPT_FILTER
1317
+	/* freebsd: install the "dataready" accept filter on sock_info->socket. NOTE(review): FreeBSD documents SO_ACCEPTFILTER for listening sockets - confirm it is valid at this point */
1318
+	if (tcp_options.defer_accept){
1319
+		memset(&afa, 0, sizeof(afa));
1320
+		strcpy(afa.af_name, "dataready");
1321
+		if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
1322
+					(void*)&afa, sizeof(afa)) ==-1){
1323
+			LOG(L_WARN, "WARNING: tcp_init: setsockopt SO_ACCEPTFILTER %s\n",
1324
+						strerror(errno));
1325
+		/* continue since this is not critical */
1326
+		}
1327
+	}
1328
+#endif /* HAVE_TCP_ACCEPT_FILTER */
1329
+#ifdef HAVE_TCP_LINGER2
1330
+	if (tcp_options.linger2){
1331
+		optval=tcp_options.linger2;
1332
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
1333
+						sizeof(optval))<0){
1334
+			LOG(L_WARN, "WARNING: tcp_init: failed to set"
1335
+						" maximum LINGER2 timeout: %s\n", strerror(errno));
1336
+		}
1337
+	}
1338
+#endif
1339
+	init_sock_keepalive(sock_info->socket);
1294 1340
 	if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
1295 1341
 		LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
1296 1342
 				sock_info->socket,  &addr->s, 
... ...
@@ -1347,7 +1479,7 @@ static void tcpconn_destroy(struct tcp_connection* tcpconn)
1347 1347
 #endif
1348 1348
 		_tcpconn_free(tcpconn);
1349 1349
 #ifdef TCP_FD_CACHE
1350
-		shutdown(fd, SHUT_RDWR);
1350
+		if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
1351 1351
 #endif /* TCP_FD_CACHE */
1352 1352
 		close(fd);
1353 1353
 		(*tcp_connections_no)--;
... ...
@@ -2060,7 +2192,7 @@ static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
2060 2060
 #endif /* USE_TLS */
2061 2061
 					_tcpconn_free(c);
2062 2062
 #ifdef TCP_FD_CACHE
2063
-					shutdown(fd, SHUT_RDWR);
2063
+					if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2064 2064
 #endif /* TCP_FD_CACHE */
2065 2065
 					close(fd);
2066 2066
 				}
... ...
@@ -2129,7 +2261,7 @@ static inline void tcpconn_destroy_all()
2129 2129
 				_tcpconn_rm(c);
2130 2130
 				if (fd>0) {
2131 2131
 #ifdef TCP_FD_CACHE
2132
-					shutdown(fd, SHUT_RDWR);
2132
+					if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2133 2133
 #endif /* TCP_FD_CACHE */
2134 2134
 					close(fd);
2135 2135
 				}
... ...
@@ -2172,7 +2304,7 @@ void tcp_main_loop()
2172 2172
 		goto error;
2173 2173
 	}
2174 2174
 #ifdef TCP_FD_CACHE
2175
-	tcp_fd_cache_init();
2175
+	if (tcp_options.fd_cache) tcp_fd_cache_init();
2176 2176
 #endif /* TCP_FD_CACHE */
2177 2177
 	
2178 2178
 	/* add all the sockets we listen on for connections */
... ...
@@ -2347,6 +2479,7 @@ int init_tcp()
2347 2347
 {
2348 2348
 	char* poll_err;
2349 2349
 	
2350
+	tcp_options_check();
2350 2351
 	/* init lock */
2351 2352
 	tcpconn_lock=lock_alloc();
2352 2353
 	if (tcpconn_lock==0){
2353 2354
new file mode 100644
... ...
@@ -0,0 +1,111 @@
0
+/* 
1
+ * $Id$
2
+ * 
3
+ * Copyright (C) 2007 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/*
18
+ * tcp options
19
+ *
20
+ * History:
21
+ * --------
22
+ *  2007-11-28  created by andrei
23
+ */
24
+
25
+#include "tcp_options.h"
26
+#include "dprint.h"
27
+
28
+
29
+struct tcp_cfg_options tcp_options;
30
+
31
+
32
+/* set defaults: fd_cache, keepalive and delayed_ack are turned on when the matching TCP_FD_CACHE / HAVE_* macro is defined; all other fields are left untouched */
33
+void init_tcp_options()
34
+{
35
+
36
+#ifdef TCP_FD_CACHE
37
+	tcp_options.fd_cache=1;
38
+#endif
39
+#ifdef HAVE_SO_KEEPALIVE
40
+	tcp_options.keepalive=1;
41
+#endif
42
+/* defer_accept default is deliberately commented out (not enabled by default)
43
+#if defined HAVE_TCP_DEFER_ACCEPT || defined HAVE_TCP_ACCEPT_FILTER
44
+	tcp_options.defer_accept=1;
45
+#endif
46
+*/
47
+#ifdef HAVE_TCP_QUICKACK
48
+	tcp_options.delayed_ack=1;
49
+#endif
50
+}
51
+
52
+
53
+
54
+#define W_OPT_NC(option) \
55
+	if (tcp_options.option){\
56
+		WARN("tcp_options: tcp_" ##option \
57
+				"cannot be enabled (recompile needed)\n"); \
58
+	}
59
+
60
+
61
+
62
+#define W_OPT_NS(option) \
63
+	if (tcp_options.option){\
64
+		WARN("tcp_options: tcp_" ##option \
65
+				"cannot be enabled (no OS support)\n"); \
66
+	}
67
+
68
+
69
+/* checks & warns if some tcp_option cannot be enabled; also forces keepalive on when keepintvl, keepidle or keepcnt is set */
70
+void tcp_options_check()
71
+{
72
+#ifndef TCP_FD_CACHE
73
+	W_OPT_NC(fd_cache); /* the fd cache is a compile-time feature */
74
+#endif
75
+
76
+#if ! defined HAVE_TCP_DEFER_ACCEPT && ! defined HAVE_TCP_ACCEPT_FILTER
77
+	W_OPT_NS(defer_accept);
78
+#endif
79
+#ifndef HAVE_TCP_SYNCNT
80
+	W_OPT_NS(syncnt);
81
+#endif
82
+#ifndef HAVE_TCP_LINGER2
83
+	W_OPT_NS(linger2);
84
+#endif
85
+#ifndef HAVE_TCP_KEEPINTVL
86
+	W_OPT_NS(keepintvl);
87
+#endif
88
+#ifndef HAVE_TCP_KEEPIDLE
89
+	W_OPT_NS(keepidle);
90
+#endif
91
+#ifndef HAVE_TCP_KEEPCNT
92
+	W_OPT_NS(keepcnt);
93
+#endif
94
+	if (tcp_options.keepintvl || tcp_options.keepidle || tcp_options.keepcnt){
95
+		tcp_options.keepalive=1; /* force on */
96
+	}
97
+#ifndef HAVE_SO_KEEPALIVE
98
+	W_OPT_NS(keepalive);
99
+#endif
100
+#ifndef HAVE_TCP_QUICKACK
101
+	W_OPT_NS(delayed_ack);
102
+#endif
103
+}
104
+
105
+
106
+
107
+void tcp_options_get(struct tcp_cfg_options* t) /* copies the global tcp_options into *t */
108
+{
109
+	*t=tcp_options; /* plain struct assignment */
110
+}
0 111
new file mode 100644
... ...
@@ -0,0 +1,116 @@
0
+/* 
1
+ * $Id$
2
+ * 
3
+ * Copyright (C) 2007 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/*
18
+ * tcp options
19
+ *
20
+ * History:
21
+ * --------
22
+ *  2007-11-28  created by andrei
23
+ */
24
+
25
+#ifndef tcp_options_h
26
+#define tcp_options_h
27
+
28
+#ifndef NO_TCP_FD_CACHE
29
+#define TCP_FD_CACHE /* enable fd caching */
30
+#endif
31
+
32
+
33
+
34
+/* defer accept: TCP_DEFER_ACCEPT on linux, accept filter on freebsd; define NO_TCP_DEFER_ACCEPT to disable both */
35
+#ifndef  NO_TCP_DEFER_ACCEPT
36
+#ifdef __OS_linux
37
+#define HAVE_TCP_DEFER_ACCEPT
38
+#elif defined __OS_freebsd
39
+#define HAVE_TCP_ACCEPT_FILTER
40
+#endif /* __OS_ */
41
+#endif /* NO_TCP_DEFER_ACCEPT */
42
+
43
+
44
+/* syn count */
45
+#ifndef NO_TCP_SYNCNT
46
+#ifdef __OS_linux
47
+#define HAVE_TCP_SYNCNT
48
+#endif /* __OS_*/
49
+#endif /* NO_TCP_SYNCNT */
50
+
51
+/* tcp linger2 */
52
+#ifndef NO_TCP_LINGER2
53
+#ifdef __OS_linux
54
+#define HAVE_TCP_LINGER2
55
+#endif /* __OS_ */
56
+#endif /* NO_TCP_LINGER2 */
57
+
58
+/* keepalive */
59
+#ifndef NO_TCP_KEEPALIVE
60
+#define HAVE_SO_KEEPALIVE
61
+#endif /* NO_TCP_KEEPALIVE */
62
+
63
+/* keepintvl */
64
+#ifndef NO_TCP_KEEPINTVL
65
+#ifdef __OS_linux
66
+#define HAVE_TCP_KEEPINTVL
67
+#endif /* __OS_ */
68
+#endif /* NO_TCP_KEEPINTVL */
69
+
70
+/* keepidle */
71
+#ifndef NO_TCP_KEEPIDLE
72
+#ifdef __OS_linux
73
+#define HAVE_TCP_KEEPIDLE
74
+#endif /* __OS_*/
75
+#endif /* NO_TCP_KEEPIDLE */
76
+
77
+
78
+/* keepcnt */
79
+#ifndef NO_TCP_KEEPCNT
80
+#ifdef __OS_linux
81
+#define HAVE_TCP_KEEPCNT
82
+#endif /* __OS_ */
83
+#endif /* NO_TCP_KEEPCNT */
84
+
85
+
86
+/* delayed ack (quick_ack) */
87
+#ifndef NO_TCP_QUICKACK
88
+#ifdef __OS_linux
89
+#define HAVE_TCP_QUICKACK
90
+#endif /* __OS_ */
91
+#endif /* NO_TCP_QUICKACK */
92
+
93
+
94
+struct tcp_cfg_options{ /* tcp settings read from the config; for the socket options, 0 is treated as not set / off by the code that applies them */
95
+	/* ser tcp options */
96
+	int fd_cache; /* on /off */
97
+	/* tcp socket options */
98
+	int defer_accept; /* on / off (freebsd accept filter); passed as the TCP_DEFER_ACCEPT value (seconds) on linux */
99
+	int delayed_ack; /* delay ack on connect */ 
100
+	int syncnt;     /* numbers of SYNs retrs. before giving up connecting */
101
+	int linger2;    /* lifetime of orphaned  FIN_WAIT2 state sockets */
102
+	int keepalive;  /* on /off */
103
+	int keepidle;   /* idle time (s) before tcp starts sending keepalives */
104
+	int keepintvl;  /* interval (s) between keepalive probes */
105
+	int keepcnt;    /* maximum no. of keepalives before giving up */
106
+};
107
+
108
+
109
+extern struct tcp_cfg_options tcp_options;
110
+
111
+void init_tcp_options();
112
+void tcp_options_check();
113
+void tcp_options_get(struct tcp_cfg_options* t);
114
+
115
+#endif /* tcp_options_h */