Browse code

- advanced tcp options support: - support for deferring tcp accepts until some data is received (linux & freebsd), default off. See NEWS: tcp_defer_accept. - support for delaying the final ACK from the 3-way handshake until some data is sent (the ACK will come with the 1st data segment). Default on when supported (linux only). See NEWS: tcp_delayed_ack. - support for limiting the number of retransmitted SYNs (linux only, see NEWS: tcp_syncnt) - support for limiting the lifetime of orphaned sockets in FIN_WAIT2 (linux only, see NEWS: tcp_linger2) - keepalive support, see NEWS: tcp_keepalive (default on), tcp_keepidle, tcp_keepintvl and tcp_keepcnt

- the FD cache can now be turned off from ser.cfg (see NEWS: tcp_fd_cache)

Andrei Pelinescu-Onciul authored on 28/11/2007 19:20:47
Showing 8 changed files
... ...
@@ -8,6 +8,13 @@ $Id$
8 8
 2.1.0 changes
9 9
 
10 10
 modules:
11
+ - auth      - added extra authentication checks support, to protect
12
+               against various replay attacks.
13
+             - params:
14
+                       - auth_extra_checks - flags specifying which extra
15
+                          message part/parts will be checked for change before
16
+                          allowing nonce reuse. See the auth module docs for
17
+                          more information (modules/auth/README).
11 18
  - blst      - new module containing script blacklist manipulations functions
12 19
                (the source of a message can be blacklisted, removed from the
13 20
                 blacklist or checked for presence in the blacklist).
... ...
@@ -95,6 +102,8 @@ modules:
95 102
                         - t_set_retr(t1, t2) - changes the retransmissions
96 103
                            intervals on the fly, on a per transaction basis.
97 104
 core:
105
+             - tcp improvements (better tcp timers, send fd cache, special
106
+                options support)
98 107
              - dns naptr support (see dns_try_naptr and dns_<proto>_pref)
99 108
              - dns srv based load balancing support (see dns_srv_lb)
100 109
              - support for locking ser's pages in memory, pre-mapping
... ...
@@ -107,6 +116,37 @@ core:
107 116
                between the short name and long name in cache as CNAME record
108 117
 
109 118
 new config variables:
119
+  tcp_fd_cache = yes | no (default yes) - if enabled FDs used for sending
120
+     will be cached inside the process calling tcp_send (performance increase
121
+     for sending over tcp at the cost of slightly slower connection closing and
122
+     extra FDs kept open)
123
+  tcp_defer_accept =  yes | no (default no) on freebsd  / number of seconds
124
+        before timeout on linux (default disabled) - tcp accepts will be 
125
+        delayed until some data is received (improves performance on proxies
126
+        with lots of opened tcp connections). See linux tcp(7) TCP_DEFER_ACCEPT
127
+        or freebsd ACCF_DATA(9). For now linux and freebsd only.
128
+        WARNING: the  linux TCP_DEFER_ACCEPT is buggy (<=2.6.23) and doesn't 
129
+         work exactly as expected (if no data is received it will retransmit 
130
+         syn acks for ~ 190 s, irrespective of the set timeout and then it will
131
+         silently drop the connection without sending a RST or FIN). Try to 
132
+         use it together with tcp_syncnt (this way the number of retrans.
133
+          SYNACKs can be limited => the timeout can be controlled in some way).
134
+  tcp_delayed_ack  = yes | no (default yes when supported) - initial ACK for
135
+        opened connections will be delayed and sent with the first data
136
+        segment (see linux tcp(7) TCP_QUICKACK). For now linux only.
137
+  tcp_syncnt = number of syn retr. (default not set) - number of SYN 
138
+        retransmissions before aborting a connect attempt (see linux tcp(7)
139
+        TCP_SYNCNT). Linux only.
140
+  tcp_linger2 = seconds (not set by default) - lifetime of orphaned sockets
141
+        in FIN_WAIT2 state (overrides tcp_fin_timeout, see linux tcp(7)
142
+        TCP_LINGER2). Linux only.
143
+  tcp_keepalive = yes | no (default yes) - enables keepalive for tcp.
144
+  tcp_keepidle  = seconds (not set by default) - time before starting to send
145
+         keepalives, if the connection is idle. Linux only.
146
+  tcp_keepintvl = seconds (not set by default) - time interval between 
147
+         keepalive probes, when the previous probe failed. Linux only.
148
+  tcp_keepcnt = number (not set by default) - number of keepalives sent before
149
+         dropping the connection. Linux only.
110 150
   pmtu_discovery = 0 | 1 (default 0) - set DF bit in outbound IP if enabled
111 151
   dns_srv_lb = yes | no (default no) - enable dns srv weight based load 
112 152
     balancing (see doc/dns.txt)
... ...
@@ -72,6 +72,8 @@
72 72
  *  2007-09-10  introduced phone2tel option which allows NOT to consider
73 73
  *              user=phone URIs as TEL URIs (jiri)
74 74
  *  2007-10-10  added DNS_SEARCH_FMATCH (mma)
75
+ *  2007-11-28  added TCP_OPT_{FD_CACHE, DEFER_ACCEPT, DELAYED_ACK, SYNCNT,
76
+ *              LINGER2, KEEPALIVE, KEEPIDLE, KEEPINTVL, KEEPCNT} (andrei)
75 77
 */
76 78
 
77 79
 
... ...
@@ -290,6 +292,15 @@ TCP_POLL_METHOD		"tcp_poll_method"
290 292
 TCP_MAX_CONNECTIONS	"tcp_max_connections"
291 293
 TCP_SOURCE_IPV4		"tcp_source_ipv4"
292 294
 TCP_SOURCE_IPV6		"tcp_source_ipv6"
295
+TCP_OPT_FD_CACHE	"tcp_fd_cache"
296
+TCP_OPT_DEFER_ACCEPT "tcp_defer_accept"
297
+TCP_OPT_DELAYED_ACK	"tcp_delayed_ack"
298
+TCP_OPT_SYNCNT		"tcp_syncnt"
299
+TCP_OPT_LINGER2		"tcp_linger2"
300
+TCP_OPT_KEEPALIVE	"tcp_keepalive"
301
+TCP_OPT_KEEPIDLE	"tcp_keepidle"
302
+TCP_OPT_KEEPINTVL	"tcp_keepintvl"
303
+TCP_OPT_KEEPCNT		"tcp_keepcnt"
293 304
 DISABLE_TLS		"disable_tls"|"tls_disable"
294 305
 ENABLE_TLS		"enable_tls"|"tls_enable"
295 306
 TLSLOG			"tlslog"|"tls_log"
... ...
@@ -548,6 +559,24 @@ EAT_ABLE	[\ \t\b\r]
548 559
 									return TCP_SOURCE_IPV4; }
549 560
 <INITIAL>{TCP_SOURCE_IPV6}		{ count(); yylval.strval=yytext;
550 561
 									return TCP_SOURCE_IPV6; }
562
+<INITIAL>{TCP_OPT_FD_CACHE}		{ count(); yylval.strval=yytext;
563
+									return TCP_OPT_FD_CACHE; }
564
+<INITIAL>{TCP_OPT_DEFER_ACCEPT}	{ count(); yylval.strval=yytext;
565
+									return TCP_OPT_DEFER_ACCEPT; }
566
+<INITIAL>{TCP_OPT_DELAYED_ACK}	{ count(); yylval.strval=yytext;
567
+									return TCP_OPT_DELAYED_ACK; }
568
+<INITIAL>{TCP_OPT_SYNCNT}		{ count(); yylval.strval=yytext;
569
+									return TCP_OPT_SYNCNT; }
570
+<INITIAL>{TCP_OPT_LINGER2}		{ count(); yylval.strval=yytext;
571
+									return TCP_OPT_LINGER2; }
572
+<INITIAL>{TCP_OPT_KEEPALIVE}	{ count(); yylval.strval=yytext;
573
+									return TCP_OPT_KEEPALIVE; }
574
+<INITIAL>{TCP_OPT_KEEPIDLE}		{ count(); yylval.strval=yytext;
575
+									return TCP_OPT_KEEPIDLE; }
576
+<INITIAL>{TCP_OPT_KEEPINTVL}	{ count(); yylval.strval=yytext;
577
+									return TCP_OPT_KEEPINTVL; }
578
+<INITIAL>{TCP_OPT_KEEPCNT}	{ count(); yylval.strval=yytext;
579
+									return TCP_OPT_KEEPCNT; }
551 580
 <INITIAL>{DISABLE_TLS}	{ count(); yylval.strval=yytext; return DISABLE_TLS; }
552 581
 <INITIAL>{ENABLE_TLS}	{ count(); yylval.strval=yytext; return ENABLE_TLS; }
553 582
 <INITIAL>{TLSLOG}		{ count(); yylval.strval=yytext; return TLS_PORT_NO; }
... ...
@@ -85,6 +85,8 @@
85 85
  * 2007-09-10  introduced phone2tel option which allows NOT to consider
86 86
  *             user=phone URIs as TEL URIs (jiri)
87 87
  * 2007-10-10  added DNS_SEARCH_FMATCH (mma)
88
+ * 2007-11-28  added TCP_OPT_{FD_CACHE, DEFER_ACCEPT, DELAYED_ACK, SYNCNT,
89
+ *              LINGER2, KEEPALIVE, KEEPIDLE, KEEPINTVL, KEEPCNT} (andrei)
88 90
 */
89 91
 
90 92
 %{
... ...
@@ -112,6 +114,7 @@
112 114
 #include "select.h"
113 115
 #include "flags.h"
114 116
 #include "tcp_init.h"
117
+#include "tcp_options.h"
115 118
 
116 119
 #include "config.h"
117 120
 #ifdef CORE_TLS
... ...
@@ -330,6 +333,15 @@ static struct socket_id* mk_listen_id(char*, int, int);
330 333
 %token TCP_MAX_CONNECTIONS
331 334
 %token TCP_SOURCE_IPV4
332 335
 %token TCP_SOURCE_IPV6
336
+%token TCP_OPT_FD_CACHE
337
+%token TCP_OPT_DEFER_ACCEPT
338
+%token TCP_OPT_DELAYED_ACK
339
+%token TCP_OPT_SYNCNT
340
+%token TCP_OPT_LINGER2
341
+%token TCP_OPT_KEEPALIVE
342
+%token TCP_OPT_KEEPIDLE
343
+%token TCP_OPT_KEEPINTVL
344
+%token TCP_OPT_KEEPCNT
333 345
 %token DISABLE_TLS
334 346
 %token ENABLE_TLS
335 347
 %token TLSLOG
... ...
@@ -783,6 +795,78 @@ assign_stm:
783 795
 		pkg_free($3);
784 796
 	}
785 797
 	| TCP_SOURCE_IPV6 EQUAL error { yyerror("IPv6 address expected"); }
798
+	| TCP_OPT_FD_CACHE EQUAL NUMBER {
799
+		#ifdef USE_TCP
800
+			tcp_options.fd_cache=$3;
801
+		#else
802
+			warn("tcp support not compiled in");
803
+		#endif
804
+	}
805
+	| TCP_OPT_FD_CACHE EQUAL error { yyerror("boolean value expected"); }
806
+	| TCP_OPT_DEFER_ACCEPT EQUAL NUMBER {
807
+		#ifdef USE_TCP
808
+			tcp_options.defer_accept=$3;
809
+		#else
810
+			warn("tcp support not compiled in");
811
+		#endif
812
+	}
813
+	| TCP_OPT_DEFER_ACCEPT EQUAL error { yyerror("boolean value expected"); }
814
+	| TCP_OPT_DELAYED_ACK EQUAL NUMBER {
815
+		#ifdef USE_TCP
816
+			tcp_options.delayed_ack=$3;
817
+		#else
818
+			warn("tcp support not compiled in");
819
+		#endif
820
+	}
821
+	| TCP_OPT_DELAYED_ACK EQUAL error { yyerror("boolean value expected"); }
822
+	| TCP_OPT_SYNCNT EQUAL NUMBER {
823
+		#ifdef USE_TCP
824
+			tcp_options.syncnt=$3;
825
+		#else
826
+			warn("tcp support not compiled in");
827
+		#endif
828
+	}
829
+	| TCP_OPT_SYNCNT EQUAL error { yyerror("number expected"); }
830
+	| TCP_OPT_LINGER2 EQUAL NUMBER {
831
+		#ifdef USE_TCP
832
+			tcp_options.linger2=$3;
833
+		#else
834
+			warn("tcp support not compiled in");
835
+		#endif
836
+	}
837
+	| TCP_OPT_LINGER2 EQUAL error { yyerror("number expected"); }
838
+	| TCP_OPT_KEEPALIVE EQUAL NUMBER {
839
+		#ifdef USE_TCP
840
+			tcp_options.keepalive=$3;
841
+		#else
842
+			warn("tcp support not compiled in");
843
+		#endif
844
+	}
845
+	| TCP_OPT_KEEPALIVE EQUAL error { yyerror("boolean value expected");}
846
+	| TCP_OPT_KEEPIDLE EQUAL NUMBER {
847
+		#ifdef USE_TCP
848
+			tcp_options.keepidle=$3;
849
+		#else
850
+			warn("tcp support not compiled in");
851
+		#endif
852
+	}
853
+	| TCP_OPT_KEEPIDLE EQUAL error { yyerror("number expected"); }
854
+	| TCP_OPT_KEEPINTVL EQUAL NUMBER {
855
+		#ifdef USE_TCP
856
+			tcp_options.keepintvl=$3;
857
+		#else
858
+			warn("tcp support not compiled in");
859
+		#endif
860
+	}
861
+	| TCP_OPT_KEEPINTVL EQUAL error { yyerror("number expected"); }
862
+	| TCP_OPT_KEEPCNT EQUAL NUMBER {
863
+		#ifdef USE_TCP
864
+			tcp_options.keepcnt=$3;
865
+		#else
866
+			warn("tcp support not compiled in");
867
+		#endif
868
+	}
869
+	| TCP_OPT_KEEPCNT EQUAL error { yyerror("number expected"); }
786 870
 	| DISABLE_TLS EQUAL NUMBER {
787 871
 		#ifdef USE_TLS
788 872
 			tls_disable=$3;
... ...
@@ -37,6 +37,7 @@
37 37
 #include "pt.h"
38 38
 #include "ut.h"
39 39
 #include "tcp_info.h"
40
+#include "tcp_options.h"
40 41
 #include "core_cmd.h"
41 42
 
42 43
 #ifdef USE_DNS_CACHE
... ...
@@ -470,12 +471,11 @@ all:
470 471
 			}
471 472
 		}
472 473
 		rpc->add(c, "{", &handle);
473
-		rpc->struct_add(handle, "ddddddd",
474
+		rpc->struct_add(handle, "dddddd",
474 475
 			"pool  ", i,
475 476
 			"frags ", (unsigned int)frags,
476 477
 			"t. misses", (unsigned int)misses,
477 478
 			"mem   ", (unsigned int)mem,
478
-			"bitmap", (unsigned int)shm_block->pool[i].bitmap,
479 479
 			"missed", (unsigned int)shm_block->pool[i].missed,
480 480
 			"hits",   (unsigned int)shm_block->pool[i].hits
481 481
 		);
... ...
@@ -490,7 +490,7 @@ all:
490 490
 		main_b_frags+=shm_block->free_hash[r].no;
491 491
 	}
492 492
 	rpc->add(c, "{", &handle);
493
-	rpc->struct_add(handle, "dddddddddddddd",
493
+	rpc->struct_add(handle, "ddddddddddddd",
494 494
 		"max_frags      ", (unsigned int)max_frags,
495 495
 		"max_frags_pool ", max_frags_pool,
496 496
 		"max_frags_hash", max_frags_hash,
... ...
@@ -503,8 +503,7 @@ all:
503 503
 		"in_pools_frags ", (unsigned int)pool_frags,
504 504
 		"main_s_frags   ", (unsigned int)main_s_frags,
505 505
 		"main_b_frags   ", (unsigned int)main_b_frags,
506
-		"main_frags     ", (unsigned int)(main_b_frags+main_s_frags),
507
-		"main_bitmap    ", (unsigned int)shm_block->bitmap
506
+		"main_frags     ", (unsigned int)(main_b_frags+main_s_frags)
508 507
 	);
509 508
 }
510 509
 
... ...
@@ -546,6 +545,43 @@ static void core_tcpinfo(rpc_t* rpc, void* c)
546 545
 #endif
547 546
 }
548 547
 
548
+
549
+
550
+static const char* core_tcp_options_doc[] = {
551
+	"Returns active tcp options.",    /* Documentation string */
552
+	0                                 /* Method signature(s) */
553
+};
554
+
555
+static void core_tcp_options(rpc_t* rpc, void* c)
556
+{
557
+	void *handle;
558
+#ifdef USE_TCP
559
+	struct tcp_cfg_options t;
560
+
561
+	if (!tcp_disable){
562
+		tcp_options_get(&t);
563
+		rpc->add(c, "{", &handle);
564
+		rpc->struct_add(handle, "ddddddddd",
565
+			"fd_cache",		t.fd_cache,
566
+			"defer_accept",	t.defer_accept,
567
+			"delayed_ack",	t.delayed_ack,
568
+			"syncnt",		t.syncnt,
569
+			"linger2",		t.linger2,
570
+			"keepalive",	t.keepalive,
571
+			"keepidle",		t.keepidle,
572
+			"keepintvl",	t.keepintvl,
573
+			"keepcnt",		t.keepcnt
574
+		);
575
+	}else{
576
+		rpc->fault(c, 500, "tcp support disabled");
577
+	}
578
+#else
579
+	rpc->fault(c, 500, "tcp support not compiled");
580
+#endif
581
+}
582
+
583
+
584
+
549 585
 /*
550 586
  * RPC Methods exported by this module
551 587
  */
... ...
@@ -564,7 +600,8 @@ rpc_export_t core_rpc_methods[] = {
564 600
 #if defined(SF_MALLOC) || defined(LL_MALLOC)
565 601
 	{"core.sfmalloc",          core_sfmalloc,          core_sfmalloc_doc,   0},
566 602
 #endif
567
-	{"core.tcp_info",          core_tcpinfo,           core_tcpinfo_doc,          0	},
603
+	{"core.tcp_info",          core_tcpinfo,           core_tcpinfo_doc,    0},
604
+	{"core.tcp_options",       core_tcp_options,       core_tcp_options_doc,0},
568 605
 #ifdef USE_DNS_CACHE
569 606
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,     0	},
570 607
 	{"dns.debug",          dns_cache_debug,           dns_cache_debug_doc,        0	},
... ...
@@ -42,5 +42,4 @@ struct tcp_gen_info{
42 42
 
43 43
 void tcp_get_info(struct tcp_gen_info* ti);
44 44
 
45
-
46 45
 #endif
... ...
@@ -85,6 +85,8 @@
85 85
  *               io_watch_add-ing its fd - it's safer this way (andrei)
86 86
  *  2007-11-26  improved tcp timers: switched to local_timer (andrei)
87 87
  *  2007-11-27  added send fd cache and reader fd reuse (andrei)
88
+ *  2007-11-28  added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
89
+ *               KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
88 90
  */
89 91
 
90 92
 
... ...
@@ -142,6 +144,7 @@
142 144
 #endif
143 145
 
144 146
 #include "tcp_info.h"
147
+#include "tcp_options.h"
145 148
 
146 149
 #define local_malloc pkg_malloc
147 150
 #define local_free   pkg_free
... ...
@@ -178,8 +181,6 @@ enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
178 181
 				F_TCPCONN, F_TCPCHILD, F_PROC };
179 182
 
180 183
 
181
-#define TCP_FD_CACHE
182
-
183 184
 #ifdef TCP_FD_CACHE
184 185
 
185 186
 #define TCP_FD_CACHE_SIZE 8
... ...
@@ -270,6 +271,56 @@ int tcp_set_src_addr(struct ip_addr* ip)
270 271
 
271 272
 
272 273
 
274
/*
 * Applies the keepalive related settings from tcp_options to the socket s.
 *
 * SO_KEEPALIVE is turned on if tcp_options.keepalive is set; if that fails
 * a warning is logged and -1 is returned immediately.
 * TCP_KEEPINTVL, TCP_KEEPIDLE and TCP_KEEPCNT are then set, in this order,
 * for every corresponding tcp_options value that is non-zero (and only if
 * support for the option was compiled in); failures for these three are
 * logged and otherwise ignored.
 *
 * Returns: 0 on success, -1 if SO_KEEPALIVE could not be enabled.
 */
static inline int init_sock_keepalive(int s)
{
	int val;

#ifdef HAVE_SO_KEEPALIVE
	if (tcp_options.keepalive != 0) {
		val = 1;
		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) < 0) {
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
						" SO_KEEPALIVE: %s\n", strerror(errno));
			return -1;
		}
	}
#endif /* HAVE_SO_KEEPALIVE */

#ifdef HAVE_TCP_KEEPINTVL
	/* interval between two keepalive probes */
	if (tcp_options.keepintvl != 0) {
		val = tcp_options.keepintvl;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" keepalive probes interval: %s\n", strerror(errno));
		}
	}
#endif /* HAVE_TCP_KEEPINTVL */

#ifdef HAVE_TCP_KEEPIDLE
	/* idle time before the first keepalive probe */
	if (tcp_options.keepidle != 0) {
		val = tcp_options.keepidle;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" keepalive idle interval: %s\n", strerror(errno));
		}
	}
#endif /* HAVE_TCP_KEEPIDLE */

#ifdef HAVE_TCP_KEEPCNT
	/* number of probes before the connection is dropped */
	if (tcp_options.keepcnt != 0) {
		val = tcp_options.keepcnt;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" maximum keepalive count: %s\n", strerror(errno));
		}
	}
#endif /* HAVE_TCP_KEEPCNT */

	return 0;
}
321
+
322
+
323
+
273 324
 /* set all socket/fd options for new sockets (e.g. before connect): 
274 325
  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
275 326
  *
... ...
@@ -303,6 +354,37 @@ static int init_sock_opt(int s)
303 354
 		/* continue, not critical */
304 355
 	}
305 356
 #endif /* !TCP_DONT_REUSEADDR */
357
+#ifdef HAVE_TCP_SYNCNT
358
+	if (tcp_options.syncnt){
359
+		optval=tcp_options.syncnt;
360
+		if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
361
+						sizeof(optval))<0){
362
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
363
+						" maximum SYN retr. count: %s\n", strerror(errno));
364
+		}
365
+	}
366
+#endif
367
+#ifdef HAVE_TCP_LINGER2
368
+	if (tcp_options.linger2){
369
+		optval=tcp_options.linger2;
370
+		if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
371
+						sizeof(optval))<0){
372
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
373
+						" maximum LINGER2 timeout: %s\n", strerror(errno));
374
+		}
375
+	}
376
+#endif
377
+#ifdef HAVE_TCP_QUICKACK
378
+	if (tcp_options.delayed_ack){
379
+		optval=0; /* reset quick ack => delayed ack */
380
+		if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
381
+						sizeof(optval))<0){
382
+			LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
383
+						" TCP_QUICKACK: %s\n", strerror(errno));
384
+		}
385
+	}
386
+#endif /* HAVE_TCP_QUICKACK */
387
+	init_sock_keepalive(s);
306 388
 	
307 389
 	/* non-blocking */
308 390
 	flags=fcntl(s, F_GETFL);
... ...
@@ -1130,7 +1212,8 @@ get_fd:
1130 1212
 			fd=c->fd;
1131 1213
 			do_close_fd=0; /* don't close the fd on exit, it's in use */
1132 1214
 #ifdef TCP_FD_CACHE
1133
-		}else if (likely((fd_cache_e=tcp_fd_cache_get(c))!=0)){
1215
+		}else if (likely(tcp_options.fd_cache && 
1216
+							((fd_cache_e=tcp_fd_cache_get(c))!=0))){
1134 1217
 			fd=fd_cache_e->fd;
1135 1218
 			do_close_fd=0;
1136 1219
 			DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
... ...
@@ -1213,7 +1296,7 @@ send_it:
1213 1296
 	}
1214 1297
 end:
1215 1298
 #ifdef TCP_FD_CACHE
1216
-	if (unlikely(fd_cache_e==0)){
1299
+	if (unlikely((fd_cache_e==0) && tcp_options.fd_cache)){
1217 1300
 		tcp_fd_cache_add(c, fd);
1218 1301
 	}else
1219 1302
 #endif /* TCP_FD_CACHE */
... ...
@@ -1229,6 +1312,9 @@ int tcp_init(struct socket_info* sock_info)
1229 1312
 {
1230 1313
 	union sockaddr_union* addr;
1231 1314
 	int optval;
1315
+#ifdef HAVE_TCP_ACCEPT_FILTER
1316
+	struct accept_filter_arg afa;
1317
+#endif /* HAVE_TCP_ACCEPT_FILTER */
1232 1318
 #ifdef DISABLE_NAGLE
1233 1319
 	int flag;
1234 1320
 	struct protoent* pe;
... ...
@@ -1291,6 +1377,52 @@ int tcp_init(struct socket_info* sock_info)
1291 1377
 		LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
1292 1378
 		/* continue since this is not critical */
1293 1379
 	}
1380
+#ifdef HAVE_TCP_DEFER_ACCEPT
1381
+	/* linux only */
1382
+	if (tcp_options.defer_accept){
1383
+		optval=tcp_options.defer_accept;
1384
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
1385
+					(void*)&optval, sizeof(optval)) ==-1){
1386
+			LOG(L_WARN, "WARNING: tcp_init: setsockopt TCP_DEFER_ACCEPT %s\n",
1387
+						strerror(errno));
1388
+		/* continue since this is not critical */
1389
+		}
1390
+	}
1391
+#endif /* HAVE_TCP_DEFER_ACCEPT */
1392
+#ifdef HAVE_TCP_SYNCNT
1393
+	if (tcp_options.syncnt){
1394
+		optval=tcp_options.syncnt;
1395
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
1396
+						sizeof(optval))<0){
1397
+			LOG(L_WARN, "WARNING: tcp_init: failed to set"
1398
+						" maximum SYN retr. count: %s\n", strerror(errno));
1399
+		}
1400
+	}
1401
+#endif
1402
#ifdef HAVE_TCP_ACCEPT_FILTER
	/* freebsd: defer accepts by attaching the "dataready" accept filter to
	 * the listening socket (only if tcp_options.defer_accept is set) */
	if (tcp_options.defer_accept){
		memset(&afa, 0, sizeof(afa));
		strcpy(afa.af_name, "dataready");
		/* pass the filter argument declared at the top of the function
		 * (afa) */
		if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
					(void*)&afa, sizeof(afa)) ==-1){
			LOG(L_WARN, "WARNING: tcp_init: setsockopt SO_ACCEPTFILTER %s\n",
						strerror(errno));
			/* continue since this is not critical */
		}
	}
#endif /* HAVE_TCP_ACCEPT_FILTER */
1415
+#ifdef HAVE_TCP_LINGER2
1416
+	if (tcp_options.linger2){
1417
+		optval=tcp_options.linger2;
1418
+		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
1419
+						sizeof(optval))<0){
1420
+			LOG(L_WARN, "WARNING: tcp_init: failed to set"
1421
+						" maximum LINGER2 timeout: %s\n", strerror(errno));
1422
+		}
1423
+	}
1424
+#endif
1425
+	init_sock_keepalive(sock_info->socket);
1294 1426
 	if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
1295 1427
 		LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
1296 1428
 				sock_info->socket,  &addr->s, 
... ...
@@ -1347,7 +1479,7 @@ static void tcpconn_destroy(struct tcp_connection* tcpconn)
1347 1479
 #endif
1348 1480
 		_tcpconn_free(tcpconn);
1349 1481
 #ifdef TCP_FD_CACHE
1350
-		shutdown(fd, SHUT_RDWR);
1482
+		if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
1351 1483
 #endif /* TCP_FD_CACHE */
1352 1484
 		close(fd);
1353 1485
 		(*tcp_connections_no)--;
... ...
@@ -2060,7 +2192,7 @@ static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
2060 2192
 #endif /* USE_TLS */
2061 2193
 					_tcpconn_free(c);
2062 2194
 #ifdef TCP_FD_CACHE
2063
-					shutdown(fd, SHUT_RDWR);
2195
+					if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2064 2196
 #endif /* TCP_FD_CACHE */
2065 2197
 					close(fd);
2066 2198
 				}
... ...
@@ -2129,7 +2261,7 @@ static inline void tcpconn_destroy_all()
2129 2261
 				_tcpconn_rm(c);
2130 2262
 				if (fd>0) {
2131 2263
 #ifdef TCP_FD_CACHE
2132
-					shutdown(fd, SHUT_RDWR);
2264
+					if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2133 2265
 #endif /* TCP_FD_CACHE */
2134 2266
 					close(fd);
2135 2267
 				}
... ...
@@ -2172,7 +2304,7 @@ void tcp_main_loop()
2172 2304
 		goto error;
2173 2305
 	}
2174 2306
 #ifdef TCP_FD_CACHE
2175
-	tcp_fd_cache_init();
2307
+	if (tcp_options.fd_cache) tcp_fd_cache_init();
2176 2308
 #endif /* TCP_FD_CACHE */
2177 2309
 	
2178 2310
 	/* add all the sockets we listen on for connections */
... ...
@@ -2347,6 +2479,7 @@ int init_tcp()
2347 2479
 {
2348 2480
 	char* poll_err;
2349 2481
 	
2482
+	tcp_options_check();
2350 2483
 	/* init lock */
2351 2484
 	tcpconn_lock=lock_alloc();
2352 2485
 	if (tcpconn_lock==0){
2353 2486
new file mode 100644
... ...
@@ -0,0 +1,111 @@
1
+/* 
2
+ * $Id$
3
+ * 
4
+ * Copyright (C) 2007 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/*
19
+ * tcp options
20
+ *
21
+ * History:
22
+ * --------
23
+ *  2007-11-28  created by andrei
24
+ */
25
+
26
+#include "tcp_options.h"
27
+#include "dprint.h"
28
+
29
+
30
+struct tcp_cfg_options tcp_options;
31
+
32
+
33
/*
 * Loads the defaults into the global tcp_options.
 *
 * fd caching, keepalive and delayed ack are switched on, each one only if
 * support for it is compiled in (TCP_FD_CACHE, HAVE_SO_KEEPALIVE,
 * HAVE_TCP_QUICKACK). No other field is written; in particular
 * defer_accept is not enabled here, even when the OS supports it.
 */
void init_tcp_options()
{
#ifdef HAVE_TCP_QUICKACK
	tcp_options.delayed_ack = 1;
#endif /* HAVE_TCP_QUICKACK */

#ifdef HAVE_SO_KEEPALIVE
	tcp_options.keepalive = 1;
#endif /* HAVE_SO_KEEPALIVE */

#ifdef TCP_FD_CACHE
	tcp_options.fd_cache = 1;
#endif /* TCP_FD_CACHE */
}
52
+
53
+
54
+
55
/*
 * W_OPT_NC(option): warn that tcp_<option> is set in the config but cannot
 * be enabled because support for it is Not Compiled in.
 * option must be the name of a field of the global tcp_options; it is
 * turned into text with the stringizing operator (#option).
 * Wrapped in do{}while(0) so that "W_OPT_NC(x);" is a single statement.
 */
#define W_OPT_NC(option) \
	do{ \
		if (tcp_options.option){ \
			WARN("tcp_options: tcp_" #option \
					" cannot be enabled (recompile needed)\n"); \
		} \
	}while(0)
60
+
61
+
62
+
63
/*
 * W_OPT_NS(option): warn that tcp_<option> is set in the config but cannot
 * be enabled because the OS does Not Support it.
 * option must be the name of a field of the global tcp_options; it is
 * turned into text with the stringizing operator (#option).
 * Wrapped in do{}while(0) so that "W_OPT_NS(x);" is a single statement.
 */
#define W_OPT_NS(option) \
	do{ \
		if (tcp_options.option){ \
			WARN("tcp_options: tcp_" #option \
					" cannot be enabled (no OS support)\n"); \
		} \
	}while(0)
68
+
69
+
70
+/* checks & warns if some tcp_option cannot be enabled */
71
+void tcp_options_check()
72
+{
73
+#ifndef TCP_FD_CACHE
74
+	W_OPT_NC(defer_accept);
75
+#endif
76
+
77
+#if ! defined HAVE_TCP_DEFER_ACCEPT && ! defined HAVE_TCP_ACCEPT_FILTER
78
+	W_OPT_NS(defer_accept);
79
+#endif
80
+#ifndef HAVE_TCP_SYNCNT
81
+	W_OPT_NS(syncnt);
82
+#endif
83
+#ifndef HAVE_TCP_LINGER2
84
+	W_OPT_NS(linger2);
85
+#endif
86
+#ifndef HAVE_TCP_KEEPINTVL
87
+	W_OPT_NS(keepintvl);
88
+#endif
89
+#ifndef HAVE_TCP_KEEPIDLE
90
+	W_OPT_NS(keepidle);
91
+#endif
92
+#ifndef HAVE_TCP_KEEPCNT
93
+	W_OPT_NS(keepcnt);
94
+#endif
95
+	if (tcp_options.keepintvl || tcp_options.keepidle || tcp_options.keepcnt){
96
+		tcp_options.keepalive=1; /* force on */
97
+	}
98
+#ifndef HAVE_SO_KEEPALIVE
99
+	W_OPT_NS(keepalive);
100
+#endif
101
+#ifndef HAVE_TCP_QUICKACK
102
+	W_OPT_NS(delayed_ack);
103
+#endif
104
+}
105
+
106
+
107
+
108
+void tcp_options_get(struct tcp_cfg_options* t)
109
+{
110
+	*t=tcp_options;
111
+}
0 112
new file mode 100644
... ...
@@ -0,0 +1,116 @@
1
+/* 
2
+ * $Id$
3
+ * 
4
+ * Copyright (C) 2007 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/*
19
+ * tcp options
20
+ *
21
+ * History:
22
+ * --------
23
+ *  2007-11-28  created by andrei
24
+ */
25
+
26
+#ifndef tcp_options_h
27
+#define tcp_options_h
28
+
29
/* fd caching (ser feature): compiled in unless NO_TCP_FD_CACHE is defined */
#ifndef NO_TCP_FD_CACHE
#define TCP_FD_CACHE /* enable fd caching */
#endif


/*
 * Socket option availability.
 * Each HAVE_* macro below is defined when the option was not disabled at
 * compile time through the matching NO_TCP_* macro and, for the OS specific
 * ones, when the corresponding __OS_* macro is defined (the __OS_* macros
 * are not defined in this file; presumably they come from the build).
 */

/* defer accept */
#ifndef  NO_TCP_DEFER_ACCEPT
#ifdef __OS_linux
#define HAVE_TCP_DEFER_ACCEPT
#elif defined __OS_freebsd
#define HAVE_TCP_ACCEPT_FILTER
#endif /* __OS_ */
#endif /* NO_TCP_DEFER_ACCEPT */


/* syn count */
#ifndef NO_TCP_SYNCNT
#ifdef __OS_linux
#define HAVE_TCP_SYNCNT
#endif /* __OS_*/
#endif /* NO_TCP_SYNCNT */

/* tcp linger2 */
#ifndef NO_TCP_LINGER2
#ifdef __OS_linux
#define HAVE_TCP_LINGER2
#endif /* __OS_ */
#endif /* NO_TCP_LINGER2 */

/* keepalive (not tied to an __OS_* macro) */
#ifndef NO_TCP_KEEPALIVE
#define HAVE_SO_KEEPALIVE
#endif /* NO_TCP_KEEPALIVE */

/* keepintvl */
#ifndef NO_TCP_KEEPINTVL
#ifdef __OS_linux
#define HAVE_TCP_KEEPINTVL
#endif /* __OS_ */
#endif /* NO_TCP_KEEPINTVL */

/* keepidle */
#ifndef NO_TCP_KEEPIDLE
#ifdef __OS_linux
#define HAVE_TCP_KEEPIDLE
#endif /* __OS_*/
#endif /* NO_TCP_KEEPIDLE */


/* keepcnt */
#ifndef NO_TCP_KEEPCNT
#ifdef __OS_linux
#define HAVE_TCP_KEEPCNT
#endif /* __OS_ */
#endif /* NO_TCP_KEEPCNT */


/* delayed ack (quick_ack) */
#ifndef NO_TCP_QUICKACK
#ifdef __OS_linux
#define HAVE_TCP_QUICKACK
#endif /* __OS_ */
#endif /* NO_TCP_QUICKACK */
93
+
94
+
95
/*
 * Run-time tcp configuration (the global instance is tcp_options).
 * The code using it treats 0 as "off" / "not set" for every field.
 */
struct tcp_cfg_options{
	/* ser tcp options */
	int fd_cache;     /* on / off: cache the fds used for sending */
	/* tcp socket options */
	int defer_accept; /* 0 = off; otherwise on (freebsd accept filter) or
						 the value passed to TCP_DEFER_ACCEPT (linux:
						 seconds before timeout) */
	int delayed_ack;  /* on / off: delay the ack on connect */
	int syncnt;       /* number of SYN retrs. before giving up connecting */
	int linger2;      /* lifetime of orphaned FIN_WAIT2 state sockets */
	int keepalive;    /* on / off */
	int keepidle;     /* idle time (s) before tcp starts sending keepalives */
	int keepintvl;    /* interval between keepalive probes */
	int keepcnt;      /* maximum no. of keepalives before giving up */
};
108
+
109
+
110
+extern struct tcp_cfg_options tcp_options;
111
+
112
+void init_tcp_options();
113
+void tcp_options_check();
114
+void tcp_options_get(struct tcp_cfg_options* t);
115
+
116
+#endif /* tcp_options_h */