Browse code

tcp: config option for the async write block size

- the block size used for the async writes can now be configured
both from ser.cfg (tcp_wq_blk_size) and at runtime. This value
has only a little performance impact and only when writes are
delayed. Small values are safer (big values on proxies that
open thousands of connections over slow links would eat up a
lot of memory). For now its main use is debugging.

Andrei Pelinescu-Onciul authored on 09/03/2009 13:45:49
Showing 7 changed files
... ...
@@ -236,6 +236,20 @@ core:
236 236
                between the short name and long name in cache as CNAME record
237 237
 
238 238
 new config variables:
239
+  tcp_rd_buf_size = buffer size used for tcp reads.
240
+                    A high buffer size increases performance on servers with few
241
+                    connections and a lot of traffic on them, but also increases
242
+                     memory consumption (so for lots of connections it is better 
243
+                    to use a low value). Note also that this value limits the
244
+                    maximum datagram size that can be received over tcp.
245
+                    Default: 4096, can be changed at runtime.
246
+  tcp_wq_blk_size = block size used for tcp async writes. It should be big
247
+                    enough to hold a few datagrams. If it's smaller than a
248
+                    datagram (in fact a tcp write()) size, it will be rounded
249
+                    up. It has no influence on the number of datagrams 
250
+                    queued (for that see tcp_conn_wq_max or tcp_wq_max).
251
+                    It has mostly debugging and testing value (can be ignored).
252
+                    Default: 2100 (~ 2 INVITEs), can be changed at runtime.
239 253
   tcp_no_connect = yes/no - disable connects, ser will only accept new 
240 254
                      connections, it will never try to open new ones.
241 255
                      Default: no, can be changed at runtime.
... ...
@@ -305,6 +305,8 @@ TCP_OPT_FD_CACHE	"tcp_fd_cache"
305 305
 TCP_OPT_BUF_WRITE	"tcp_buf_write"|"tcp_async"
306 306
 TCP_OPT_CONN_WQ_MAX	"tcp_conn_wq_max"
307 307
 TCP_OPT_WQ_MAX		"tcp_wq_max"
308
+TCP_OPT_RD_BUF		"tcp_rd_buf_size"
309
+TCP_OPT_WQ_BLK		"tcp_wq_blk_size"
308 310
 TCP_OPT_DEFER_ACCEPT "tcp_defer_accept"
309 311
 TCP_OPT_DELAYED_ACK	"tcp_delayed_ack"
310 312
 TCP_OPT_SYNCNT		"tcp_syncnt"
... ...
@@ -605,6 +607,10 @@ EAT_ABLE	[\ \t\b\r]
605 605
 									return TCP_OPT_CONN_WQ_MAX; }
606 606
 <INITIAL>{TCP_OPT_WQ_MAX}	{ count(); yylval.strval=yytext;
607 607
 									return TCP_OPT_WQ_MAX; }
608
+<INITIAL>{TCP_OPT_RD_BUF}	{ count(); yylval.strval=yytext;
609
+									return TCP_OPT_RD_BUF; }
610
+<INITIAL>{TCP_OPT_WQ_BLK}	{ count(); yylval.strval=yytext;
611
+									return TCP_OPT_WQ_BLK; }
608 612
 <INITIAL>{TCP_OPT_BUF_WRITE}	{ count(); yylval.strval=yytext;
609 613
 									return TCP_OPT_BUF_WRITE; }
610 614
 <INITIAL>{TCP_OPT_DEFER_ACCEPT}	{ count(); yylval.strval=yytext;
... ...
@@ -359,6 +359,8 @@ static void free_socket_id_lst(struct socket_id* i);
359 359
 %token TCP_OPT_BUF_WRITE
360 360
 %token TCP_OPT_CONN_WQ_MAX
361 361
 %token TCP_OPT_WQ_MAX
362
+%token TCP_OPT_RD_BUF
363
+%token TCP_OPT_WQ_BLK
362 364
 %token TCP_OPT_DEFER_ACCEPT
363 365
 %token TCP_OPT_DELAYED_ACK
364 366
 %token TCP_OPT_SYNCNT
... ...
@@ -907,7 +909,23 @@ assign_stm:
907 907
 			warn("tcp support not compiled in");
908 908
 		#endif
909 909
 	}
910
-	| TCP_OPT_WQ_MAX error { yyerror("boolean value expected"); }
910
+	| TCP_OPT_WQ_MAX error { yyerror("number expected"); }
911
+	| TCP_OPT_RD_BUF EQUAL NUMBER {
912
+		#ifdef USE_TCP
913
+			tcp_default_cfg.rd_buf_size=$3;
914
+		#else
915
+			warn("tcp support not compiled in");
916
+		#endif
917
+	}
918
+	| TCP_OPT_RD_BUF error { yyerror("number expected"); }
919
+	| TCP_OPT_WQ_BLK EQUAL NUMBER {
920
+		#ifdef USE_TCP
921
+			tcp_default_cfg.wq_blk_size=$3;
922
+		#else
923
+			warn("tcp support not compiled in");
924
+		#endif
925
+	}
926
+	| TCP_OPT_WQ_BLK error { yyerror("number expected"); }
911 927
 	| TCP_OPT_DEFER_ACCEPT EQUAL NUMBER {
912 928
 		#ifdef USE_TCP
913 929
 			tcp_default_cfg.defer_accept=$3;
... ...
@@ -39,6 +39,10 @@
39 39
 										  time, timeout */
40 40
 #define DEFAULT_TCP_MAX_CONNECTIONS 2048 /* maximum connections */
41 41
 
42
+#define DEFAULT_TCP_BUF_SIZE	4096  /* buffer size used for reads */
43
+
44
+#define DEFAULT_TCP_WBUF_SIZE	2100 /*  after debugging switch to 4-16k */
45
+
42 46
 struct tcp_child{
43 47
 	pid_t pid;
44 48
 	int proc_no; /* ser proc_no, for debugging */
... ...
@@ -208,7 +208,6 @@
208 208
 #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
209 209
 
210 210
 #ifdef TCP_ASYNC
211
-#define TCP_WBUF_SIZE	1024 /* FIXME: after debugging switch to 16-32k */
212 211
 static unsigned int* tcp_total_wq=0;
213 212
 #endif
214 213
 
... ...
@@ -642,7 +641,7 @@ inline static int _wbufq_add(struct  tcp_connection* c, char* data,
642 642
 	}
643 643
 	
644 644
 	if (unlikely(q->last==0)){
645
-		wb_size=MAX_unsigned(TCP_WBUF_SIZE, size);
645
+		wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
646 646
 		wb=shm_malloc(sizeof(*wb)+wb_size-1);
647 647
 		if (unlikely(wb==0))
648 648
 			goto error;
... ...
@@ -663,7 +662,7 @@ inline static int _wbufq_add(struct  tcp_connection* c, char* data,
663 663
 	while(size){
664 664
 		last_free=wb->b_size-q->last_used;
665 665
 		if (last_free==0){
666
-			wb_size=MAX_unsigned(TCP_WBUF_SIZE, size);
666
+			wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
667 667
 			wb=shm_malloc(sizeof(*wb)+wb_size-1);
668 668
 			if (unlikely(wb==0))
669 669
 				goto error;
... ...
@@ -926,13 +925,15 @@ struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
926 926
 									int state)
927 927
 {
928 928
 	struct tcp_connection *c;
929
+	int rd_b_size;
929 930
 	
930
-	c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
931
+	rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
932
+	c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
931 933
 	if (c==0){
932 934
 		LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
933 935
 		goto error;
934 936
 	}
935
-	memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
937
+	memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
936 938
 	c->s=sock;
937 939
 	c->fd=-1; /* not initialized */
938 940
 	if (lock_init(&c->write_lock)==0){
... ...
@@ -956,7 +957,7 @@ struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
956 956
 	}
957 957
 	print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
958 958
 	DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
959
-	init_tcp_req(&c->req);
959
+	init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
960 960
 	c->id=(*connection_id)++;
961 961
 	c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
962 962
 	c->rcv.proto_reserved2=0;
... ...
@@ -133,6 +133,10 @@ static cfg_def_t tcp_cfg_def[] = {
133 133
 	{ "wq_timeout_ticks",   CFG_VAR_INT | CFG_READONLY, 0,
134 134
 									MAX_TCP_CON_LIFETIME,         0,         0,
135 135
 		"internal send_timeout value in ticks, used in async. mode"},
136
+	{ "rd_buf_size", CFG_VAR_INT | CFG_ATOMIC,    512,    65536,  0,         0,
137
+		"internal read buffer size (should be > max. expected datagram)"},
138
+	{ "wq_blk_size", CFG_VAR_INT | CFG_ATOMIC,    1,    65535,  0,         0,
139
+		"internal async write block size (debugging use only for now)"},
136 140
 	{0, 0, 0, 0, 0, 0, 0}
137 141
 };
138 142
 
... ...
@@ -175,6 +179,8 @@ void init_tcp_options()
175 175
 	tcp_default_cfg.alias_flags=TCP_ALIAS_FORCE_ADD;
176 176
 	/* flags used for adding the default aliases of a new tcp connection */
177 177
 	tcp_default_cfg.new_conn_alias_flags=TCP_ALIAS_REPLACE;
178
+	tcp_default_cfg.rd_buf_size=DEFAULT_TCP_BUF_SIZE;
179
+	tcp_default_cfg.wq_blk_size=DEFAULT_TCP_WBUF_SIZE;
178 180
 }
179 181
 
180 182
 
... ...
@@ -261,6 +267,39 @@ static int fix_max_conns(void* cfg_h, str* name, void** val)
261 261
 
262 262
 
263 263
 
264
+/** fix *val according to the tcp_cfg_def entry called "name".
265
+ * (*val must be an integer)
266
+ * 1. if the entry is an integer with a non-zero min or max, clamp *val
267
+ *    to the entry's min..max range
268
+ * 2. in that same case, call the entry's on_change_cb fixup, if defined
269
+ * @return 0 on success (or the callback's result), -1 if "name" is not found
270
+ */
271
+static int tcp_cfg_def_fix(char* name, int* val)
272
+{
273
+	cfg_def_t* c; /* cursor over the tcp_cfg_def entries */
274
+	str s; /* entry name as a str, handed to the callback */
275
+	
276
+	for (c=&tcp_cfg_def[0]; c->name; c++){ /* stops at the name==0 entry */
277
+		if (strcmp(name, c->name)==0){
278
+			/* found */
279
+			if ((c->type & CFG_VAR_INT)  && (c->min || c->max)){ /* min==max==0: nothing is checked or called */
280
+				if (*val < c->min) *val=c->min; /* raise to the lower bound */
281
+				else if (*val > c->max) *val=c->max; /* lower to the upper bound */
282
+				if (c->on_change_cb){ /* NOTE(review): only reached for range-checked int entries */
283
+					s.s=c->name;
284
+					s.len=strlen(s.s);
285
+					return c->on_change_cb(&tcp_default_cfg, &s, (void*)val); /* NOTE(review): val is an int* passed as void*, while fix_max_conns declares void** val - confirm the callbacks cope */
286
+				}
287
+			}
288
+			return 0; /* found, no callback was run */
289
+		}
290
+	}
291
+	WARN("tcp config option \"%s\" not found\n", name);
292
+	return -1; /* not found */
293
+}
294
+
295
+
296
+
264 297
 /* checks & warns if some tcp_option cannot be enabled */
265 298
 void tcp_options_check()
266 299
 {
... ...
@@ -324,6 +363,9 @@ void tcp_options_check()
324 324
 	tcp_default_cfg.tcp_wq_timeout=S_TO_TICKS(tcp_default_cfg.send_timeout_s);
325 325
 #endif /* TCP_ASYNC */
326 326
 	tcp_default_cfg.max_connections=tcp_max_connections;
327
+	tcp_cfg_def_fix("rd_buf_size", (int*)&tcp_default_cfg.rd_buf_size);
328
+	tcp_cfg_def_fix("wq_blk_size", (int*)&tcp_default_cfg.wq_blk_size);
329
+	
327 330
 }
328 331
 
329 332
 
... ...
@@ -140,6 +140,8 @@ struct cfg_group_tcp{
140 140
 	int new_conn_alias_flags;
141 141
 	/* internal, "fixed" vars */
142 142
 	unsigned int tcp_wq_timeout; /* in ticks, timeout for queued writes */
143
+	unsigned int rd_buf_size; /* read buffer size (should be > max. datagram)*/
144
+	unsigned int wq_blk_size; /* async write block size (debugging use) */
143 145
 };
144 146
 
145 147
 extern struct cfg_group_tcp tcp_default_cfg;