Browse code

Merge remote branch 'origin/andrei/raw_sock'

Raw socket support for sending UDP IPv4 packets
(major performance increase on multi-cpu machines running linux:
40-50% faster at least in stateless mode).

* origin/andrei/raw_sock:
NEWS: notes about the new udp4_raw mode
raw sockets: added info rpc
core: compile raw socket support by default on freebsd
raw sockets: freebsd support
cfg: delay cfg_shmize to just before forking
core: always compile the raw sockets code on linux
raw sockets: ttl can be set from the config file
raw sockets: ttl can be set or auto-detected
core: include raw socket support in version info
raw sockets: use BSD ip & udp structure versions
raw sockets: config file support
raw sockets: udp send will use now raw sockets if enabled
raw sockets: runtime config support
raw sockets: build ip header & fragmentation support
raw socket: compilation fixes
raw sockets: get dst. ip from the ip header
core: basic support for receiving udp sip packets on raw sockets
core: basic raw socket support functions

Andrei Pelinescu-Onciul authored on 11/08/2010 20:49:04
Showing 17 changed files
... ...
@@ -1649,7 +1649,7 @@ ifeq ($(OS), linux)
1649 1649
 	use_futex= yes
1650 1650
 	C_DEFS+=-DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN -DHAVE_SCHED_YIELD \
1651 1651
 			-DHAVE_MSG_NOSIGNAL -DHAVE_MSGHDR_MSG_CONTROL -DHAVE_ALLOCA_H \
1652
-			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER
1652
+			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER -DUSE_RAW_SOCKS
1653 1653
 	ifneq ($(found_lock_method), yes)
1654 1654
 		#C_DEFS+= -DUSE_POSIX_SEM
1655 1655
 		C_DEFS+=-DUSE_PTHREAD_MUTEX
... ...
@@ -1768,7 +1768,7 @@ ifeq ($(OS), freebsd)
1768 1768
 	C_DEFS+=-DHAVE_SOCKADDR_SA_LEN -DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN \
1769 1769
 		-DHAVE_SCHED_YIELD -DHAVE_MSGHDR_MSG_CONTROL \
1770 1770
 		-DHAVE_CONNECT_ECONNRESET_BUG -DHAVE_TIMEGM \
1771
-		-DHAVE_NETINET_IN_SYSTM
1771
+		-DHAVE_NETINET_IN_SYSTM -DUSE_RAW_SOCKS
1772 1772
 	ifneq ($(found_lock_method), yes)
1773 1773
 		C_DEFS+= -DUSE_PTHREAD_MUTEX  # try pthread sems
1774 1774
 		found_lock_method=yes
... ...
@@ -21,6 +21,19 @@ core:
21 21
             t_set_fr($foo) (equivalent now with t_set_fr("$foo")).
22 22
   - all the module functions can now be called with any constant expression
23 23
       as parameters. E.g.: f("7 *" +" 6 = " + 7 * 6);
24
+  - major performance increase on Linux multi-cpu machines that send a lot
25
+     of UDP IPv4 packets (40-50% faster in stateless mode). For it to work
26
+     udp4_raw must be enabled or set into auto mode in sr.cfg and sr must be
27
+     started as root or with CAP_NET_RAW. Note that even if udp4_raw is
28
+     off (default), if sr was started with enough privileges, it can be
29
+     enabled at runtime.
30
+     The support for using raw sockets is also available on FreeBSD (compiled
31
+     by default but not tested for performance yet), NetBSD, OpenBSD and
32
+     Darwin (not tested and not compiled by default, needs
33
+     make cfg extra_defs=-DUSE_RAW_SOCKS). To check if the support is
34
+     compiled, use ser -V |grep --color RAW_SOCKS or for a running
35
+     ser: sercmd core.udp4_raw_info.
36
+     See udp4_raw, udp4_raw_mtu and udp4_raw_ttl below.
24 37
   - onreply_route {...} is now equivalent with onreply_route[0] {...}
25 38
   - global, per protocol blacklist ignore masks (via extended send_flags).
26 39
     See dst_blacklist_udp_imask a.s.o (dst_blacklist_*_imask).
... ...
@@ -29,6 +42,28 @@ core:
29 42
   - support for permanent entries in the DNS cache.
30 43
 
31 44
 new config variables:
45
+  - udp4_raw - enables raw socket support for sending UDP IPv4 datagrams 
46
+      (40-50% performance increase on linux multi-cpu).
47
+      Possible values: 0 - disabled (default), 1 - enabled, -1 auto.
48
+      In "auto" mode it will be enabled if possible (sr started as root or
49
+      with CAP_NET_RAW).
50
+      udp4_raw can be used on Linux and FreeBSD. For other BSDs and Darwin
51
+      one must compile with -DUSE_RAW_SOCKS.
52
+      On Linux one should also set udp4_raw_mtu if the MTU on any network
53
+      interface that could be used for sending is smaller than 1500.
54
+      Can be set at runtime as long as sr was started with enough privileges
55
+      (core.udp4_raw).
56
+  - udp4_raw_mtu - MTU value used for UDP IPv4 packets when udp4_raw is
57
+      enabled.  It should be set to the minimum MTU of all the network
58
+      interfaces that could be used for sending. The default value is 1500.
59
+      Note that on BSDs it does not need to be set (if set it will be ignored,
60
+      the proper MTU will be used automatically by the kernel). On Linux it
61
+      should be set.
62
+      Can be set at runtime (core.udp4_raw_mtu).
63
+  - udp4_raw_ttl - TTL value used for UDP IPv4 packets when udp4_raw is
64
+      enabled. By default it is set to auto mode (-1), meaning that the
65
+      same TTL will be used as for normal UDP sockets.
66
+      Can be set at runtime (core.udp4_raw_ttl).
32 67
   - dst_blacklist_udp_imask - global blacklist events ignore mask for udp
33 68
     (a blacklist event/reason set in this variable will be ignored when 
34 69
     deciding whether or not to blacklist an udp destination). Can be set
... ...
@@ -211,6 +211,9 @@ ADD_LOCAL_RPORT		"add_local_rport"
211 211
 FORCE_TCP_ALIAS		"force_tcp_alias"|"add_tcp_alias"
212 212
 UDP_MTU		"udp_mtu"
213 213
 UDP_MTU_TRY_PROTO	"udp_mtu_try_proto"
214
+UDP4_RAW		"udp4_raw"
215
+UDP4_RAW_MTU	"udp4_raw_mtu"
216
+UDP4_RAW_TTL	"udp4_raw_ttl"
214 217
 SETFLAG		setflag
215 218
 RESETFLAG	resetflag
216 219
 ISFLAGSET	isflagset
... ...
@@ -605,6 +608,9 @@ SUBST       subst
605 608
 <INITIAL>{UDP_MTU}	{ count(); yylval.strval=yytext; return UDP_MTU; }
606 609
 <INITIAL>{UDP_MTU_TRY_PROTO}	{ count(); yylval.strval=yytext;
607 610
 									return UDP_MTU_TRY_PROTO; }
611
+<INITIAL>{UDP4_RAW}	{ count(); yylval.strval=yytext; return UDP4_RAW; }
612
+<INITIAL>{UDP4_RAW_MTU}	{ count(); yylval.strval=yytext; return UDP4_RAW_MTU; }
613
+<INITIAL>{UDP4_RAW_TTL}	{ count(); yylval.strval=yytext; return UDP4_RAW_TTL; }
608 614
 <INITIAL>{IF}	{ count(); yylval.strval=yytext; return IF; }
609 615
 <INITIAL>{ELSE}	{ count(); yylval.strval=yytext; return ELSE; }
610 616
 
... ...
@@ -200,6 +200,12 @@
200 200
 	#define IF_SCTP(x) warn("sctp support not compiled in")
201 201
 #endif
202 202
 
203
/* IF_RAW_SOCKS(x): expands to x when raw socket support is compiled in
 * (USE_RAW_SOCKS defined); otherwise x is discarded and the macro expands
 * to a warn() call saying that the support is missing. */
#ifndef USE_RAW_SOCKS
	#define IF_RAW_SOCKS(x) warn("raw socket support not compiled in")
#else
	#define IF_RAW_SOCKS(x) x
#endif
208
+
203 209
 
204 210
 extern int yylex();
205 211
 /* safer than using yytext which can be array or pointer */
... ...
@@ -325,6 +331,9 @@ extern char *finame;
325 331
 %token FORCE_TCP_ALIAS
326 332
 %token UDP_MTU
327 333
 %token UDP_MTU_TRY_PROTO
334
+%token UDP4_RAW
335
+%token UDP4_RAW_MTU
336
+%token UDP4_RAW_TTL
328 337
 %token IF
329 338
 %token ELSE
330 339
 %token SET_ADV_ADDRESS
... ...
@@ -1581,6 +1590,16 @@ assign_stm:
1581 1590
 		{ default_core_cfg.udp_mtu_try_proto=$3; fix_global_req_flags(0, 0); }
1582 1591
 	| UDP_MTU_TRY_PROTO EQUAL error
1583 1592
 		{ yyerror("TCP, TLS, SCTP or UDP expected"); }
1593
+	| UDP4_RAW EQUAL intno { IF_RAW_SOCKS(default_core_cfg.udp4_raw=$3); }
1594
+	| UDP4_RAW EQUAL error { yyerror("number expected"); }
1595
+	| UDP4_RAW_MTU EQUAL NUMBER {
1596
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_mtu=$3);
1597
+	}
1598
+	| UDP4_RAW_MTU EQUAL error { yyerror("number expected"); }
1599
+	| UDP4_RAW_TTL EQUAL NUMBER {
1600
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_ttl=$3);
1601
+	}
1602
+	| UDP4_RAW_TTL EQUAL error { yyerror("number expected"); }
1584 1603
 	| cfg_var
1585 1604
 	| error EQUAL { yyerror("unknown config variable"); }
1586 1605
 	;
... ...
@@ -24,16 +24,15 @@
24 24
  *  2007-12-03	Initial version (Miklos)
25 25
  *  2008-01-31  added DNS resolver parameters (Miklos)
26 26
  */
27
-/*!
28
- * \file
29
- * \brief SIP-router core ::  Core configuration parser
30
- * \ingroup core
31
- * Module: \ref core
27
+/** core runtime config.
28
+ * @file cfg_core.c
29
+ * @ingroup core
30
+ * Module: @ref core
32 31
  *
33
- * See 
34
- * - \ref ConfigCoreDoc
35
- * - \ref ConfigEngine
36
- * - \ref cfg_core.h
32
+ * See
33
+ * - @ref ConfigCoreDoc
34
+ * - @ref ConfigEngine
35
+ * - @ref cfg_core.h
37 36
  */
38 37
 /*!
39 38
  * \page ConfigCoreDoc Documentation of configuration parser
... ...
@@ -57,6 +56,8 @@
57 56
 #include "pt.h"
58 57
 #endif
59 58
 #include "msg_translator.h" /* fix_global_req_flags() */
59
+#include "globals.h"
60
+#include "sock_ut.h"
60 61
 #include "cfg/cfg.h"
61 62
 #include "cfg_core.h"
62 63
 
... ...
@@ -113,6 +114,9 @@ struct cfg_group_core default_core_cfg = {
113 114
 	DEFAULT_MAX_WHILE_LOOPS, /*!< max_while_loops */
114 115
 	0, /*!< udp_mtu (disabled by default) */
115 116
 	0, /*!< udp_mtu_try_proto -> default disabled */
117
+	0, /**< udp4_raw (disabled by default) */
118
+	1500, /**< udp4_raw_mtu (1500 by default) */
119
+	-1,  /**< udp4_raw_ttl (auto detect by default) */
116 120
 	0,  /*!< force_rport */
117 121
 	L_DBG, /*!< memlog */
118 122
 	3 /*!< mem_summary -flags: 0 off, 1 pkg_status, 2 shm_status,
... ...
@@ -121,6 +125,57 @@ struct cfg_group_core default_core_cfg = {
121 125
 
122 126
 void	*core_cfg = &default_core_cfg;
123 127
 
128
+
129
+static int check_raw_sock_support(void* cfg_h, str* gname, str* name,
130
+									void** v)
131
+{
132
+	int val;
133
+	
134
+	val = (int)(long)(*v);
135
+#ifndef USE_RAW_SOCKS
136
+	if (val > 0) {
137
+		ERR("no RAW_SOCKS support, please recompile with it enabled\n");
138
+		return -1;
139
+	}
140
+	return 0;
141
+#else /* USE_RAW_SOCKS */
142
+	if (raw_udp4_send_sock < 0) {
143
+		if (val > 0) {
144
+			ERR("could not intialize raw socket on startup, please "
145
+					"restart as root or with CAP_NET_RAW\n");
146
+			return -1;
147
+		} else if (val < 0) {
148
+			/* auto and no socket => disable */
149
+			*v = (void*)(long)0;
150
+		}
151
+	} else if (val < 0) {
152
+		/* auto and socket => enable */
153
+		*v = (void*)(long)1;
154
+	}
155
+	return 0;
156
+#endif /* USE_RAW_SOCKS */
157
+}
158
+
159
+
160
+
161
+static int  udp4_raw_ttl_fixup(void* cfg_h, str* gname, str* name, void** val)
162
+{
163
+	int v;
164
+	v = (int)(long)(*val);
165
+	if (v < 0) {
166
+		if (sendipv4)
167
+			v = sock_get_ttl(sendipv4->socket);
168
+	}
169
+	if (v < 0) {
170
+		/* some error => use a reasonable default */
171
+		v = 63;
172
+	}
173
+	*val = (void*)(long)v;
174
+	return 0;
175
+}
176
+
177
+
178
+
124 179
 cfg_def_t core_cfg_def[] = {
125 180
 	{"debug",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
126 181
 		"debug level"},
... ...
@@ -179,7 +234,8 @@ cfg_def_t core_cfg_def[] = {
179 234
 	{"dns_search_full_match",	CFG_VAR_INT,	0, 1, 0, 0,
180 235
 		"enable/disable domain name checks against the search list "
181 236
 		"in DNS answers"},
182
-	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup, resolv_reinit,
237
+	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup,
238
+		resolv_reinit,
183 239
 		"set to 1 in order to reinitialize the DNS resolver"},
184 240
 	/* DNS cache */
185 241
 #ifdef USE_DNS_CACHE
... ...
@@ -230,6 +286,16 @@ cfg_def_t core_cfg_def[] = {
230 286
 			" exceeds udp_mtu"},
231 287
 	{"udp_mtu_try_proto", CFG_VAR_INT, 1, 4, 0, fix_global_req_flags,
232 288
 		"if send size > udp_mtu use proto (1 udp, 2 tcp, 3 tls, 4 sctp)"},
289
+	{"udp4_raw", CFG_VAR_INT | CFG_ATOMIC, -1, 1, check_raw_sock_support, 0,
290
+		"enable/disable using a raw socket for sending UDP IPV4 packets."
291
+		" Should be  faster on multi-CPU linux running machines."},
292
+	{"udp4_raw_mtu", CFG_VAR_INT | CFG_ATOMIC, 28, 65535, 0, 0,
293
+		"set the MTU used when using raw sockets for udp sending."
294
+		" This  value will be used when deciding whether or not to fragment"
295
+		" the packets."},
296
+	{"udp4_raw_ttl", CFG_VAR_INT | CFG_ATOMIC, -1, 255, udp4_raw_ttl_fixup, 0,
297
+		"set the IP TTL used when using raw sockets for udp sending."
298
+		" -1 will use the same value as for normal udp sockets."},
233 299
 	{"force_rport",     CFG_VAR_INT, 0, 1,  0, fix_global_req_flags,
234 300
 		"force rport for all the received messages" },
235 301
 	{"memlog",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
... ...
@@ -36,12 +36,11 @@
36 36
  * -------
37 37
  *  2007-12-03	Initial version (Miklos)
38 38
  */
39
-/*!
40
- * \file
41
- * \brief SIP-router core :: Core configuration
42
- * \ingroup core
39
+/** core runtime config.
40
+ * @file cfg_core.h
41
+ * @ingroup core
43 42
  *
44
- * Module: \ref core
43
+ * Module: @ref core
45 44
  */
46 45
 
47 46
 
... ...
@@ -103,6 +102,9 @@ struct cfg_group_core {
103 102
 	int max_while_loops;
104 103
 	int udp_mtu; /*!< maximum send size for udp, if > try another protocol*/
105 104
 	int udp_mtu_try_proto; /*!< if packet> udp_mtu, try proto (e.g. TCP) */
105
+	int udp4_raw; /* use raw sockets for sending on udp ipv 4 */
106
+	int udp4_raw_mtu; /* mtu used when using udp raw socket */
107
+	int udp4_raw_ttl; /* ttl used when using udp raw sockets */
106 108
 	int force_rport; /*!< if set rport will always be forced*/
107 109
 	int memlog; /*!< log level for memory status/summary info */
108 110
 	int mem_summary; /*!< display memory status/summary info on exit */
... ...
@@ -42,6 +42,7 @@
42 42
 #include "tcp_info.h"
43 43
 #include "tcp_options.h"
44 44
 #include "core_cmd.h"
45
+#include "cfg_core.h"
45 46
 #ifdef USE_SCTP
46 47
 #include "sctp_options.h"
47 48
 #include "sctp_server.h"
... ...
@@ -843,6 +844,30 @@ static void core_sctpinfo(rpc_t* rpc, void* c)
843 844
 
844 845
 
845 846
 
847
+
848
+static const char* core_udp4rawinfo_doc[] = {
849
+	"Returns udp4_raw related info.",    /* Documentation string */
850
+	0                                     /* Method signature(s) */
851
+};
852
+
853
+static void core_udp4rawinfo(rpc_t* rpc, void* c)
854
+{
855
+#ifdef USE_RAW_SOCKS
856
+	void *handle;
857
+
858
+	rpc->add(c, "{", &handle);
859
+	rpc->struct_add(handle, "ddd",
860
+		"udp4_raw", cfg_get(core, core_cfg, udp4_raw),
861
+		"udp4_raw_mtu", cfg_get(core, core_cfg, udp4_raw_mtu),
862
+		"udp4_raw_ttl", cfg_get(core, core_cfg, udp4_raw_ttl)
863
+	);
864
+#else /* USE_RAW_SOCKS */
865
+	rpc->fault(c, 500, "udp4_raw mode support not compiled");
866
+#endif /* USE_RAW_SOCKS */
867
+}
868
+
869
+
870
+
846 871
 /*
847 872
  * RPC Methods exported by this module
848 873
  */
... ...
@@ -876,6 +901,8 @@ static rpc_export_t core_rpc_methods[] = {
876 901
 	{"core.sctp_options",      core_sctp_options,      core_sctp_options_doc,
877 902
 		0},
878 903
 	{"core.sctp_info",         core_sctpinfo,          core_sctpinfo_doc,   0},
904
+	{"core.udp4_raw_info",     core_udp4rawinfo,       core_udp4rawinfo_doc,
905
+		0},
879 906
 #ifdef USE_DNS_CACHE
880 907
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,
881 908
 		0	},
... ...
@@ -64,6 +64,10 @@ extern struct socket_info* bind_address; /* pointer to the crt. proc.
64 64
 extern struct socket_info* sendipv4; /* ipv4 socket to use when msg.
65 65
 										comes from ipv6*/
66 66
 extern struct socket_info* sendipv6; /* same as above for ipv6 */
67
+#ifdef USE_RAW_SOCKS
68
+extern int raw_udp4_send_sock;
69
+#endif /* USE_RAW_SOCKS */
70
+
67 71
 #ifdef USE_TCP
68 72
 extern struct socket_info* sendipv4_tcp; /* ipv4 socket to use when msg.
69 73
 										comes from ipv6*/
... ...
@@ -73,9 +73,9 @@
73 73
  * 2008-08-08  sctp support (andrei)
74 74
  * 2008-08-19  -l support for multihomed addresses/addresses lists
75 75
  *                (e.g. -l (eth0, 1.2.3.4, foo.bar) ) (andrei)
76
- *  2010-04-19 added daemon_status_fd pipe to communicate the parent process
77
- *             with the main process in daemonize mode, so the parent process
78
- *             can return the proper exit status code (ibc)
76
+ * 2010-04-19  added daemon_status_fd pipe to communicate the parent process
77
+ *              with the main process in daemonize mode, so the parent process
78
+ *              can return the proper exit status code (ibc)
79 79
  */
80 80
 
81 81
 /** main file (init, daemonize, startup) 
... ...
@@ -145,6 +145,9 @@
145 145
 #include "nonsip_hooks.h"
146 146
 #include "ut.h"
147 147
 #include "signals.h"
148
+#ifdef USE_RAW_SOCKS
149
+#include "raw_sock.h"
150
+#endif /* USE_RAW_SOCKS */
148 151
 #ifdef USE_TCP
149 152
 #include "poll_types.h"
150 153
 #include "tcp_init.h"
... ...
@@ -185,6 +188,7 @@
185 188
 #include "pvapi_init.h" /* init */
186 189
 #include "pv_core.h" /* register core pvars */
187 190
 #include "ppcfg.h"
191
+#include "sock_ut.h"
188 192
 
189 193
 #ifdef DEBUG_DMALLOC
190 194
 #include <dmalloc.h>
... ...
@@ -437,6 +441,9 @@ struct socket_info* bind_address=0; /* pointer to the crt. proc.
437 441
 									 listening address*/
438 442
 struct socket_info* sendipv4; /* ipv4 socket to use when msg. comes from ipv6*/
439 443
 struct socket_info* sendipv6; /* same as above for ipv6 */
444
+#ifdef USE_RAW_SOCKS
445
+int raw_udp4_send_sock = -1; /* raw socket used for sending udp4 packets */
446
+#endif /* USE_RAW_SOCKS */
440 447
 #ifdef USE_TCP
441 448
 struct socket_info* sendipv4_tcp;
442 449
 struct socket_info* sendipv6_tcp;
... ...
@@ -1236,15 +1243,57 @@ int main_loop()
1236 1243
 		/* only one address, we ignore all the others */
1237 1244
 		if (udp_init(udp_listen)==-1) goto error;
1238 1245
 		bind_address=udp_listen;
1239
-		if (bind_address->address.af==AF_INET)
1246
+		if (bind_address->address.af==AF_INET) {
1240 1247
 			sendipv4=bind_address;
1241
-		else
1248
+#ifdef USE_RAW_SOCKS
1249
+		/* always try to have a raw socket opened if we are using ipv4 */
1250
+		raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
1251
+		if (raw_udp4_send_sock < 0) {
1252
+			if ( default_core_cfg.udp4_raw > 0) {
1253
+				/* force use raw socket failed */
1254
+				ERR("could not initialize raw udp send socket (ipv4):"
1255
+						" %s (%d)\n", strerror(errno), errno);
1256
+				if (errno == EPERM)
1257
+					ERR("could not initialize raw socket on startup"
1258
+						" due to inadequate permissions, please"
1259
+						" restart as root or with CAP_NET_RAW\n");
1260
+				goto error;
1261
+			}
1262
+			default_core_cfg.udp4_raw = 0; /* disabled */
1263
+		} else {
1264
+			register_fds(1);
1265
+			if (default_core_cfg.udp4_raw < 0) {
1266
+				/* auto-detect => use it */
1267
+				default_core_cfg.udp4_raw = 1; /* enabled */
1268
+				DBG("raw socket possible => turning it on\n");
1269
+			}
1270
+			if (default_core_cfg.udp4_raw_ttl < 0) {
1271
+				/* auto-detect */
1272
+				default_core_cfg.udp4_raw_ttl = sock_get_ttl(sendipv4->socket);
1273
+				if (default_core_cfg.udp4_raw_ttl < 0)
1274
+					/* error, use some default value */
1275
+					default_core_cfg.udp4_raw_ttl = 63;
1276
+			}
1277
+		}
1278
+#else
1279
+		default_core.cfg.udp4_raw = 0;
1280
+#endif /* USE_RAW_SOCKS */
1281
+		} else
1242 1282
 			sendipv6=bind_address;
1243 1283
 		if (udp_listen->next){
1244 1284
 			LOG(L_WARN, "WARNING: using only the first listen address"
1245 1285
 						" (no fork)\n");
1246 1286
 		}
1247 1287
 
1288
+		/* delay cfg_shmize to the last moment (it must be called _before_
1289
+		   forking). Changes to default cfgs after this point will be
1290
+		   ignored.
1291
+		*/
1292
+		if (cfg_shmize() < 0) {
1293
+			LOG(L_CRIT, "could not initialize shared configuration\n");
1294
+			goto error;
1295
+		}
1296
+	
1248 1297
 		/* Register the children that will keep updating their
1249 1298
 		 * local configuration */
1250 1299
 		cfg_register_child(
... ...
@@ -1363,6 +1412,42 @@ int main_loop()
1363 1412
 			/* children_no per each socket */
1364 1413
 			cfg_register_child(children_no);
1365 1414
 		}
1415
+#ifdef USE_RAW_SOCKS
1416
+		/* always try to have a raw socket opened if we are using ipv4 */
1417
+		if (sendipv4) {
1418
+			raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
1419
+			if (raw_udp4_send_sock < 0) {
1420
+				if ( default_core_cfg.udp4_raw > 0) {
1421
+						/* force use raw socket failed */
1422
+						ERR("could not initialize raw udp send socket (ipv4):"
1423
+								" %s (%d)\n", strerror(errno), errno);
1424
+						if (errno == EPERM)
1425
+							ERR("could not initialize raw socket on startup"
1426
+								" due to inadequate permissions, please"
1427
+								" restart as root or with CAP_NET_RAW\n");
1428
+						goto error;
1429
+					}
1430
+					default_core_cfg.udp4_raw = 0; /* disabled */
1431
+			} else {
1432
+				register_fds(1);
1433
+				if (default_core_cfg.udp4_raw < 0) {
1434
+					/* auto-detect => use it */
1435
+					default_core_cfg.udp4_raw = 1; /* enabled */
1436
+					DBG("raw socket possible => turning it on\n");
1437
+				}
1438
+				if (default_core_cfg.udp4_raw_ttl < 0) {
1439
+					/* auto-detect */
1440
+					default_core_cfg.udp4_raw_ttl =
1441
+						sock_get_ttl(sendipv4->socket);
1442
+					if (default_core_cfg.udp4_raw_ttl < 0)
1443
+						/* error, use some default value */
1444
+						default_core_cfg.udp4_raw_ttl = 63;
1445
+				}
1446
+			}
1447
+		}
1448
+#else
1449
+		default_core_cfg.udp4_raw = 0;
1450
+#endif /* USE_RAW_SOCKS */
1366 1451
 #ifdef USE_SCTP
1367 1452
 		if (!sctp_disable){
1368 1453
 			for(si=sctp_listen; si; si=si->next){
... ...
@@ -1428,6 +1513,14 @@ int main_loop()
1428 1513
 			 * sending) so we open all first*/
1429 1514
 		if (do_suid()==-1) goto error; /* try to drop privileges */
1430 1515
 
1516
+		/* delay cfg_shmize to the last moment (it must be called _before_
1517
+		   forking). Changes to default cfgs after this point will be
1518
+		   ignored (cfg_shmize() will copy the default cfgs into shmem).
1519
+		*/
1520
+		if (cfg_shmize() < 0) {
1521
+			LOG(L_CRIT, "could not initialize shared configuration\n");
1522
+			goto error;
1523
+		}
1431 1524
 		/* init childs with rank==PROC_INIT before forking any process,
1432 1525
 		 * this is a place for delayed (after mod_init) initializations
1433 1526
 		 * (e.g. shared vars that depend on the total number of processes
... ...
@@ -2291,11 +2384,6 @@ try_select_again:	tval.tv_usec = 0;
2291 2384
 		goto error;
2292 2385
 	}
2293 2386
 	
2294
-	if (cfg_shmize() < 0) {
2295
-		LOG(L_CRIT, "could not initialize shared configuration\n");
2296
-		goto error;
2297
-	}
2298
-	
2299 2387
 	/* initialize process_table, add core process no. (calc_proc_no()) to the
2300 2388
 	 * processes registered from the modules*/
2301 2389
 	if (init_pt(calc_proc_no())==-1)
2302 2390
new file mode 100644
... ...
@@ -0,0 +1,144 @@
1
+/*
2
+ * Copyright (C) 2010 iptelorg GmbH
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+/** raw socket udp listen functions.
17
+ *  @file raw_listener.c
18
+ *  @ingroup core
19
+ *  Module: @ref core
20
+ */
21
+/*
22
+ * History:
23
+ * --------
24
+ *  2010-06-09  initial version (from older code) andrei
25
+ */
26
+
27
+#ifdef USE_RAW_SOCKS
28
+
29
+
30
+#include "raw_listener.h"
31
+#include "raw_sock.h"
32
+#include "receive.h"
33
+
34
+#include <errno.h>
35
+#include <string.h>
36
+
37
+struct socket_info* raw_udp_sendipv4=0;
38
+
39
+/** creates a raw socket based on a socket_info structure.
40
+ * Side-effects: sets raw_udp_sendipv4 if not already set.
41
+ * @param si - pointer to partially filled socket_info structure (su must
42
+ *              be set).
43
+ * @param iface - pointer to network interface to bind on (str). Can be null.
44
+ * @param iphdr_incl - 1 if send on these socket will include the IP header.
45
+ * @return <0 on error, socket on success.
46
+ */
47
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl)
48
+{
49
+	int sock;
50
+	struct ip_addr ip;
51
+	
52
+	su2ip_addr(&ip, &si->su);
53
+	sock=raw_udp4_socket(&ip, iface, iphdr_incl);
54
+	if (sock>=0){
55
+		if (raw_udp_sendipv4==0 || iface==0 || iface->s==0)
56
+			raw_udp_sendipv4=si;
57
+	}
58
+	return sock;
59
+}
60
+
61
+
62
+
63
+/** receive sip udp ipv4 packets over a raw socket in a loop.
64
+ * It should be called by a "raw socket receiver" process
65
+ * (since the function never exits unless it encounters a
66
+ *  critical error).
67
+ * @param rsock - initialized raw socket.
68
+ * @param port1 - start of port range.
69
+ * @param port2 - end of port range. If 0 it's equivalent to listening only
70
+ *                on port1.
71
+ * @return <0 on error, never returns on success.
72
+ */
73
+int raw_udp4_rcv_loop(int rsock, int port1, int port2)
74
+{
75
+	static char buf[BUF_SIZE+1];
76
+	char* p;
77
+	char* tmp;
78
+	union sockaddr_union from;
79
+	union sockaddr_union to;
80
+	struct receive_info ri;
81
+	struct raw_filter rf;
82
+	int len;
83
+	
84
+	/* this will not change */
85
+	from.sin.sin_family=AF_INET;
86
+	ri.bind_address=0;
87
+	ri.proto=PROTO_UDP;
88
+	ri.proto_reserved1=0;
89
+	ri.proto_reserved2=0;
90
+	/* set filter to match any address but with the specified port range */
91
+	memset(&rf, 0, sizeof(rf));
92
+	rf.dst.ip.af=AF_INET;
93
+	rf.dst.ip.len=4;
94
+	rf.dst.mask.af=AF_INET;
95
+	rf.dst.mask.len=4;
96
+	rf.proto=PROTO_UDP;
97
+	rf.port1=port1;
98
+	rf.port2=port2?port2:port1;
99
+	for(;;){
100
+		p=buf;
101
+		len=raw_udp4_recv(rsock, &p, BUF_SIZE, &from, &to, &rf);
102
+		if (len<0){
103
+			if (len==-1){
104
+				LOG(L_ERR, "ERROR: raw_udp4_rcv_loop:raw_udp4_recv: %s [%d]\n",
105
+						strerror(errno), errno);
106
+				if ((errno==EINTR)||(errno==EWOULDBLOCK))
107
+					continue;
108
+				else
109
+					goto error;
110
+			}else{
111
+				DBG("raw_udp4_rcv_loop: raw_udp4_recv error: %d\n", len);
112
+				continue;
113
+			}
114
+		}
115
+		/* we must 0-term the message */
116
+		p[len]=0;
117
+		ri.src_su=from;
118
+		su2ip_addr(&ri.src_ip, &from);
119
+		ri.src_port=su_getport(&from);
120
+		su2ip_addr(&ri.dst_ip, &to);
121
+		ri.dst_port=su_getport(&to);
122
+		/* sanity checks */
123
+		if (len<MIN_UDP_PACKET){
124
+			tmp=ip_addr2a(&ri.src_ip);
125
+			DBG("raw_udp4_rcv_loop: probing packet received from %s %d\n",
126
+					tmp, htons(ri.src_port));
127
+			continue;
128
+		}
129
+		if (ri.src_port==0){
130
+			tmp=ip_addr2a(&ri.src_ip);
131
+			LOG(L_INFO, "raw_udp4_rcv_loop: dropping 0 port packet from %s\n",
132
+						tmp);
133
+			continue;
134
+		}
135
+		tmp=ip_addr2a(&ri.src_ip);
136
+		DBG("raw_udp4_rcv_loop: received from %s:\n[%.*s]\n", tmp, len, p);
137
+		receive_msg(p, len, &ri);
138
+	}
139
+error:
140
+	return -1;
141
+}
142
+
143
+
144
+#endif /* USE_RAW_SOCKS */
0 145
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+/*
2
+ * Copyright (C) 2010 iptelorg GmbH
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+/** raw socket udp listen functions.
17
+ *  @file raw_listener.h
18
+ *  @ingroup core
19
+ *  Module: @ref core
20
+ */
21
+/*
22
+ * History:
23
+ * --------
24
+ *  2010-06-09  initial version (from older code) andrei
25
+ */
26
+
27
+#ifndef _raw_listener_h
28
+#define _raw_listener_h
29
+
30
+#include "ip_addr.h"
31
+
32
+
33
+/** default raw socket used for sending on udp ipv4 */
34
+struct socket_info* raw_udp_sendipv4;
35
+
36
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl);
37
+int raw_udp4_rcv_loop(int rsock, int port1, int port2);
38
+
39
+#endif /* _raw_listener_h */
0 40
new file mode 100644
... ...
@@ -0,0 +1,696 @@
1
+/* 
2
+ * $Id$
3
+ *
4
+ * Copyright (C) 2010 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+/** raw socket functions.
19
+ *  @file raw_sock.c
20
+ *  @ingroup core
21
+ *  Module: @ref core
22
+ */
23
+/* 
24
+ * History:
25
+ * --------
26
+ *  2010-06-07  initial version (from older code) andrei
27
+ *  2010-06-15  IP_HDRINCL raw socket support, including on-send
28
+ *               fragmentation (andrei)
29
+ */
30
+
31
+#ifdef USE_RAW_SOCKS
32
+
33
+#include "compiler_opt.h"
34
+#include "ip_addr.h"
35
+#include "dprint.h"
36
+#include "str.h"
37
+#include "rand/fastrand.h"
38
+#include "globals.h"
39
+
40
+#include <errno.h>
41
+#include <string.h>
42
+#include <unistd.h>
43
+#include <sys/types.h>
44
+#include <fcntl.h>
45
+#include <sys/socket.h>
46
+#include <netinet/in.h>
47
+#include <netinet/in_systm.h>
48
+#include <arpa/inet.h>
49
+#ifndef __USE_BSD
50
+#define __USE_BSD  /* on linux use bsd version of iphdr (more portable) */
51
+#endif /* __USE_BSD */
52
+#include <netinet/ip.h>
53
+#define __FAVOR_BSD /* on linux use bsd version of udphdr (more portable) */
54
+#include <netinet/udp.h>
55
+
56
+#include "raw_sock.h"
57
+#include "cfg/cfg.h"
58
+#include "cfg_core.h"
59
+
60
+
61
#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined(__OS_openbsd) \
	|| defined (__OS_darwin)
/** fragmentation is done by the kernel (no need to do it in userspace) */
#define RAW_IPHDR_INC_AUTO_FRAG
#endif /* __OS_* */

/* macros for converting values in the format expected by the kernel
   when the ip header is built in user space */
#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined (__OS_darwin)
/* on freebsd and netbsd the ip offset (along with the flags) and the
   ip total length (ip_len) must be filled in _host_ byte order.
   The same is true for openbsd < 2.1.
*/
/** convert the ip offset in the format expected by the kernel. */
#define RAW_IPHDR_IP_OFF(off) ((unsigned short)(off))
/** convert the ip total length in the format expected by the kernel. */
#define RAW_IPHDR_IP_LEN(tlen) ((unsigned short)(tlen))

#else /* __OS_* */
/* linux, openbsd >= 2.1 a.s.o.: network byte order */
/** convert the ip offset in the format expected by the kernel. */
#define RAW_IPHDR_IP_OFF(off)  (htons((unsigned short)(off)))
/** convert the ip total length in the format expected by the kernel. */
#define RAW_IPHDR_IP_LEN(tlen) (htons((unsigned short)(tlen)))

#endif /* __OS_* */
86
+
87
+
88
+/** create and return a raw socket.
89
+ * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
90
+ * @param ip - if not null the socket will be bound on this ip.
91
+ * @param iface - if not null the socket will be bound to this interface
92
+ *                (SO_BINDTODEVICE). This is supported only on linux.
93
+ * @param iphdr_incl - set to 1 if packets send on this socket include
94
+ *                     a pre-built ip header (some fields, like the checksum
95
+ *                     will still be filled by the kernel, OTOH packet
96
+ *                     fragmentation has to be done in user space).
97
+ * @return socket on success, -1 on error
98
+ */
99
+int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
100
+{
101
+	int sock;
102
+	int t;
103
+	union sockaddr_union su;
104
+#if defined (SO_BINDTODEVICE)
105
+	char short_ifname[sizeof(int)];
106
+	int ifname_len;
107
+	char* ifname;
108
+#endif /* SO_BINDTODEVICE */
109
+
110
+	sock = socket(PF_INET, SOCK_RAW, proto);
111
+	if (sock==-1)
112
+		goto error;
113
+	/* set socket options */
114
+	if (iphdr_incl) {
115
+		t=1;
116
+		if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &t, sizeof(t))<0){
117
+			ERR("raw_socket: setsockopt(IP_HDRINCL) failed: %s [%d]\n",
118
+					strerror(errno), errno);
119
+			goto error;
120
+		}
121
+	} else {
122
+		/* IP_PKTINFO makes no sense if the ip header is included */
123
+		/* using IP_PKTINFO */
124
+		t=1;
125
+#ifdef IP_PKTINFO
126
+		if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
127
+			ERR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
128
+					strerror(errno), errno);
129
+			goto error;
130
+		}
131
+#elif defined(IP_RECVDSTADDR)
132
+		if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
133
+			ERR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
134
+					strerror(errno), errno);
135
+			goto error;
136
+		}
137
+#else
138
+#error "no method of getting the destination ip address supported"
139
+#endif /* IP_RECVDSTADDR / IP_PKTINFO */
140
+	}
141
+#if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
142
+	t=IP_PMTUDISC_DONT;
143
+	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
144
+		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
145
+				strerror(errno));
146
+		goto error;
147
+	}
148
+#endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
149
+	if (iface && iface->s){
150
+#if defined (SO_BINDTODEVICE)
151
+		/* workaround for linux bug: arg to setsockopt must have at least
152
+		 * sizeof(int) size or EINVAL would be returned */
153
+		if (iface->len<sizeof(int)){
154
+			memcpy(short_ifname, iface->s, iface->len);
155
+			short_ifname[iface->len]=0; /* make sure it's zero term */
156
+			ifname_len=sizeof(short_ifname);
157
+			ifname=short_ifname;
158
+		}else{
159
+			ifname_len=iface->len;
160
+			ifname=iface->s;
161
+		}
162
+		if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, ifname, ifname_len)
163
+						<0){
164
+				ERR("raw_socket: could not bind to %.*s: %s [%d]\n",
165
+							iface->len, ZSW(iface->s), strerror(errno), errno);
166
+				goto error;
167
+		}
168
+#else /* !SO_BINDTODEVICE */
169
+		/* SO_BINDTODEVICE is linux specific => cannot bind to a device */
170
+		ERR("raw_socket: bind to device supported only on linux\n");
171
+		goto error;
172
+#endif /* SO_BINDTODEVICE */
173
+	}
174
+	/* FIXME: probe_max_receive_buffer(sock) missing */
175
+	if (ip){
176
+		init_su(&su, ip, 0);
177
+		if (bind(sock, &su.s, sockaddru_len(su))==-1){
178
+			ERR("raw_socket: bind(%s) failed: %s [%d]\n",
179
+				ip_addr2a(ip), strerror(errno), errno);
180
+			goto error;
181
+		}
182
+	}
183
+	return sock;
184
+error:
185
+	if (sock!=-1) close(sock);
186
+	return -1;
187
+}
188
+
189
+
190
+
191
+/** create and return an udp over ipv4  raw socket.
192
+ * @param ip - if not null the socket will be bound on this ip.
193
+ * @param iface - if not null the socket will be bound to this interface
194
+ *                (SO_BINDTODEVICE).
195
+ * @param iphdr_incl - set to 1 if packets send on this socket include
196
+ *                     a pre-built ip header (some fields, like the checksum
197
+ *                     will still be filled by the kernel, OTOH packet
198
+ *                     fragmentation has to be done in user space).
199
+ * @return socket on success, -1 on error
200
+ */
201
+int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
202
+{
203
+	return raw_socket(IPPROTO_UDP, ip, iface, iphdr_incl);
204
+}
205
+
206
+
207
+
208
+/** receives an ipv4 packet using a raw socket.
209
+ * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
210
+ * from and to are filled (only the ip part the ports are 0 since this
211
+ * function doesn't try to look beyond the IP level).
212
+ * @param sock - raw socket
213
+ * @param buf - detination buffer.
214
+ * @param len - buffer len (should be enough for receiving a packet +
215
+ *               IP header).
216
+ * @param from - result parameter, the IP address part of it will be filled
217
+ *                with the source address and the port with 0.
218
+ * @param to - result parameter, the IP address part of it will be filled
219
+ *                with the destination (local) address and the port with 0.
220
+ * @return packet len or <0 on error: -1 (check errno),
221
+ *        -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
222
+ */
223
+int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
224
+					union sockaddr_union* to)
225
+{
226
+	struct iovec iov[1];
227
+	struct msghdr rcv_msg;
228
+	struct cmsghdr* cmsg;
229
+#ifdef IP_PKTINFO
230
+	struct in_pktinfo* rcv_pktinfo;
231
+#endif /* IP_PKTINFO */
232
+	int n, ret;
233
+	char msg_ctrl_buf[1024];
234
+
235
+	iov[0].iov_base=buf;
236
+	iov[0].iov_len=len;
237
+	rcv_msg.msg_name=from;
238
+	rcv_msg.msg_namelen=sockaddru_len(*from);
239
+	rcv_msg.msg_control=msg_ctrl_buf;
240
+	rcv_msg.msg_controllen=sizeof(msg_ctrl_buf);
241
+	rcv_msg.msg_iov=&iov[0];
242
+	rcv_msg.msg_iovlen=1;
243
+	ret=-2; /* no PKT_INFO or AF mismatch */
244
+retry:
245
+	n=recvmsg(sock, &rcv_msg, MSG_WAITALL);
246
+	if (unlikely(n==-1)){
247
+		if (errno==EINTR)
248
+			goto retry;
249
+		ret=n;
250
+		goto end;
251
+	}
252
+	/* find the pkt info */
253
+	for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg; cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
254
+#ifdef IP_PKTINFO
255
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
256
+					(cmsg->cmsg_type==IP_PKTINFO))) {
257
+			rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
258
+			to->sin.sin_family=AF_INET;
259
+			memcpy(&to->sin.sin_addr, &rcv_pktinfo->ipi_spec_dst.s_addr, 
260
+									sizeof(to->sin.sin_addr));
261
+			to->sin.sin_port=0; /* not known */
262
+			/* interface no. in ipi_ifindex */
263
+			ret=n; /* success */
264
+			break;
265
+		}
266
+#elif defined (IP_RECVDSTADDR)
267
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
268
+					(cmsg->cmsg_type==IP_RECVDSTADDR))) {
269
+			to->sin.sin_family=AF_INET;
270
+			memcpy(&to->sin.sin_addr, CMSG_DATA(cmsg),
271
+									sizeof(to->sin.sin_addr));
272
+			to->sin.sin_port=0; /* not known */
273
+			ret=n; /* success */
274
+			break;
275
+		}
276
+#else
277
+#error "no method of getting the destination ip address supported"
278
+#endif /* IP_PKTINFO / IP_RECVDSTADDR */
279
+	}
280
+end:
281
+	return ret;
282
+}
283
+
284
+
285
+
286
+/* receive an ipv4 udp packet over a raw socket.
287
+ * The packet is copied in *buf and *buf is advanced to point to the
288
+ * payload.  Fills from and to.
289
+ * @param rsock - raw socket
290
+ * @param buf - the packet will be written to where *buf points intially and
291
+ *              then *buf will be advanced to point to the udp payload.
292
+ * @param len - buffer length (should be enough to hold at least the
293
+ *               ip and udp headers + 1 byte).
294
+ * @param from - result parameter, filled with source address and port of the
295
+ *               packet.
296
+ * @param from - result parameter, filled with destination (local) address and
297
+ *               port of the packet.
298
+ * @param rf   - filter used to decide whether or not the packet is
299
+ *                accepted/processed. If null, all the packets are accepted.
300
+ * @return packet len or  <0 on error (-1 and -2 on recv error @see recvpkt4,
301
+ *         -3 if the headers are invalid and -4 if the packet doesn't
302
+ *         match the  filter).
303
+ */
304
+int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
305
+					union sockaddr_union* to, struct raw_filter* rf)
306
+{
307
+	int n;
308
+	unsigned short dst_port;
309
+	unsigned short src_port;
310
+	struct ip_addr dst_ip;
311
+	char* end;
312
+	char* udph_start;
313
+	char* udp_payload;
314
+	struct ip iph;
315
+	struct udphdr udph;
316
+	unsigned short udp_len;
317
+
318
+	n=recvpkt4(rsock, *buf, len, from, to);
319
+	if (unlikely(n<0)) goto error;
320
+	
321
+	end=*buf+n;
322
+	if (unlikely(n<(sizeof(struct ip)+sizeof(struct udphdr)))) {
323
+		n=-3;
324
+		goto error;
325
+	}
326
+	/* FIXME: if initial buffer is aligned, one could skip the memcpy
327
+	   and directly cast ip and udphdr pointer to the memory */
328
+	memcpy(&iph, *buf, sizeof(struct ip));
329
+	udph_start=*buf+iph.ip_hl*4;
330
+	udp_payload=udph_start+sizeof(struct udphdr);
331
+	if (unlikely(udp_payload>end)){
332
+		n=-3;
333
+		goto error;
334
+	}
335
+	memcpy(&udph, udph_start, sizeof(struct udphdr));
336
+	udp_len=ntohs(udph.uh_ulen);
337
+	if (unlikely((udph_start+udp_len)!=end)){
338
+		if ((udph_start+udp_len)>end){
339
+			n=-3;
340
+			goto error;
341
+		}else{
342
+			ERR("udp length too small: %d/%d\n",
343
+					(int)udp_len, (int)(end-udph_start));
344
+			n=-3;
345
+			goto error;
346
+		}
347
+	}
348
+	/* advance buf */
349
+	*buf=udp_payload;
350
+	n=(int)(end-*buf);
351
+	/* fill ip from the packet (needed if no PKT_INFO is used) */
352
+	dst_ip.af=AF_INET;
353
+	dst_ip.len=4;
354
+	dst_ip.u.addr32[0]=iph.ip_dst.s_addr;
355
+	/* fill dst_port */
356
+	dst_port=ntohs(udph.uh_dport);
357
+	ip_addr2su(to, &dst_ip, dst_port);
358
+	/* fill src_port */
359
+	src_port=ntohs(udph.uh_sport);
360
+	su_setport(from, src_port);
361
+	if (likely(rf)) {
362
+		su2ip_addr(&dst_ip, to);
363
+		if ( (dst_port && rf->port1 && ((dst_port<rf->port1) ||
364
+										(dst_port>rf->port2)) ) ||
365
+			(matchnet(&dst_ip, &rf->dst)!=1) ){
366
+			/* no match */
367
+			n=-4;
368
+			goto error;
369
+		}
370
+	}
371
+	
372
+error:
373
+	return n;
374
+}
375
+
376
+
377
+
378
/** udp checksum helper: compute the pseudo-header 16-bit "sum".
 * Computes the partial checksum (no complement) of the pseudo-header
 * and of the udp header (with the checksum field counted as 0).
 * It is meant to be used by udpv4_chksum().
 * Declared static like the other helpers in this file: a plain "inline"
 * definition provides no external definition in C99/C11, which leads to
 * an undefined reference whenever the compiler decides not to inline.
 * @param uh - filled udp header (uh_sport, uh_dport and uh_ulen are read,
 *             all of them in network byte order).
 * @param src - source ip address in network byte order.
 * @param dst - destination ip address in network byte order.
 * @param length - payload length (not including the udp header),
 *                 in _host_ order. Not used here: the length is taken
 *                 from uh->uh_ulen.
 * @return the partial checksum in host order
 */
inline static unsigned short udpv4_vhdr_sum(	struct udphdr* uh,
												struct in_addr* src,
												struct in_addr* dst,
												unsigned short length)
{
	unsigned sum;

	(void)length; /* kept only for interface compatibility */
	/* pseudo header: src ip, dst ip, protocol and udp length; everything
	   is added as 16-bit words in network byte order */
	sum=(src->s_addr>>16)+(src->s_addr&0xffff)+
		(dst->s_addr>>16)+(dst->s_addr&0xffff)+
		htons(IPPROTO_UDP)+(uh->uh_ulen);
	/* udp header */
	sum+=(uh->uh_dport)+(uh->uh_sport)+(uh->uh_ulen) + 0 /*chksum*/;
	/* fold it (end-around carry) */
	sum=(sum>>16)+(sum&0xffff);
	sum+=(sum>>16);
	/* no complement, only convert the result to host byte order */
	return ntohs((unsigned short) sum);
}
407
+
408
+
409
+
410
/** compute the udp over ipv4 checksum.
 * The payload is summed as big-endian 16-bit words (an odd last byte is
 * padded with 0) on top of the pseudo-header/udp header partial sum
 * returned by udpv4_vhdr_sum().
 * A computed checksum of 0 is returned as 0xffff (its one's complement
 * equivalent), because a 0 in the udp checksum field means "no checksum
 * generated" (RFC 768).
 * @param u - filled udp header (except checksum).
 * @param src - source ip v4 address, in _network_ byte order.
 * @param dst - destination ip v4 address, in _network_ byte order.
 * @param data - pointer to the udp payload.
 * @param length - payload length, not including the udp header and in
 *                 _host_ order. The length must be <= 0xffff - 8
 *                 (to allow space for the udp header).
 * @return the checksum in _host_ order (never 0) */
inline static unsigned short udpv4_chksum(struct udphdr* u,
							struct in_addr* src, struct in_addr* dst,
							unsigned char* data, unsigned short length)
{
	unsigned sum;
	unsigned char* end;
	unsigned short chksum;

	sum=udpv4_vhdr_sum(u, src, dst, length);
	end=data+(length&(~0x1)); /* end of the last complete 16-bit word */
	/* TODO: 16 & 32 bit aligned version */
	/* not aligned: build each word byte by byte */
	for(;data<end;data+=2){
		sum+=((data[0]<<8)+data[1]);
	}
	if (length&0x1)
		sum+=((*data)<<8); /* odd length: last byte padded with 0 */

	/* fold it */
	sum=(sum>>16)+(sum&0xffff);
	sum+=(sum>>16);
	chksum=(unsigned short)~sum;
	if (chksum==0)
		chksum=0xffff; /* 0 is reserved for "no checksum" */
	return chksum;
}
440
+
441
+
442
+
443
/** build an udp header.
 * The ports are copied as they are from the socket addresses, the udp
 * length is the payload length + the udp header size.
 * @param u - udp header that will be filled.
 * @param from - source ip v4 address and port.
 * @param to -   destination ip v4 address and port.
 * @param buf - pointer to the payload.
 * @param len - payload length (not including the udp header).
 * @param do_chk - if set the udp checksum will be computed, else it will
 *                 be set to 0.
 * @return 0 on success, < 0 on error.
 */
inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from, 
				struct sockaddr_in* to, unsigned char* buf, int len,
					int do_chk)
{
	unsigned short csum;

	/* the checksum computation reads ports and length => fill them first */
	u->uh_sport = from->sin_port;
	u->uh_dport = to->sin_port;
	u->uh_ulen = htons((unsigned short)len+sizeof(struct udphdr));
	csum = 0; /* no checksum */
	if (do_chk) {
		csum = htons(
				udpv4_chksum(u, &from->sin_addr, &to->sin_addr,  buf, len));
	}
	u->uh_sum = csum;
	return 0;
}
467
+
468
+
469
+
470
+/** fill in an ip header.
471
+ * Note: the checksum is _not_ computed.
472
+ * WARNING: The ip header length and offset might be filled in
473
+ * _host_ byte order or network byte order (depending on the OS, for example
474
+ *  freebsd needs host byte order for raw sockets with IPHDR_INC, while
475
+ *  linux needs network byte order).
476
+ * @param iph - ip header that will be filled.
477
+ * @param from - source ip v4 address (network byte order).
478
+ * @param to -   destination ip v4 address (network byte order).
479
+ * @param payload len - payload length (not including the ip header).
480
+ * @param proto - protocol.
481
+ * @return 0 on success, < 0 on error.
482
+ */
483
+inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
484
+				struct in_addr* to, int payload_len, unsigned char proto)
485
+{
486
+	iph->ip_hl = sizeof(struct ip)/4;
487
+	iph->ip_v = 4;
488
+	iph->ip_tos = tos;
489
+	/* on freebsd ip_len _must_ be in _host_ byte order instead
490
+	   of network byte order. On linux the length is ignored (it's filled
491
+	   automatically every time). */
492
+	iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
493
+	iph->ip_id = 0; /* 0 => will be filled automatically by the kernel */
494
+	iph->ip_off = 0; /* frag.: first 3 bits=flags=0, last 13 bits=offset */
495
+	iph->ip_ttl = cfg_get(core, core_cfg, udp4_raw_ttl);
496
+	iph->ip_p = proto;
497
+	iph->ip_src = *from;
498
+	iph->ip_dst = *to;
499
+	iph->ip_sum = 0;
500
+
501
+	return 0;
502
+}
503
+
504
+
505
+
506
+/** send an udp packet over a non-ip_hdrincl raw socket.
507
+ * @param rsock - raw socket
508
+ * @param buf - data
509
+ * @param len - data len
510
+ * @param from - source address:port (_must_ be non-null, but the ip address
511
+ *                can be 0, in which case it will be filled by the kernel).
512
+ * @param to - destination address:port
513
+ * @return  <0 on error (errno set too), number of bytes sent on success
514
+ *          (including the udp header => on success len + udpheader size).
515
+ */
516
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
517
+					union sockaddr_union* from,
518
+					union sockaddr_union* to)
519
+{
520
+	struct msghdr snd_msg;
521
+	struct cmsghdr* cmsg;
522
+#ifdef IP_PKTINFO
523
+	struct in_pktinfo* snd_pktinfo;
524
+#endif /* IP_PKTINFO */
525
+	struct iovec iov[2];
526
+	struct udphdr udp_hdr;
527
+	char msg_ctrl_snd_buf[1024];
528
+	int ret;
529
+
530
+	memset(&snd_msg, 0, sizeof(snd_msg));
531
+	snd_msg.msg_name=&to->sin;
532
+	snd_msg.msg_namelen=sockaddru_len(*to);
533
+	snd_msg.msg_iov=&iov[0];
534
+	/* prepare udp header */
535
+	mk_udp_hdr(&udp_hdr, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
536
+	iov[0].iov_base=(char*)&udp_hdr;
537
+	iov[0].iov_len=sizeof(udp_hdr);
538
+	iov[1].iov_base=buf;
539
+	iov[1].iov_len=len;
540
+	snd_msg.msg_iovlen=2;
541
+	snd_msg.msg_control=msg_ctrl_snd_buf;
542
+	snd_msg.msg_controllen=sizeof(msg_ctrl_snd_buf);
543
+	/* init pktinfo cmsg */
544
+	cmsg=CMSG_FIRSTHDR(&snd_msg);
545
+	cmsg->cmsg_level=IPPROTO_IP;
546
+#ifdef IP_PKTINFO
547
+	cmsg->cmsg_type=IP_PKTINFO;
548
+	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
549
+	snd_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
550
+	snd_pktinfo->ipi_ifindex=0;
551
+	snd_pktinfo->ipi_spec_dst.s_addr=from->sin.sin_addr.s_addr;
552
+#elif defined (IP_SENDSRCADDR)
553
+	cmsg->cmsg_type=IP_SENDSRCADDR;
554
+	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
555
+	memcpy(CMSG_DATA(cmsg), &from->sin.sin_addr.s_addr,
556
+							sizeof(struct in_addr));
557
+#else
558
+#error "no method of setting the source ip supported"
559
+#endif /* IP_PKTINFO / IP_SENDSRCADDR */
560
+	snd_msg.msg_controllen=cmsg->cmsg_len;
561
+	snd_msg.msg_flags=0;
562
+	ret=sendmsg(rsock, &snd_msg, 0);
563
+	return ret;
564
+}
565
+
566
+
567
+
568
+/** send an udp packet over an IP_HDRINCL raw socket.
569
+ * If needed, send several fragments.
570
+ * @param rsock - raw socket
571
+ * @param buf - data
572
+ * @param len - data len
573
+ * @param from - source address:port (_must_ be non-null, but the ip address
574
+ *                can be 0, in which case it will be filled by the kernel).
575
+ * @param to - destination address:port
576
+ * @param mtu - maximum datagram size (including the ip header, excluding
577
+ *              link layer headers). Minimum allowed size is 28
578
+ *               (sizeof(ip_header + udp_header)). If mtu is lower, it will
579
+ *               be ignored (the packet will be sent un-fragmented).
580
+ *              0 can be used to disable fragmentation.
581
+ * @return  <0 on error (-2: datagram too big, -1: check errno),
582
+ *          number of bytes sent on success
583
+ *          (including the ip & udp headers =>
584
+ *               on success len + udpheader + ipheader size).
585
+ */
586
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
587
+						union sockaddr_union* from,
588
+						union sockaddr_union* to, unsigned short mtu)
589
+{
590
+	struct msghdr snd_msg;
591
+	struct iovec iov[2];
592
+	struct ip_udp_hdr {
593
+		struct ip ip;
594
+		struct udphdr udp;
595
+	} hdr;
596
+	unsigned int totlen;
597
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
598
+	unsigned int ip_frag_size; /* fragment size */
599
+	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
600
+	unsigned int ip_payload;
601
+	unsigned int last_frag_offs;
602
+	void* last_frag_start;
603
+	int frg_no;
604
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
605
+	int ret;
606
+
607
+	totlen = len + sizeof(hdr);
608
+	if (unlikely(totlen) > 65535)
609
+		return -2;
610
+	memset(&snd_msg, 0, sizeof(snd_msg));
611
+	snd_msg.msg_name=&to->sin;
612
+	snd_msg.msg_namelen=sockaddru_len(*to);
613
+	snd_msg.msg_iov=&iov[0];
614
+	/* prepare the udp & ip headers */
615
+	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
616
+	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
617
+				len + sizeof(hdr.udp), IPPROTO_UDP);
618
+	iov[0].iov_base=(char*)&hdr;
619
+	iov[0].iov_len=sizeof(hdr);
620
+	snd_msg.msg_iovlen=2;
621
+	snd_msg.msg_control=0;
622
+	snd_msg.msg_controllen=0;
623
+	snd_msg.msg_flags=0;
624
+	/* this part changes for different fragments */
625
+	/* packets are fragmented if mtu has a valid value (at least an
626
+	   IP header + UDP header fit in it) and if the total length is greater
627
+	   then the mtu */
628
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
629
+	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
630
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
631
+		iov[1].iov_base=buf;
632
+		iov[1].iov_len=len;
633
+		ret=sendmsg(rsock, &snd_msg, 0);
634
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
635
+	} else {
636
+		ip_payload = len + sizeof(hdr.udp);
637
+		/* a fragment offset must be a multiple of 8 => its size must
638
+		   also be a multiple of 8, except for the last fragment */
639
+		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
640
+		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
641
+		frg_no = ip_payload / ip_frag_size +
642
+				 ((ip_payload % ip_frag_size) > last_frag_extra);
643
+		/*ip_last_frag_size = ip_payload % frag_size +
644
+							((ip_payload % frag_size) <= last_frag_extra) *
645
+							ip_frag_size; */
646
+		last_frag_offs = (frg_no - 1) * ip_frag_size;
647
+		/* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
648
+		   => last_frag_offs >= sizeof(hdr.udp) */
649
+		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
650
+		hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0
651
+											  (if 0 the kernel will fill it) */
652
+		/* send the first fragment */
653
+		iov[1].iov_base=buf;
654
+		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
655
+		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
656
+		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
657
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
658
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
659
+		ret=sendmsg(rsock, &snd_msg, 0);
660
+		if (unlikely(ret < 0))
661
+			goto end;
662
+		/* all the other fragments, include only the ip header */
663
+		iov[0].iov_len = sizeof(hdr.ip);
664
+		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
665
+		/* fragments between the first and the last */
666
+		while(unlikely(iov[1].iov_base < last_frag_start)) {
667
+			iov[1].iov_len = ip_frag_size;
668
+			hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
669
+			/* set MF  */
670
+			hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
671
+									(((char*)iov[1].iov_base - (char*)buf +
672
+										sizeof(hdr.udp)) / 8) | 0x2000 );
673
+			ret=sendmsg(rsock, &snd_msg, 0);
674
+			if (unlikely(ret < 0))
675
+				goto end;
676
+			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
677
+		}
678
+		/* last fragment */
679
+		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
680
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
681
+		/* don't set MF (last fragment) */
682
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
683
+									(((char*)iov[1].iov_base - (char*)buf +
684
+										sizeof(hdr.udp)) / 8) );
685
+		ret=sendmsg(rsock, &snd_msg, 0);
686
+		if (unlikely(ret < 0))
687
+			goto end;
688
+	}
689
+end:
690
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
691
+	return ret;
692
+}
693
+
694
+
695
+
696
+#endif /* USE_RAW_SOCKS */
0 697
new file mode 100644
... ...
@@ -0,0 +1,56 @@
1
+/*
2
+ * $Id$
3
+ *
4
+ * Copyright (C) 2010 iptelorg GmbH
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above