Browse code

Merge remote branch 'origin/andrei/raw_sock'

Raw socket support for sending UDP IPv4 packets
(major performance increase on multi-cpu machines running linux:
40-50% faster at least in stateless mode).

* origin/andrei/raw_sock:
NEWS: notes about the new udp4_raw mode
raw sockets: added info rpc
core: compile raw socket support by default on freebsd
raw sockets: freebsd support
cfg: delay cfg_shmize to just before forking
core: always compile the raw sockets code on linux
raw sockets: ttl can be set from the config file
raw sockets: ttl can be set or auto-detected
core: include raw socket support in version info
raw sockets: use BSD ip & udp structure versions
raw sockets: config file support
raw sockets: udp send will use now raw sockets if enabled
raw sockets: runtime config support
raw sockets: build ip header & fragmentation support
raw socket: compilation fixes
raw sockets: get dst. ip from the ip header
core: basic support for receiving udp sip packets on raw sockets
core: basic raw socket support functions

Andrei Pelinescu-Onciul authored on 11/08/2010 20:49:04
Showing 17 changed files
... ...
@@ -1649,7 +1649,7 @@ ifeq ($(OS), linux)
1649 1649
 	use_futex= yes
1650 1650
 	C_DEFS+=-DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN -DHAVE_SCHED_YIELD \
1651 1651
 			-DHAVE_MSG_NOSIGNAL -DHAVE_MSGHDR_MSG_CONTROL -DHAVE_ALLOCA_H \
1652
-			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER
1652
+			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER -DUSE_RAW_SOCKS
1653 1653
 	ifneq ($(found_lock_method), yes)
1654 1654
 		#C_DEFS+= -DUSE_POSIX_SEM
1655 1655
 		C_DEFS+=-DUSE_PTHREAD_MUTEX
... ...
@@ -1768,7 +1768,7 @@ ifeq ($(OS), freebsd)
1768 1768
 	C_DEFS+=-DHAVE_SOCKADDR_SA_LEN -DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN \
1769 1769
 		-DHAVE_SCHED_YIELD -DHAVE_MSGHDR_MSG_CONTROL \
1770 1770
 		-DHAVE_CONNECT_ECONNRESET_BUG -DHAVE_TIMEGM \
1771
-		-DHAVE_NETINET_IN_SYSTM
1771
+		-DHAVE_NETINET_IN_SYSTM -DUSE_RAW_SOCKS
1772 1772
 	ifneq ($(found_lock_method), yes)
1773 1773
 		C_DEFS+= -DUSE_PTHREAD_MUTEX  # try pthread sems
1774 1774
 		found_lock_method=yes
... ...
@@ -21,6 +21,19 @@ core:
21 21
             t_set_fr($foo) (equivalent now with t_set_fr("$foo")).
22 22
   - all the module functions can now be called with any constant expression
23 23
       as parameters. E.g.: f("7 *" +" 6 = " + 7 * 6);
24
+  - major performance increase on Linux multi-cpu machines that send a lot
25
+     of UDP IPv4 packets (40-50% faster in stateless mode). For it to work
26
+     udp4_raw must be enabled or set into auto mode in sr.cfg and sr must be
27
+     started as root or with CAP_NET_RAW. Note that even if udp4_raw is
28
+     off (default), if sr was started with enough privileges, it can be
29
+     enabled at runtime.
30
+     The support for using raw sockets is also available on FreeBSD (compiled
31
+     by default but not tested for performance yet), NetBSD, OpenBSD and
32
+     Darwin (not tested and not compiled by default, needs
33
+     make cfg extra_defs=-DUSE_RAW_SOCKS). To check if the support is
34
+     compiled, use ser -V |grep --color RAW_SOCKS or for a running
35
+     ser: sercmd core.udp4_raw_info.
36
+     See udp4_raw, udp4_raw_mtu and udp4_raw_ttl below.
24 37
   - onreply_route {...} is now equivalent with onreply_route[0] {...}
25 38
   - global, per protocol blacklist ignore masks (via extended send_flags).
26 39
     See dst_blacklist_udp_imask a.s.o (dst_blacklist_*_imask).
... ...
@@ -29,6 +42,28 @@ core:
29 29
   - support for permanent entries in the DNS cache.
30 30
 
31 31
 new config variables:
32
+  - udp4_raw - enables raw socket support for sending UDP IPv4 datagrams 
33
+      (40-50% performance increase on linux multi-cpu).
34
+      Possible values: 0 - disabled (default), 1 - enabled, -1 auto.
35
+      In "auto" mode it will be enabled if possible (sr started as root or
36
+      with CAP_NET_RAW).
37
+      udp4_raw can be used on Linux and FreeBSD. For other BSDs and Darwin
38
+      one must compile with -DUSE_RAW_SOCKS.
39
+      On Linux one should also set udp4_raw_mtu if the MTU on any network
40
+      interface that could be used for sending is smaller than 1500.
41
+      Can be set at runtime as long as sr was started with enough privileges
42
+      (core.udp4_raw).
43
+  - udp4_raw_mtu - MTU value used for UDP IPv4 packets when udp4_raw is
44
+      enabled.  It should be set to the minimum MTU of all the network
45
+      interfaces that could be used for sending. The default value is 1500.
46
+      Note that on BSDs it does not need to be set (if set it will be ignored,
47
+      the proper MTU will be used automatically by the kernel). On Linux it
48
+      should be set.
49
+      Can be set at runtime (core.udp4_raw_mtu).
50
+  - udp4_raw_ttl - TTL value used for UDP IPv4 packets when udp4_raw is
51
+      enabled. By default it is set to auto mode (-1), meaning that the
52
+      same TTL will be used as for normal UDP sockets.
53
+      Can be set at runtime (core.udp4_raw_ttl).
32 54
   - dst_blacklist_udp_imask - global blacklist events ignore mask for udp
33 55
     (a blacklist event/reason set in this variable will be ignored when 
34 56
     deciding whether or not to blacklist an udp destination). Can be set
... ...
@@ -211,6 +211,9 @@ ADD_LOCAL_RPORT		"add_local_rport"
211 211
 FORCE_TCP_ALIAS		"force_tcp_alias"|"add_tcp_alias"
212 212
 UDP_MTU		"udp_mtu"
213 213
 UDP_MTU_TRY_PROTO	"udp_mtu_try_proto"
214
+UDP4_RAW		"udp4_raw"
215
+UDP4_RAW_MTU	"udp4_raw_mtu"
216
+UDP4_RAW_TTL	"udp4_raw_ttl"
214 217
 SETFLAG		setflag
215 218
 RESETFLAG	resetflag
216 219
 ISFLAGSET	isflagset
... ...
@@ -605,6 +608,9 @@ SUBST       subst
605 605
 <INITIAL>{UDP_MTU}	{ count(); yylval.strval=yytext; return UDP_MTU; }
606 606
 <INITIAL>{UDP_MTU_TRY_PROTO}	{ count(); yylval.strval=yytext;
607 607
 									return UDP_MTU_TRY_PROTO; }
608
+<INITIAL>{UDP4_RAW}	{ count(); yylval.strval=yytext; return UDP4_RAW; }
609
+<INITIAL>{UDP4_RAW_MTU}	{ count(); yylval.strval=yytext; return UDP4_RAW_MTU; }
610
+<INITIAL>{UDP4_RAW_TTL}	{ count(); yylval.strval=yytext; return UDP4_RAW_TTL; }
608 611
 <INITIAL>{IF}	{ count(); yylval.strval=yytext; return IF; }
609 612
 <INITIAL>{ELSE}	{ count(); yylval.strval=yytext; return ELSE; }
610 613
 
... ...
@@ -200,6 +200,12 @@
200 200
 	#define IF_SCTP(x) warn("sctp support not compiled in")
201 201
 #endif
202 202
 
203
+#ifdef USE_RAW_SOCKS
204
+	#define IF_RAW_SOCKS(x) x
205
+#else
206
+	#define IF_RAW_SOCKS(x) warn("raw socket support not compiled in")
207
+#endif
208
+
203 209
 
204 210
 extern int yylex();
205 211
 /* safer than using yytext, which can be an array or a pointer */
... ...
@@ -325,6 +331,9 @@ extern char *finame;
325 325
 %token FORCE_TCP_ALIAS
326 326
 %token UDP_MTU
327 327
 %token UDP_MTU_TRY_PROTO
328
+%token UDP4_RAW
329
+%token UDP4_RAW_MTU
330
+%token UDP4_RAW_TTL
328 331
 %token IF
329 332
 %token ELSE
330 333
 %token SET_ADV_ADDRESS
... ...
@@ -1581,6 +1590,16 @@ assign_stm:
1581 1581
 		{ default_core_cfg.udp_mtu_try_proto=$3; fix_global_req_flags(0, 0); }
1582 1582
 	| UDP_MTU_TRY_PROTO EQUAL error
1583 1583
 		{ yyerror("TCP, TLS, SCTP or UDP expected"); }
1584
+	| UDP4_RAW EQUAL intno { IF_RAW_SOCKS(default_core_cfg.udp4_raw=$3); }
1585
+	| UDP4_RAW EQUAL error { yyerror("number expected"); }
1586
+	| UDP4_RAW_MTU EQUAL NUMBER {
1587
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_mtu=$3);
1588
+	}
1589
+	| UDP4_RAW_MTU EQUAL error { yyerror("number expected"); }
1590
+	| UDP4_RAW_TTL EQUAL NUMBER {
1591
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_ttl=$3);
1592
+	}
1593
+	| UDP4_RAW_TTL EQUAL error { yyerror("number expected"); }
1584 1594
 	| cfg_var
1585 1595
 	| error EQUAL { yyerror("unknown config variable"); }
1586 1596
 	;
... ...
@@ -24,16 +24,15 @@
24 24
  *  2007-12-03	Initial version (Miklos)
25 25
  *  2008-01-31  added DNS resolver parameters (Miklos)
26 26
  */
27
-/*!
28
- * \file
29
- * \brief SIP-router core ::  Core configuration parser
30
- * \ingroup core
31
- * Module: \ref core
27
+/** core runtime config.
28
+ * @file cfg_core.c
29
+ * @ingroup core
30
+ * Module: @ref core
32 31
  *
33
- * See 
34
- * - \ref ConfigCoreDoc
35
- * - \ref ConfigEngine
36
- * - \ref cfg_core.h
32
+ * See
33
+ * - @ref ConfigCoreDoc
34
+ * - @ref ConfigEngine
35
+ * - @ref cfg_core.h
37 36
  */
38 37
 /*!
39 38
  * \page ConfigCoreDoc Documentation of configuration parser
... ...
@@ -57,6 +56,8 @@
57 57
 #include "pt.h"
58 58
 #endif
59 59
 #include "msg_translator.h" /* fix_global_req_flags() */
60
+#include "globals.h"
61
+#include "sock_ut.h"
60 62
 #include "cfg/cfg.h"
61 63
 #include "cfg_core.h"
62 64
 
... ...
@@ -113,6 +114,9 @@ struct cfg_group_core default_core_cfg = {
113 113
 	DEFAULT_MAX_WHILE_LOOPS, /*!< max_while_loops */
114 114
 	0, /*!< udp_mtu (disabled by default) */
115 115
 	0, /*!< udp_mtu_try_proto -> default disabled */
116
+	0, /**< udp4_raw (disabled by default) */
117
+	1500, /**< udp4_raw_mtu (1500 by default) */
118
+	-1,  /**< udp4_raw_ttl (auto detect by default) */
116 119
 	0,  /*!< force_rport */
117 120
 	L_DBG, /*!< memlog */
118 121
 	3 /*!< mem_summary -flags: 0 off, 1 pkg_status, 2 shm_status,
... ...
@@ -121,6 +125,57 @@ struct cfg_group_core default_core_cfg = {
121 121
 
122 122
 void	*core_cfg = &default_core_cfg;
123 123
 
124
+
125
+static int check_raw_sock_support(void* cfg_h, str* gname, str* name,
126
+									void** v)
127
+{
128
+	int val;
129
+	
130
+	val = (int)(long)(*v);
131
+#ifndef USE_RAW_SOCKS
132
+	if (val > 0) {
133
+		ERR("no RAW_SOCKS support, please recompile with it enabled\n");
134
+		return -1;
135
+	}
136
+	return 0;
137
+#else /* USE_RAW_SOCKS */
138
+	if (raw_udp4_send_sock < 0) {
139
+		if (val > 0) {
140
+			ERR("could not intialize raw socket on startup, please "
141
+					"restart as root or with CAP_NET_RAW\n");
142
+			return -1;
143
+		} else if (val < 0) {
144
+			/* auto and no socket => disable */
145
+			*v = (void*)(long)0;
146
+		}
147
+	} else if (val < 0) {
148
+		/* auto and socket => enable */
149
+		*v = (void*)(long)1;
150
+	}
151
+	return 0;
152
+#endif /* USE_RAW_SOCKS */
153
+}
154
+
155
+
156
+
157
+static int  udp4_raw_ttl_fixup(void* cfg_h, str* gname, str* name, void** val)
158
+{
159
+	int v;
160
+	v = (int)(long)(*val);
161
+	if (v < 0) {
162
+		if (sendipv4)
163
+			v = sock_get_ttl(sendipv4->socket);
164
+	}
165
+	if (v < 0) {
166
+		/* some error => use a reasonable default */
167
+		v = 63;
168
+	}
169
+	*val = (void*)(long)v;
170
+	return 0;
171
+}
172
+
173
+
174
+
124 175
 cfg_def_t core_cfg_def[] = {
125 176
 	{"debug",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
126 177
 		"debug level"},
... ...
@@ -179,7 +234,8 @@ cfg_def_t core_cfg_def[] = {
179 179
 	{"dns_search_full_match",	CFG_VAR_INT,	0, 1, 0, 0,
180 180
 		"enable/disable domain name checks against the search list "
181 181
 		"in DNS answers"},
182
-	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup, resolv_reinit,
182
+	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup,
183
+		resolv_reinit,
183 184
 		"set to 1 in order to reinitialize the DNS resolver"},
184 185
 	/* DNS cache */
185 186
 #ifdef USE_DNS_CACHE
... ...
@@ -230,6 +286,16 @@ cfg_def_t core_cfg_def[] = {
230 230
 			" exceeds udp_mtu"},
231 231
 	{"udp_mtu_try_proto", CFG_VAR_INT, 1, 4, 0, fix_global_req_flags,
232 232
 		"if send size > udp_mtu use proto (1 udp, 2 tcp, 3 tls, 4 sctp)"},
233
+	{"udp4_raw", CFG_VAR_INT | CFG_ATOMIC, -1, 1, check_raw_sock_support, 0,
234
+		"enable/disable using a raw socket for sending UDP IPV4 packets."
235
+		" Should be  faster on multi-CPU linux running machines."},
236
+	{"udp4_raw_mtu", CFG_VAR_INT | CFG_ATOMIC, 28, 65535, 0, 0,
237
+		"set the MTU used when using raw sockets for udp sending."
238
+		" This  value will be used when deciding whether or not to fragment"
239
+		" the packets."},
240
+	{"udp4_raw_ttl", CFG_VAR_INT | CFG_ATOMIC, -1, 255, udp4_raw_ttl_fixup, 0,
241
+		"set the IP TTL used when using raw sockets for udp sending."
242
+		" -1 will use the same value as for normal udp sockets."},
233 243
 	{"force_rport",     CFG_VAR_INT, 0, 1,  0, fix_global_req_flags,
234 244
 		"force rport for all the received messages" },
235 245
 	{"memlog",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
... ...
@@ -36,12 +36,11 @@
36 36
  * -------
37 37
  *  2007-12-03	Initial version (Miklos)
38 38
  */
39
-/*!
40
- * \file
41
- * \brief SIP-router core :: Core configuration
42
- * \ingroup core
39
+/** core runtime config.
40
+ * @file cfg_core.h
41
+ * @ingroup core
43 42
  *
44
- * Module: \ref core
43
+ * Module: @ref core
45 44
  */
46 45
 
47 46
 
... ...
@@ -103,6 +102,9 @@ struct cfg_group_core {
103 103
 	int max_while_loops;
104 104
 	int udp_mtu; /*!< maximum send size for udp, if > try another protocol*/
105 105
 	int udp_mtu_try_proto; /*!< if packet> udp_mtu, try proto (e.g. TCP) */
106
+	int udp4_raw; /* use raw sockets for sending on udp ipv 4 */
107
+	int udp4_raw_mtu; /* mtu used when using udp raw socket */
108
+	int udp4_raw_ttl; /* ttl used when using udp raw sockets */
106 109
 	int force_rport; /*!< if set rport will always be forced*/
107 110
 	int memlog; /*!< log level for memory status/summary info */
108 111
 	int mem_summary; /*!< display memory status/summary info on exit */
... ...
@@ -42,6 +42,7 @@
42 42
 #include "tcp_info.h"
43 43
 #include "tcp_options.h"
44 44
 #include "core_cmd.h"
45
+#include "cfg_core.h"
45 46
 #ifdef USE_SCTP
46 47
 #include "sctp_options.h"
47 48
 #include "sctp_server.h"
... ...
@@ -843,6 +844,30 @@ static void core_sctpinfo(rpc_t* rpc, void* c)
843 843
 
844 844
 
845 845
 
846
+
847
+static const char* core_udp4rawinfo_doc[] = {
848
+	"Returns udp4_raw related info.",    /* Documentation string */
849
+	0                                     /* Method signature(s) */
850
+};
851
+
852
+static void core_udp4rawinfo(rpc_t* rpc, void* c)
853
+{
854
+#ifdef USE_RAW_SOCKS
855
+	void *handle;
856
+
857
+	rpc->add(c, "{", &handle);
858
+	rpc->struct_add(handle, "ddd",
859
+		"udp4_raw", cfg_get(core, core_cfg, udp4_raw),
860
+		"udp4_raw_mtu", cfg_get(core, core_cfg, udp4_raw_mtu),
861
+		"udp4_raw_ttl", cfg_get(core, core_cfg, udp4_raw_ttl)
862
+	);
863
+#else /* USE_RAW_SOCKS */
864
+	rpc->fault(c, 500, "udp4_raw mode support not compiled");
865
+#endif /* USE_RAW_SOCKS */
866
+}
867
+
868
+
869
+
846 870
 /*
847 871
  * RPC Methods exported by this module
848 872
  */
... ...
@@ -876,6 +901,8 @@ static rpc_export_t core_rpc_methods[] = {
876 876
 	{"core.sctp_options",      core_sctp_options,      core_sctp_options_doc,
877 877
 		0},
878 878
 	{"core.sctp_info",         core_sctpinfo,          core_sctpinfo_doc,   0},
879
+	{"core.udp4_raw_info",     core_udp4rawinfo,       core_udp4rawinfo_doc,
880
+		0},
879 881
 #ifdef USE_DNS_CACHE
880 882
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,
881 883
 		0	},
... ...
@@ -64,6 +64,10 @@ extern struct socket_info* bind_address; /* pointer to the crt. proc.
64 64
 extern struct socket_info* sendipv4; /* ipv4 socket to use when msg.
65 65
 										comes from ipv6*/
66 66
 extern struct socket_info* sendipv6; /* same as above for ipv6 */
67
+#ifdef USE_RAW_SOCKS
68
+extern int raw_udp4_send_sock;
69
+#endif /* USE_RAW_SOCKS */
70
+
67 71
 #ifdef USE_TCP
68 72
 extern struct socket_info* sendipv4_tcp; /* ipv4 socket to use when msg.
69 73
 										comes from ipv6*/
... ...
@@ -73,9 +73,9 @@
73 73
  * 2008-08-08  sctp support (andrei)
74 74
  * 2008-08-19  -l support for multihomed addresses/addresses lists
75 75
  *                (e.g. -l (eth0, 1.2.3.4, foo.bar) ) (andrei)
76
- *  2010-04-19 added daemon_status_fd pipe to communicate the parent process
77
- *             with the main process in daemonize mode, so the parent process
78
- *             can return the proper exit status code (ibc)
76
+ * 2010-04-19  added daemon_status_fd pipe to communicate the parent process
77
+ *              with the main process in daemonize mode, so the parent process
78
+ *              can return the proper exit status code (ibc)
79 79
  */
80 80
 
81 81
 /** main file (init, daemonize, startup) 
... ...
@@ -145,6 +145,9 @@
145 145
 #include "nonsip_hooks.h"
146 146
 #include "ut.h"
147 147
 #include "signals.h"
148
+#ifdef USE_RAW_SOCKS
149
+#include "raw_sock.h"
150
+#endif /* USE_RAW_SOCKS */
148 151
 #ifdef USE_TCP
149 152
 #include "poll_types.h"
150 153
 #include "tcp_init.h"
... ...
@@ -185,6 +188,7 @@
185 185
 #include "pvapi_init.h" /* init */
186 186
 #include "pv_core.h" /* register core pvars */
187 187
 #include "ppcfg.h"
188
+#include "sock_ut.h"
188 189
 
189 190
 #ifdef DEBUG_DMALLOC
190 191
 #include <dmalloc.h>
... ...
@@ -437,6 +441,9 @@ struct socket_info* bind_address=0; /* pointer to the crt. proc.
437 437
 									 listening address*/
438 438
 struct socket_info* sendipv4; /* ipv4 socket to use when msg. comes from ipv6*/
439 439
 struct socket_info* sendipv6; /* same as above for ipv6 */
440
+#ifdef USE_RAW_SOCKS
441
+int raw_udp4_send_sock = -1; /* raw socket used for sending udp4 packets */
442
+#endif /* USE_RAW_SOCKS */
440 443
 #ifdef USE_TCP
441 444
 struct socket_info* sendipv4_tcp;
442 445
 struct socket_info* sendipv6_tcp;
... ...
@@ -1236,15 +1243,57 @@ int main_loop()
1236 1236
 		/* only one address, we ignore all the others */
1237 1237
 		if (udp_init(udp_listen)==-1) goto error;
1238 1238
 		bind_address=udp_listen;
1239
-		if (bind_address->address.af==AF_INET)
1239
+		if (bind_address->address.af==AF_INET) {
1240 1240
 			sendipv4=bind_address;
1241
-		else
1241
+#ifdef USE_RAW_SOCKS
1242
+		/* always try to have a raw socket opened if we are using ipv4 */
1243
+		raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
1244
+		if (raw_udp4_send_sock < 0) {
1245
+			if ( default_core_cfg.udp4_raw > 0) {
1246
+				/* force use raw socket failed */
1247
+				ERR("could not initialize raw udp send socket (ipv4):"
1248
+						" %s (%d)\n", strerror(errno), errno);
1249
+				if (errno == EPERM)
1250
+					ERR("could not initialize raw socket on startup"
1251
+						" due to inadequate permissions, please"
1252
+						" restart as root or with CAP_NET_RAW\n");
1253
+				goto error;
1254
+			}
1255
+			default_core_cfg.udp4_raw = 0; /* disabled */
1256
+		} else {
1257
+			register_fds(1);
1258
+			if (default_core_cfg.udp4_raw < 0) {
1259
+				/* auto-detect => use it */
1260
+				default_core_cfg.udp4_raw = 1; /* enabled */
1261
+				DBG("raw socket possible => turning it on\n");
1262
+			}
1263
+			if (default_core_cfg.udp4_raw_ttl < 0) {
1264
+				/* auto-detect */
1265
+				default_core_cfg.udp4_raw_ttl = sock_get_ttl(sendipv4->socket);
1266
+				if (default_core_cfg.udp4_raw_ttl < 0)
1267
+					/* error, use some default value */
1268
+					default_core_cfg.udp4_raw_ttl = 63;
1269
+			}
1270
+		}
1271
+#else
1272
+		default_core.cfg.udp4_raw = 0;
1273
+#endif /* USE_RAW_SOCKS */
1274
+		} else
1242 1275
 			sendipv6=bind_address;
1243 1276
 		if (udp_listen->next){
1244 1277
 			LOG(L_WARN, "WARNING: using only the first listen address"
1245 1278
 						" (no fork)\n");
1246 1279
 		}
1247 1280
 
1281
+		/* delay cfg_shmize to the last moment (it must be called _before_
1282
+		   forking). Changes to default cfgs after this point will be
1283
+		   ignored.
1284
+		*/
1285
+		if (cfg_shmize() < 0) {
1286
+			LOG(L_CRIT, "could not initialize shared configuration\n");
1287
+			goto error;
1288
+		}
1289
+	
1248 1290
 		/* Register the children that will keep updating their
1249 1291
 		 * local configuration */
1250 1292
 		cfg_register_child(
... ...
@@ -1363,6 +1412,42 @@ int main_loop()
1363 1363
 			/* children_no per each socket */
1364 1364
 			cfg_register_child(children_no);
1365 1365
 		}
1366
+#ifdef USE_RAW_SOCKS
1367
+		/* always try to have a raw socket opened if we are using ipv4 */
1368
+		if (sendipv4) {
1369
+			raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
1370
+			if (raw_udp4_send_sock < 0) {
1371
+				if ( default_core_cfg.udp4_raw > 0) {
1372
+						/* force use raw socket failed */
1373
+						ERR("could not initialize raw udp send socket (ipv4):"
1374
+								" %s (%d)\n", strerror(errno), errno);
1375
+						if (errno == EPERM)
1376
+							ERR("could not initialize raw socket on startup"
1377
+								" due to inadequate permissions, please"
1378
+								" restart as root or with CAP_NET_RAW\n");
1379
+						goto error;
1380
+					}
1381
+					default_core_cfg.udp4_raw = 0; /* disabled */
1382
+			} else {
1383
+				register_fds(1);
1384
+				if (default_core_cfg.udp4_raw < 0) {
1385
+					/* auto-detect => use it */
1386
+					default_core_cfg.udp4_raw = 1; /* enabled */
1387
+					DBG("raw socket possible => turning it on\n");
1388
+				}
1389
+				if (default_core_cfg.udp4_raw_ttl < 0) {
1390
+					/* auto-detect */
1391
+					default_core_cfg.udp4_raw_ttl =
1392
+						sock_get_ttl(sendipv4->socket);
1393
+					if (default_core_cfg.udp4_raw_ttl < 0)
1394
+						/* error, use some default value */
1395
+						default_core_cfg.udp4_raw_ttl = 63;
1396
+				}
1397
+			}
1398
+		}
1399
+#else
1400
+		default_core_cfg.udp4_raw = 0;
1401
+#endif /* USE_RAW_SOCKS */
1366 1402
 #ifdef USE_SCTP
1367 1403
 		if (!sctp_disable){
1368 1404
 			for(si=sctp_listen; si; si=si->next){
... ...
@@ -1428,6 +1513,14 @@ int main_loop()
1428 1428
 			 * sending) so we open all first*/
1429 1429
 		if (do_suid()==-1) goto error; /* try to drop privileges */
1430 1430
 
1431
+		/* delay cfg_shmize to the last moment (it must be called _before_
1432
+		   forking). Changes to default cfgs after this point will be
1433
+		   ignored (cfg_shmize() will copy the default cfgs into shmem).
1434
+		*/
1435
+		if (cfg_shmize() < 0) {
1436
+			LOG(L_CRIT, "could not initialize shared configuration\n");
1437
+			goto error;
1438
+		}
1431 1439
 		/* init childs with rank==PROC_INIT before forking any process,
1432 1440
 		 * this is a place for delayed (after mod_init) initializations
1433 1441
 		 * (e.g. shared vars that depend on the total number of processes
... ...
@@ -2291,11 +2384,6 @@ try_select_again:	tval.tv_usec = 0;
2291 2291
 		goto error;
2292 2292
 	}
2293 2293
 	
2294
-	if (cfg_shmize() < 0) {
2295
-		LOG(L_CRIT, "could not initialize shared configuration\n");
2296
-		goto error;
2297
-	}
2298
-	
2299 2294
 	/* initialize process_table, add core process no. (calc_proc_no()) to the
2300 2295
 	 * processes registered from the modules*/
2301 2296
 	if (init_pt(calc_proc_no())==-1)
2302 2297
new file mode 100644
... ...
@@ -0,0 +1,144 @@
0
+/*
1
+ * Copyright (C) 2010 iptelorg GmbH
2
+ *
3
+ * Permission to use, copy, modify, and distribute this software for any
4
+ * purpose with or without fee is hereby granted, provided that the above
5
+ * copyright notice and this permission notice appear in all copies.
6
+ *
7
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
+ */
15
+/** raw socket udp listen functions.
16
+ *  @file raw_listener.c
17
+ *  @ingroup core
18
+ *  Module: @ref core
19
+ */
20
+/*
21
+ * History:
22
+ * --------
23
+ *  2010-06-09  initial version (from older code) andrei
24
+ */
25
+
26
+#ifdef USE_RAW_SOCKS
27
+
28
+
29
+#include "raw_listener.h"
30
+#include "raw_sock.h"
31
+#include "receive.h"
32
+
33
+#include <errno.h>
34
+#include <string.h>
35
+
36
+struct socket_info* raw_udp_sendipv4=0;
37
+
38
+/** creates a raw socket based on a socket_info structure.
39
+ * Side-effects: sets raw_udp_sendipv4 if not already set.
40
+ * @param si - pointer to partially filled socket_info structure (su must
41
+ *              be set).
42
+ * @param iface - pointer to network interface to bind on (str). Can be null.
43
+ * @param iphdr_incl - 1 if send on these socket will include the IP header.
44
+ * @return <0 on error, socket on success.
45
+ */
46
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl)
47
+{
48
+	int sock;
49
+	struct ip_addr ip;
50
+	
51
+	su2ip_addr(&ip, &si->su);
52
+	sock=raw_udp4_socket(&ip, iface, iphdr_incl);
53
+	if (sock>=0){
54
+		if (raw_udp_sendipv4==0 || iface==0 || iface->s==0)
55
+			raw_udp_sendipv4=si;
56
+	}
57
+	return sock;
58
+}
59
+
60
+
61
+
62
+/** receive sip udp ipv4 packets over a raw socket in a loop.
63
+ * It should be called by a "raw socket receiver" process
64
+ * (since the function never exits unless it encounters a
65
+ *  critical error).
66
+ * @param rsock - initialized raw socket.
67
+ * @param port1 - start of port range.
68
+ * @param port2 - end of port range. If 0 it's equivalent to listening only
69
+ *                on port1.
70
+ * @return <0 on error, never returns on success.
71
+ */
72
+int raw_udp4_rcv_loop(int rsock, int port1, int port2)
73
+{
74
+	static char buf[BUF_SIZE+1];
75
+	char* p;
76
+	char* tmp;
77
+	union sockaddr_union from;
78
+	union sockaddr_union to;
79
+	struct receive_info ri;
80
+	struct raw_filter rf;
81
+	int len;
82
+	
83
+	/* this will not change */
84
+	from.sin.sin_family=AF_INET;
85
+	ri.bind_address=0;
86
+	ri.proto=PROTO_UDP;
87
+	ri.proto_reserved1=0;
88
+	ri.proto_reserved2=0;
89
+	/* set filter to match any address but with the specified port range */
90
+	memset(&rf, 0, sizeof(rf));
91
+	rf.dst.ip.af=AF_INET;
92
+	rf.dst.ip.len=4;
93
+	rf.dst.mask.af=AF_INET;
94
+	rf.dst.mask.len=4;
95
+	rf.proto=PROTO_UDP;
96
+	rf.port1=port1;
97
+	rf.port2=port2?port2:port1;
98
+	for(;;){
99
+		p=buf;
100
+		len=raw_udp4_recv(rsock, &p, BUF_SIZE, &from, &to, &rf);
101
+		if (len<0){
102
+			if (len==-1){
103
+				LOG(L_ERR, "ERROR: raw_udp4_rcv_loop:raw_udp4_recv: %s [%d]\n",
104
+						strerror(errno), errno);
105
+				if ((errno==EINTR)||(errno==EWOULDBLOCK))
106
+					continue;
107
+				else
108
+					goto error;
109
+			}else{
110
+				DBG("raw_udp4_rcv_loop: raw_udp4_recv error: %d\n", len);
111
+				continue;
112
+			}
113
+		}
114
+		/* we must 0-term the message */
115
+		p[len]=0;
116
+		ri.src_su=from;
117
+		su2ip_addr(&ri.src_ip, &from);
118
+		ri.src_port=su_getport(&from);
119
+		su2ip_addr(&ri.dst_ip, &to);
120
+		ri.dst_port=su_getport(&to);
121
+		/* sanity checks */
122
+		if (len<MIN_UDP_PACKET){
123
+			tmp=ip_addr2a(&ri.src_ip);
124
+			DBG("raw_udp4_rcv_loop: probing packet received from %s %d\n",
125
+					tmp, htons(ri.src_port));
126
+			continue;
127
+		}
128
+		if (ri.src_port==0){
129
+			tmp=ip_addr2a(&ri.src_ip);
130
+			LOG(L_INFO, "raw_udp4_rcv_loop: dropping 0 port packet from %s\n",
131
+						tmp);
132
+			continue;
133
+		}
134
+		tmp=ip_addr2a(&ri.src_ip);
135
+		DBG("raw_udp4_rcv_loop: received from %s:\n[%.*s]\n", tmp, len, p);
136
+		receive_msg(p, len, &ri);
137
+	}
138
+error:
139
+	return -1;
140
+}
141
+
142
+
143
+#endif /* USE_RAW_SOCKS */
0 144
new file mode 100644
... ...
@@ -0,0 +1,39 @@
0
+/*
1
+ * Copyright (C) 2010 iptelorg GmbH
2
+ *
3
+ * Permission to use, copy, modify, and distribute this software for any
4
+ * purpose with or without fee is hereby granted, provided that the above
5
+ * copyright notice and this permission notice appear in all copies.
6
+ *
7
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
+ */
15
+/** raw socket udp listen functions.
16
+ *  @file raw_listener.h
17
+ *  @ingroup core
18
+ *  Module: @ref core
19
+ */
20
+/*
21
+ * History:
22
+ * --------
23
+ *  2010-06-09  initial version (from older code) andrei
24
+ */
25
+
26
+#ifndef _raw_listener_h
27
+#define _raw_listener_h
28
+
29
+#include "ip_addr.h"
30
+
31
+
32
+/** default raw socket used for sending on udp ipv4 */
33
+struct socket_info* raw_udp_sendipv4;
34
+
35
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl);
36
+int raw_udp4_rcv_loop(int rsock, int port1, int port2);
37
+
38
+#endif /* _raw_listener_h */
0 39
new file mode 100644
... ...
@@ -0,0 +1,696 @@
0
+/* 
1
+ * $Id$
2
+ *
3
+ * Copyright (C) 2010 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+/** raw socket functions.
18
+ *  @file raw_sock.c
19
+ *  @ingroup core
20
+ *  Module: @ref core
21
+ */
22
+/* 
23
+ * History:
24
+ * --------
25
+ *  2010-06-07  initial version (from older code) andrei
26
+ *  2010-06-15  IP_HDRINCL raw socket support, including on-send
27
+ *               fragmentation (andrei)
28
+ */
29
+
30
+#ifdef USE_RAW_SOCKS
31
+
32
+#include "compiler_opt.h"
33
+#include "ip_addr.h"
34
+#include "dprint.h"
35
+#include "str.h"
36
+#include "rand/fastrand.h"
37
+#include "globals.h"
38
+
39
+#include <errno.h>
40
+#include <string.h>
41
+#include <unistd.h>
42
+#include <sys/types.h>
43
+#include <fcntl.h>
44
+#include <sys/socket.h>
45
+#include <netinet/in.h>
46
+#include <netinet/in_systm.h>
47
+#include <arpa/inet.h>
48
+#ifndef __USE_BSD
49
+#define __USE_BSD  /* on linux use bsd version of iphdr (more portable) */
50
+#endif /* __USE_BSD */
51
+#include <netinet/ip.h>
52
+#define __FAVOR_BSD /* on linux use bsd version of udphdr (more portable) */
53
+#include <netinet/udp.h>
54
+
55
+#include "raw_sock.h"
56
+#include "cfg/cfg.h"
57
+#include "cfg_core.h"
58
+
59
+
60
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined(__OS_openbsd) \
61
+	|| defined (__OS_darwin)
62
+/** fragmentation is done by the kernel (no need to do it in userspace) */
63
+#define RAW_IPHDR_INC_AUTO_FRAG
64
+#endif /* __OS_* */
65
+
66
+/* macros for converting values in the expected format */
67
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined (__OS_darwin)
68
+/* on freebsd and netbsd the ip offset (along with flags) and the
69
+   ip header length must be filled in _host_ bytes order format.
70
+   The same is true for openbsd < 2.1.
71
+*/
72
+/** convert the ip offset in the format expected by the kernel. */
73
+#define RAW_IPHDR_IP_OFF(off) (unsigned short)(off)
74
+/** convert the ip total length in the format expected by the kernel. */
75
+#define RAW_IPHDR_IP_LEN(tlen) (unsigned short)(tlen)
76
+
77
+#else /* __OS_* */
78
+/* linux, openbsd >= 2.1 a.s.o. */
79
+/** convert the ip offset in the format expected by the kernel. */
80
+#define RAW_IPHDR_IP_OFF(off)  htons((unsigned short)(off))
81
+/** convert the ip total length in the format expected by the kernel. */
82
+#define RAW_IPHDR_IP_LEN(tlen) htons((unsigned short)(tlen))
83
+
84
+#endif /* __OS_* */
85
+
86
+
87
+/** create and return a raw socket.
88
+ * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
89
+ * @param ip - if not null the socket will be bound on this ip.
90
+ * @param iface - if not null the socket will be bound to this interface
91
+ *                (SO_BINDTODEVICE). This is supported only on linux.
92
+ * @param iphdr_incl - set to 1 if packets send on this socket include
93
+ *                     a pre-built ip header (some fields, like the checksum
94
+ *                     will still be filled by the kernel, OTOH packet
95
+ *                     fragmentation has to be done in user space).
96
+ * @return socket on success, -1 on error
97
+ */
98
+int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
99
+{
100
+	int sock;
101
+	int t; /* scratch option value passed to setsockopt() */
102
+	union sockaddr_union su;
103
+#if defined (SO_BINDTODEVICE)
104
+	char short_ifname[sizeof(int)]; /* padded copy for names shorter than sizeof(int) */
105
+	int ifname_len;
106
+	char* ifname;
107
+#endif /* SO_BINDTODEVICE */
108
+
109
+	sock = socket(PF_INET, SOCK_RAW, proto);
110
+	if (sock==-1)
111
+		goto error; /* note: this failure path logs nothing */
112
+	/* set socket options */
113
+	if (iphdr_incl) {
114
+		t=1;
115
+		if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &t, sizeof(t))<0){
116
+			ERR("raw_socket: setsockopt(IP_HDRINCL) failed: %s [%d]\n",
117
+					strerror(errno), errno);
118
+			goto error;
119
+		}
120
+	} else {
121
+		/* IP_PKTINFO makes no sense if the ip header is included */
122
+		/* using IP_PKTINFO (or IP_RECVDSTADDR where IP_PKTINFO is not defined) */
123
+		t=1;
124
+#ifdef IP_PKTINFO
125
+		if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
126
+			ERR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
127
+					strerror(errno), errno);
128
+			goto error;
129
+		}
130
+#elif defined(IP_RECVDSTADDR)
131
+		if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
132
+			ERR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
133
+					strerror(errno), errno);
134
+			goto error;
135
+		}
136
+#else
137
+#error "no method of getting the destination ip address supported"
138
+#endif /* IP_RECVDSTADDR / IP_PKTINFO */
139
+	}
140
+#if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
141
+	t=IP_PMTUDISC_DONT; /* applied in both the iphdr_incl and the non-iphdr_incl case */
142
+	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
143
+		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
144
+				strerror(errno));
145
+		goto error;
146
+	}
147
+#endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
148
+	if (iface && iface->s){
149
+#if defined (SO_BINDTODEVICE)
150
+		/* workaround for linux bug: arg to setsockopt must have at least
151
+		 * sizeof(int) size or EINVAL would be returned */
152
+		if (iface->len<sizeof(int)){
153
+			memcpy(short_ifname, iface->s, iface->len);
154
+			short_ifname[iface->len]=0; /* make sure it's zero term */
155
+			ifname_len=sizeof(short_ifname); /* pass the full padded size, not iface->len */
156
+			ifname=short_ifname;
157
+		}else{
158
+			ifname_len=iface->len; /* long enough: pass the caller's buffer as is */
159
+			ifname=iface->s;
160
+		}
161
+		if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, ifname, ifname_len)
162
+						<0){
163
+				ERR("raw_socket: could not bind to %.*s: %s [%d]\n",
164
+							iface->len, ZSW(iface->s), strerror(errno), errno);
165
+				goto error;
166
+		}
167
+#else /* !SO_BINDTODEVICE */
168
+		/* SO_BINDTODEVICE is linux specific => cannot bind to a device */
169
+		ERR("raw_socket: bind to device supported only on linux\n");
170
+		goto error;
171
+#endif /* SO_BINDTODEVICE */
172
+	}
173
+	/* FIXME: probe_max_receive_buffer(sock) missing */
174
+	if (ip){
175
+		init_su(&su, ip, 0); /* port argument is 0 */
176
+		if (bind(sock, &su.s, sockaddru_len(su))==-1){
177
+			ERR("raw_socket: bind(%s) failed: %s [%d]\n",
178
+				ip_addr2a(ip), strerror(errno), errno);
179
+			goto error;
180
+		}
181
+	}
182
+	return sock;
183
+error:
184
+	if (sock!=-1) close(sock); /* don't leak the fd when a later setup step failed */
185
+	return -1;
186
+}
187
+
188
+
189
+
190
+/** create and return an udp over ipv4  raw socket.
191
+ * @param ip - if not null the socket will be bound on this ip.
192
+ * @param iface - if not null the socket will be bound to this interface
193
+ *                (SO_BINDTODEVICE).
194
+ * @param iphdr_incl - set to 1 if packets send on this socket include
195
+ *                     a pre-built ip header (some fields, like the checksum
196
+ *                     will still be filled by the kernel, OTOH packet
197
+ *                     fragmentation has to be done in user space).
198
+ * @return socket on success, -1 on error
199
+ */
200
+int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
201
+{
202
+	return raw_socket(IPPROTO_UDP, ip, iface, iphdr_incl); /* raw_socket() with the protocol fixed to IPPROTO_UDP */
203
+}
204
+
205
+
206
+
207
+/** receives an ipv4 packet using a raw socket.
208
+ * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
209
+ * from and to are filled (only the ip part the ports are 0 since this
210
+ * function doesn't try to look beyond the IP level).
211
+ * @param sock - raw socket
212
+ * @param buf - destination buffer.
213
+ * @param len - buffer len (should be enough for receiving a packet +
214
+ *               IP header).
215
+ * @param from - result parameter, the IP address part of it will be filled
216
+ *                with the source address and the port with 0.
217
+ * @param to - result parameter, the IP address part of it will be filled
218
+ *                with the destination (local) address and the port with 0.
219
+ * @return packet len or <0 on error: -1 (check errno),
220
+ *        -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
221
+ */
222
+int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
223
+					union sockaddr_union* to)
224
+{
225
+	struct iovec iov[1];
226
+	struct msghdr rcv_msg;
227
+	struct cmsghdr* cmsg;
228
+#ifdef IP_PKTINFO
229
+	struct in_pktinfo* rcv_pktinfo;
230
+#endif /* IP_PKTINFO */
231
+	int n, ret;
232
+	char msg_ctrl_buf[1024]; /* space for the ancillary (cmsg) data */
233
+
234
+	iov[0].iov_base=buf;
235
+	iov[0].iov_len=len;
236
+	rcv_msg.msg_name=from;
237
+	rcv_msg.msg_namelen=sockaddru_len(*from); /* NOTE(review): derived from *from before the receive - presumably the caller pre-sets its address family; confirm */
238
+	rcv_msg.msg_control=msg_ctrl_buf;
239
+	rcv_msg.msg_controllen=sizeof(msg_ctrl_buf);
240
+	rcv_msg.msg_iov=&iov[0];
241
+	rcv_msg.msg_iovlen=1;
242
+	ret=-2; /* no PKT_INFO or AF mismatch */
243
+retry:
244
+	n=recvmsg(sock, &rcv_msg, MSG_WAITALL);
245
+	if (unlikely(n==-1)){
246
+		if (errno==EINTR)
247
+			goto retry; /* interrupted => try the receive again */
248
+		ret=n; /* -1, errno was set by recvmsg() */
249
+		goto end;
250
+	}
251
+	/* find the pkt info */
252
+	for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg; cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
253
+#ifdef IP_PKTINFO
254
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
255
+					(cmsg->cmsg_type==IP_PKTINFO))) {
256
+			rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
257
+			to->sin.sin_family=AF_INET;
258
+			memcpy(&to->sin.sin_addr, &rcv_pktinfo->ipi_spec_dst.s_addr, 
259
+									sizeof(to->sin.sin_addr));
260
+			to->sin.sin_port=0; /* not known */
261
+			/* interface no. in ipi_ifindex */
262
+			ret=n; /* success */
263
+			break;
264
+		}
265
+#elif defined (IP_RECVDSTADDR)
266
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
267
+					(cmsg->cmsg_type==IP_RECVDSTADDR))) {
268
+			to->sin.sin_family=AF_INET;
269
+			memcpy(&to->sin.sin_addr, CMSG_DATA(cmsg),
270
+									sizeof(to->sin.sin_addr));
271
+			to->sin.sin_port=0; /* not known */
272
+			ret=n; /* success */
273
+			break;
274
+		}
275
+#else
276
+#error "no method of getting the destination ip address supported"
277
+#endif /* IP_PKTINFO / IP_RECVDSTADDR */
278
+	}
279
+end:
280
+	return ret; /* still -2 if no matching cmsg was found in the loop above */
281
+}
282
+
283
+
284
+
285
+/* receive an ipv4 udp packet over a raw socket.
286
+ * The packet is copied in *buf and *buf is advanced to point to the
287
+ * payload.  Fills from and to.
288
+ * @param rsock - raw socket
289
+ * @param buf - the packet will be written to where *buf points initially and
290
+ *              then *buf will be advanced to point to the udp payload.
291
+ * @param len - buffer length (should be enough to hold at least the
292
+ *               ip and udp headers + 1 byte).
293
+ * @param from - result parameter, filled with source address and port of the
294
+ *               packet.
295
+ * @param to - result parameter, filled with destination (local) address and
296
+ *               port of the packet.
297
+ * @param rf   - filter used to decide whether or not the packet is
298
+ *                accepted/processed. If null, all the packets are accepted.
299
+ * @return packet len or  <0 on error (-1 and -2 on recv error @see recvpkt4,
300
+ *         -3 if the headers are invalid and -4 if the packet doesn't
301
+ *         match the  filter).
302
+ */
303
+int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
304
+					union sockaddr_union* to, struct raw_filter* rf)
305
+{
306
+	int n;
307
+	unsigned short dst_port;
308
+	unsigned short src_port;
309
+	struct ip_addr dst_ip;
310
+	char* end; /* one past the last received byte */
311
+	char* udph_start;
312
+	char* udp_payload;
313
+	struct ip iph;
314
+	struct udphdr udph;
315
+	unsigned short udp_len;
316
+
317
+	n=recvpkt4(rsock, *buf, len, from, to);
318
+	if (unlikely(n<0)) goto error; /* -1 / -2 are passed through unchanged */
319
+	
320
+	end=*buf+n;
321
+	if (unlikely(n<(sizeof(struct ip)+sizeof(struct udphdr)))) {
322
+		n=-3; /* too short to hold an ip + udp header */
323
+		goto error;
324
+	}
325
+	/* FIXME: if initial buffer is aligned, one could skip the memcpy
326
+	   and directly cast ip and udphdr pointer to the memory */
327
+	memcpy(&iph, *buf, sizeof(struct ip));
328
+	udph_start=*buf+iph.ip_hl*4; /* ip_hl counts 32-bit words */
329
+	udp_payload=udph_start+sizeof(struct udphdr);
330
+	if (unlikely(udp_payload>end)){
331
+		n=-3; /* the udp header would run past the received data */
332
+		goto error;
333
+	}
334
+	memcpy(&udph, udph_start, sizeof(struct udphdr));
335
+	udp_len=ntohs(udph.uh_ulen);
336
+	if (unlikely((udph_start+udp_len)!=end)){
337
+		if ((udph_start+udp_len)>end){
338
+			n=-3; /* udp length claims more bytes than were received */
339
+			goto error;
340
+		}else{
341
+			ERR("udp length too small: %d/%d\n",
342
+					(int)udp_len, (int)(end-udph_start));
343
+			n=-3;
344
+			goto error;
345
+		}
346
+	}
347
+	/* advance buf */
348
+	*buf=udp_payload;
349
+	n=(int)(end-*buf); /* from here on n is the udp payload length */
350
+	/* fill ip from the packet (needed if no PKT_INFO is used) */
351
+	dst_ip.af=AF_INET;
352
+	dst_ip.len=4;
353
+	dst_ip.u.addr32[0]=iph.ip_dst.s_addr;
354
+	/* fill dst_port */
355
+	dst_port=ntohs(udph.uh_dport);
356
+	ip_addr2su(to, &dst_ip, dst_port);
357
+	/* fill src_port */
358
+	src_port=ntohs(udph.uh_sport);
359
+	su_setport(from, src_port);
360
+	if (likely(rf)) {
361
+		su2ip_addr(&dst_ip, to);
362
+		if ( (dst_port && rf->port1 && ((dst_port<rf->port1) ||
363
+										(dst_port>rf->port2)) ) ||
364
+			(matchnet(&dst_ip, &rf->dst)!=1) ){
365
+			/* no match */
366
+			n=-4;
367
+			goto error;
368
+		}
369
+	}
370
+	
371
+error: /* also reached on success: n then holds the payload length */
372
+	return n;
373
+}
374
+
375
+
376
+
377
+/** udp checksum helper: compute the pseudo-header 16-bit "sum".
378
+ * Computes the partial checksum (no complement) of the pseudo-header.
379
+ * It is meant to be used by udpv4_chksum().
380
+ * @param uh - filled udp header
381
+ * @param src - source ip address in network byte order.
382
+ * @param dst - destination ip address in network byte order.
383
+ * @param length - payload length (not including the udp header),
384
+ *                 in _host_ order.
385
+ * @return the partial checksum in host order
386
+ */
387
+inline unsigned short udpv4_vhdr_sum(	struct udphdr* uh,
388
+										struct in_addr* src,
389
+										struct in_addr* dst,
390
+										unsigned short length) /* not read in the body */
391
+{
392
+	unsigned sum;
393
+	
394
+	/* pseudo header */
395
+	sum=(src->s_addr>>16)+(src->s_addr&0xffff)+
396
+		(dst->s_addr>>16)+(dst->s_addr&0xffff)+
397
+		htons(IPPROTO_UDP)+(uh->uh_ulen); /* the udp length is part of the pseudo header... */
398
+	/* udp header */
399
+	sum+=(uh->uh_dport)+(uh->uh_sport)+(uh->uh_ulen) + 0 /*chksum*/; 
400
+	/* fold it */
401
+	sum=(sum>>16)+(sum&0xffff);
402
+	sum+=(sum>>16);
403
+	/* no complement */
404
+	return ntohs((unsigned short) sum); /* fields were summed as stored; convert the folded result once */
405
+}
406
+
407
+
408
+
409
+/** compute the udp over ipv4 checksum.
410
+ * @param u - filled udp header (except checksum).
411
+ * @param src - source ip v4 address, in _network_ byte order.
412
+ * @param dst - destination ip v4 address, in _network_ byte order.
413
+ * @param data - pointer to the udp payload.
414
+ * @param length - payload length, not including the udp header and in
415
+ *                 _host_ order. The length must be <= 0xffff - 8
416
+ *                 (to allow space for the udp header).
417
+ * @return the checksum in _host_ order (never 0: a computed 0 becomes 0xffff) */
418
+inline static unsigned short udpv4_chksum(struct udphdr* u,
419
+							struct in_addr* src, struct in_addr* dst,
420
+							unsigned char* data, unsigned short length)
421
+{
422
+	unsigned sum;
423
+	unsigned char* end;
424
+	sum=udpv4_vhdr_sum(u, src, dst, length); /* pseudo header + udp header part */
425
+	end=data+(length&(~0x1)); /* make sure it's even */
426
+	/* TODO: 16 & 32 bit aligned version */
427
+		/* not aligned */
428
+		for(;data<end;data+=2){
429
+			sum+=((data[0]<<8)+data[1]);
430
+		}
431
+		if (length&0x1)
432
+			sum+=((*data)<<8); /* odd trailing byte, padded with a zero low byte */
433
+	
434
+	/* fold it */
435
+	sum=(sum>>16)+(sum&0xffff);
436
+	sum+=(sum>>16);
437
+	return ((~sum)&0xffff)? (unsigned short)~sum : 0xffff; /* RFC 768: 0 on the wire means "no checksum", so a computed 0 is sent as all ones */
438
+}
439
+
440
+
441
+
442
+/** fill in an udp header.
443
+ * @param u - udp header that will be filled.
444
+ * @param from - source ip v4 address and port.
445
+ * @param to -   destination ip v4 address and port.
446
+ * @param buf - pointer to the payload.
447
+ * @param len - payload length (not including the udp header).
448
+ * @param do_chk - if set the udp checksum will be computed, else it will
449
+ *                 be set to 0.
450
+ * @return 0 on success, < 0 on error.
451
+ */
452
+inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from, 
453
+				struct sockaddr_in* to, unsigned char* buf, int len,
454
+					int do_chk)
455
+{
456
+	u->uh_ulen=htons((unsigned short)len+sizeof(struct udphdr)); /* udp length = payload + udp header */
457
+	u->uh_sport=from->sin_port; /* ports are copied as stored in the sockaddr, no conversion here */
458
+	u->uh_dport=to->sin_port;
459
+	if (do_chk)
460
+		u->uh_sum=htons(
461
+				udpv4_chksum(u, &from->sin_addr, &to->sin_addr,  buf, len));
462
+	else
463
+		u->uh_sum=0; /* no checksum */
464
+	return 0; /* no failure path in this function */
465
+}
466
+
467
+
468
+
469
+/** fill in an ip header.
470
+ * Note: the checksum is _not_ computed.
471
+ * WARNING: The ip header length and offset might be filled in
472
+ * _host_ byte order or network byte order (depending on the OS, for example
473
+ *  freebsd needs host byte order for raw sockets with IPHDR_INC, while
474
+ *  linux needs network byte order).
475
+ * @param iph - ip header that will be filled.
476
+ * @param from - source ip v4 address (network byte order).
477
+ * @param to -   destination ip v4 address (network byte order).
478
+ * @param payload len - payload length (not including the ip header).
479
+ * @param proto - protocol.
480
+ * @return 0 on success, < 0 on error.
481
+ */
482
+inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
483
+				struct in_addr* to, int payload_len, unsigned char proto)
484
+{
485
+	iph->ip_hl = sizeof(struct ip)/4; /* header length in 32-bit words (no ip options) */
486
+	iph->ip_v = 4;
487
+	iph->ip_tos = tos; /* tos is neither a parameter nor a local: it is defined outside this function */
488
+	/* on freebsd ip_len _must_ be in _host_ byte order instead
489
+	   of network byte order. On linux the length is ignored (it's filled
490
+	   automatically every time). */
491
+	iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
492
+	iph->ip_id = 0; /* 0 => will be filled automatically by the kernel */
493
+	iph->ip_off = 0; /* frag.: first 3 bits=flags=0, last 13 bits=offset */
494
+	iph->ip_ttl = cfg_get(core, core_cfg, udp4_raw_ttl); /* ttl is read from the core config group */
495
+	iph->ip_p = proto;
496
+	iph->ip_src = *from;
497
+	iph->ip_dst = *to;
498
+	iph->ip_sum = 0; /* the ip checksum is not computed here */
499
+
500
+	return 0; /* no failure path in this function */
501
+}
502
+
503
+
504
+
505
+/** send an udp packet over a non-ip_hdrincl raw socket.
506
+ * @param rsock - raw socket
507
+ * @param buf - data
508
+ * @param len - data len
509
+ * @param from - source address:port (_must_ be non-null, but the ip address
510
+ *                can be 0, in which case it will be filled by the kernel).
511
+ * @param to - destination address:port
512
+ * @return  <0 on error (errno set too), number of bytes sent on success
513
+ *          (including the udp header => on success len + udpheader size).
514
+ */
515
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
516
+					union sockaddr_union* from,
517
+					union sockaddr_union* to)
518
+{
519
+	struct msghdr snd_msg;
520
+	struct cmsghdr* cmsg;
521
+#ifdef IP_PKTINFO
522
+	struct in_pktinfo* snd_pktinfo;
523
+#endif /* IP_PKTINFO */
524
+	struct iovec iov[2]; /* [0] = udp header, [1] = payload */
525
+	struct udphdr udp_hdr;
526
+	char msg_ctrl_snd_buf[1024]; /* space for the ancillary (cmsg) data */
527
+	int ret;
528
+
529
+	memset(&snd_msg, 0, sizeof(snd_msg));
530
+	snd_msg.msg_name=&to->sin;
531
+	snd_msg.msg_namelen=sockaddru_len(*to);
532
+	snd_msg.msg_iov=&iov[0];
533
+	/* prepare udp header */
534
+	mk_udp_hdr(&udp_hdr, &from->sin, &to->sin, (unsigned char*)buf, len, 1); /* last arg 1 => compute the udp checksum */
535
+	iov[0].iov_base=(char*)&udp_hdr;
536
+	iov[0].iov_len=sizeof(udp_hdr);
537
+	iov[1].iov_base=buf;
538
+	iov[1].iov_len=len;
539
+	snd_msg.msg_iovlen=2;
540
+	snd_msg.msg_control=msg_ctrl_snd_buf;
541
+	snd_msg.msg_controllen=sizeof(msg_ctrl_snd_buf);
542
+	/* init pktinfo cmsg */
543
+	cmsg=CMSG_FIRSTHDR(&snd_msg);
544
+	cmsg->cmsg_level=IPPROTO_IP;
545
+#ifdef IP_PKTINFO
546
+	cmsg->cmsg_type=IP_PKTINFO;
547
+	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
548
+	snd_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
549
+	snd_pktinfo->ipi_ifindex=0;
550
+	snd_pktinfo->ipi_spec_dst.s_addr=from->sin.sin_addr.s_addr; /* source ip taken from "from" */
551
+#elif defined (IP_SENDSRCADDR)
552
+	cmsg->cmsg_type=IP_SENDSRCADDR;
553
+	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
554
+	memcpy(CMSG_DATA(cmsg), &from->sin.sin_addr.s_addr,
555
+							sizeof(struct in_addr));
556
+#else
557
+#error "no method of setting the source ip supported"
558
+#endif /* IP_PKTINFO / IP_SENDSRCADDR */
559
+	snd_msg.msg_controllen=cmsg->cmsg_len; /* shrink to the single cmsg that was filled in */
560
+	snd_msg.msg_flags=0;
561
+	ret=sendmsg(rsock, &snd_msg, 0);
562
+	return ret;
563
+}
564
+
565
+
566
+
567
+/** send an udp packet over an IP_HDRINCL raw socket.
568
+ * If needed, send several fragments.
569
+ * @param rsock - raw socket
570
+ * @param buf - data
571
+ * @param len - data len
572
+ * @param from - source address:port (_must_ be non-null, but the ip address
573
+ *                can be 0, in which case it will be filled by the kernel).
574
+ * @param to - destination address:port
575
+ * @param mtu - maximum datagram size (including the ip header, excluding
576
+ *              link layer headers). Minimum allowed size is 28
577
+ *               (sizeof(ip_header + udp_header)). If mtu is lower, it will
578
+ *               be ignored (the packet will be sent un-fragmented).
579
+ *              0 can be used to disable fragmentation.
580
+ * @return  <0 on error (-2: datagram too big, -1: check errno),
581
+ *          number of bytes sent on success
582
+ *          (including the ip & udp headers =>
583
+ *               on success len + udpheader + ipheader size).
584
+ */
585
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
586
+						union sockaddr_union* from,
587
+						union sockaddr_union* to, unsigned short mtu)
588
+{
589
+	struct msghdr snd_msg;
590
+	struct iovec iov[2]; /* [0] = headers, [1] = payload (or payload slice) */
591
+	struct ip_udp_hdr {
592
+		struct ip ip;
593
+		struct udphdr udp;
594
+	} hdr;
595
+	unsigned int totlen;
596
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
597
+	unsigned int ip_frag_size; /* fragment size */
598
+	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
599
+	unsigned int ip_payload;
600
+	unsigned int last_frag_offs;
601
+	void* last_frag_start;
602
+	int frg_no;
603
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
604
+	int ret;
605
+
606
+	totlen = len + sizeof(hdr); /* payload + ip header + udp header */
607
+	if (unlikely(totlen > 65535)) /* the comparison must be inside unlikely(): the macro result itself is not the length */
608
+		return -2;
609
+	memset(&snd_msg, 0, sizeof(snd_msg));
610
+	snd_msg.msg_name=&to->sin;
611
+	snd_msg.msg_namelen=sockaddru_len(*to);
612
+	snd_msg.msg_iov=&iov[0];
613
+	/* prepare the udp & ip headers */
614
+	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
615
+	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
616
+				len + sizeof(hdr.udp), IPPROTO_UDP);
617
+	iov[0].iov_base=(char*)&hdr;
618
+	iov[0].iov_len=sizeof(hdr);
619
+	snd_msg.msg_iovlen=2;
620
+	snd_msg.msg_control=0;
621
+	snd_msg.msg_controllen=0;
622
+	snd_msg.msg_flags=0;
623
+	/* this part changes for different fragments */
624
+	/* packets are fragmented if mtu has a valid value (at least an
625
+	   IP header + UDP header fit in it) and if the total length is greater
626
+	   then the mtu */
627
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
628
+	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
629
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
630
+		iov[1].iov_base=buf;
631
+		iov[1].iov_len=len;
632
+		ret=sendmsg(rsock, &snd_msg, 0);
633
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
634
+	} else {
635
+		ip_payload = len + sizeof(hdr.udp);
636
+		/* a fragment offset must be a multiple of 8 => its size must
637
+		   also be a multiple of 8, except for the last fragment */
638
+		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
639
+		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
640
+		frg_no = ip_payload / ip_frag_size +
641
+				 ((ip_payload % ip_frag_size) > last_frag_extra);
642
+		/*ip_last_frag_size = ip_payload % frag_size +
643
+							((ip_payload % frag_size) <= last_frag_extra) *
644
+							ip_frag_size; */
645
+		last_frag_offs = (frg_no - 1) * ip_frag_size;
646
+		/* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
647
+		   => last_frag_offs >= sizeof(hdr.udp) */
648
+		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
649
+		hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0
650
+											  (if 0 the kernel will fill it) */
651
+		/* send the first fragment */
652
+		iov[1].iov_base=buf;
653
+		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
654
+		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
655
+		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
656
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
657
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
658
+		ret=sendmsg(rsock, &snd_msg, 0);
659
+		if (unlikely(ret < 0))
660
+			goto end;
661
+		/* all the other fragments, include only the ip header */
662
+		iov[0].iov_len = sizeof(hdr.ip);
663
+		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
664
+		/* fragments between the first and the last */
665
+		while(unlikely(iov[1].iov_base < last_frag_start)) {
666
+			iov[1].iov_len = ip_frag_size;
667
+			hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
668
+			/* set MF  */
669
+			hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
670
+									(((char*)iov[1].iov_base - (char*)buf +
671
+										sizeof(hdr.udp)) / 8) | 0x2000 );
672
+			ret=sendmsg(rsock, &snd_msg, 0);
673
+			if (unlikely(ret < 0))
674
+				goto end;
675
+			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
676
+		}
677
+		/* last fragment */
678
+		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
679
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
680
+		/* don't set MF (last fragment) */
681
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
682
+									(((char*)iov[1].iov_base - (char*)buf +
683
+										sizeof(hdr.udp)) / 8) );
684
+		ret=sendmsg(rsock, &snd_msg, 0);
685
+		if (unlikely(ret < 0))
686
+			goto end;
687
+	}
688
+end:
689
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
690
+	return ret; /* NOTE: in the fragmented case this is the result of the last sendmsg() only */
691
+}
692
+
693
+
694
+
695
+#endif /* USE_RAW_SOCKS */
0 696
new file mode 100644
... ...
@@ -0,0 +1,56 @@
0
+/*
1
+ * $Id$
2
+ *
3
+ * Copyright (C) 2010 iptelorg GmbH
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7