Browse code

raw sockets: freebsd support

Support freebsd and theoretically other BSDs (other BSDs not tested):

- on most BSDs the fragment offset and total length fields of the ip
header must be filled in host byte order (when using raw sockets with
IP_HDRINCL).
- no need for user-space fragmentation for raw sockets
- use IP_RECVDSTADDR and IP_SENDSRCADDR instead of IP_PKTINFO
(for raw sockets without IP_HDRINCL)
- fix the ip total length written in the header (it was wrongly computed:
the ip header size was not included; on linux it worked because linux
always ignores and overwrites it)

Andrei Pelinescu-Onciul authored on 10/08/2010 15:43:25
Showing 2 changed files
... ...
@@ -27,9 +27,6 @@
27 27
  *  2010-06-15  IP_HDRINCL raw socket support, including on-send
28 28
  *               fragmentation (andrei)
29 29
  */
30
-/*
31
- * FIXME: IP_PKTINFO & IP_HDRINCL - linux specific
32
- */
33 30
 
34 31
 #ifdef USE_RAW_SOCKS
35 32
 
... ...
@@ -47,6 +44,7 @@
47 47
 #include <fcntl.h>
48 48
 #include <sys/socket.h>
49 49
 #include <netinet/in.h>
50
+#include <netinet/in_systm.h>
50 51
 #include <arpa/inet.h>
51 52
 #ifndef __USE_BSD
52 53
 #define __USE_BSD  /* on linux use bsd version of iphdr (more portable) */
... ...
@@ -60,11 +58,38 @@
60 60
 #include "cfg_core.h"
61 61
 
62 62
 
63
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined(__OS_openbsd) \
64
+	|| defined (__OS_darwin)
65
+/** fragmentation is done by the kernel (no need to do it in userspace) */
66
+#define RAW_IPHDR_INC_AUTO_FRAG
67
+#endif /* __OS_* */
68
+
69
+/* macros for converting values in the expected format */
70
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined (__OS_darwin)
71
+/* on freebsd and netbsd the ip offset (along with flags) and the
72
+   ip header length must be filled in _host_ bytes order format.
73
+   The same is true for openbsd < 2.1.
74
+*/
75
+/** convert the ip offset in the format expected by the kernel. */
76
+#define RAW_IPHDR_IP_OFF(off) (unsigned short)(off)
77
+/** convert the ip total length in the format expected by the kernel. */
78
+#define RAW_IPHDR_IP_LEN(tlen) (unsigned short)(tlen)
79
+
80
+#else /* __OS_* */
81
+/* linux, openbsd >= 2.1 a.s.o. */
82
+/** convert the ip offset in the format expected by the kernel. */
83
+#define RAW_IPHDR_IP_OFF(off)  htons((unsigned short)(off))
84
+/** convert the ip total length in the format expected by the kernel. */
85
+#define RAW_IPHDR_IP_LEN(tlen) htons((unsigned short)(tlen))
86
+
87
+#endif /* __OS_* */
88
+
89
+
63 90
 /** create and return a raw socket.
64 91
  * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
65 92
  * @param ip - if not null the socket will be bound on this ip.
66 93
  * @param iface - if not null the socket will be bound to this interface
67
- *                (SO_BINDTODEVICE).
94
+ *                (SO_BINDTODEVICE). This is supported only on linux.
68 95
  * @param iphdr_incl - set to 1 if packets send on this socket include
69 96
  *                     a pre-built ip header (some fields, like the checksum
70 97
  *                     will still be filled by the kernel, OTOH packet
... ...
@@ -76,9 +101,11 @@ int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
76 76
 	int sock;
77 77
 	int t;
78 78
 	union sockaddr_union su;
79
+#if defined (SO_BINDTODEVICE)
79 80
 	char short_ifname[sizeof(int)];
80 81
 	int ifname_len;
81 82
 	char* ifname;
83
+#endif /* SO_BINDTODEVICE */
82 84
 
83 85
 	sock = socket(PF_INET, SOCK_RAW, proto);
84 86
 	if (sock==-1)
... ...
@@ -95,19 +122,32 @@ int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
95 95
 		/* IP_PKTINFO makes no sense if the ip header is included */
96 96
 		/* using IP_PKTINFO */
97 97
 		t=1;
98
+#ifdef IP_PKTINFO
98 99
 		if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
99 100
 			ERR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
100 101
 					strerror(errno), errno);
101 102
 			goto error;
102 103
 		}
104
+#elif defined(IP_RECVDSTADDR)
105
+		if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
106
+			ERR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
107
+					strerror(errno), errno);
108
+			goto error;
109
+		}
110
+#else
111
+#error "no method of getting the destination ip address supported"
112
+#endif /* IP_RECVDSTADDR / IP_PKTINFO */
103 113
 	}
114
+#if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
104 115
 	t=IP_PMTUDISC_DONT;
105 116
 	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
106 117
 		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
107 118
 				strerror(errno));
108 119
 		goto error;
109 120
 	}
121
+#endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
110 122
 	if (iface && iface->s){
123
+#if defined (SO_BINDTODEVICE)
111 124
 		/* workaround for linux bug: arg to setsockopt must have at least
112 125
 		 * sizeof(int) size or EINVAL would be returned */
113 126
 		if (iface->len<sizeof(int)){
... ...
@@ -125,6 +165,11 @@ int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
125 125
 							iface->len, ZSW(iface->s), strerror(errno), errno);
126 126
 				goto error;
127 127
 		}
128
+#else /* !SO_BINDTODEVICE */
129
+		/* SO_BINDTODEVICE is linux specific => cannot bind to a device */
130
+		ERR("raw_socket: bind to device supported only on linux\n");
131
+		goto error;
132
+#endif /* SO_BINDTODEVICE */
128 133
 	}
129 134
 	/* FIXME: probe_max_receive_buffer(sock) missing */
130 135
 	if (ip){
... ...
@@ -160,8 +205,8 @@ int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
160 160
 
161 161
 
162 162
 
163
-/** receives an ipv4 packet suing a raw socket.
164
- * An ipv4 packet is received in buf, using IP_PKTINFO.
163
+/** receives an ipv4 packet using a raw socket.
164
+ * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
165 165
  * from and to are filled (only the ip part the ports are 0 since this
166 166
  * function doesn't try to look beyond the IP level).
167 167
  * @param sock - raw socket
... ...
@@ -173,7 +218,7 @@ int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
173 173
  * @param to - result parameter, the IP address part of it will be filled
174 174
  *                with the destination (local) address and the port with 0.
175 175
  * @return packet len or <0 on error: -1 (check errno),
176
- *        -2 no IP_PKTINFO found or AF mismatch
176
+ *        -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
177 177
  */
178 178
 int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
179 179
 					union sockaddr_union* to)
... ...
@@ -181,7 +226,9 @@ int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
181 181
 	struct iovec iov[1];
182 182
 	struct msghdr rcv_msg;
183 183
 	struct cmsghdr* cmsg;
184
+#ifdef IP_PKTINFO
184 185
 	struct in_pktinfo* rcv_pktinfo;
186
+#endif /* IP_PKTINFO */
185 187
 	int n, ret;
186 188
 	char msg_ctrl_buf[1024];
187 189
 
... ...
@@ -203,8 +250,8 @@ retry:
203 203
 		goto end;
204 204
 	}
205 205
 	/* find the pkt info */
206
-	rcv_pktinfo=0;
207 206
 	for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg; cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
207
+#ifdef IP_PKTINFO
208 208
 		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
209 209
 					(cmsg->cmsg_type==IP_PKTINFO))) {
210 210
 			rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
... ...
@@ -216,6 +263,19 @@ retry:
216 216
 			ret=n; /* success */
217 217
 			break;
218 218
 		}
219
+#elif defined (IP_RECVDSTADDR)
220
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
221
+					(cmsg->cmsg_type==IP_RECVDSTADDR))) {
222
+			to->sin.sin_family=AF_INET;
223
+			memcpy(&to->sin.sin_addr, CMSG_DATA(cmsg),
224
+									sizeof(to->sin.sin_addr));
225
+			to->sin.sin_port=0; /* not known */
226
+			ret=n; /* success */
227
+			break;
228
+		}
229
+#else
230
+#error "no method of getting the destination ip address supported"
231
+#endif /* IP_PKTINFO / IP_RECVDSTADDR */
219 232
 	}
220 233
 end:
221 234
 	return ret;
... ...
@@ -321,7 +381,7 @@ error:
321 321
  * @param uh - filled udp header
322 322
  * @param src - source ip address in network byte order.
323 323
  * @param dst - destination ip address in network byte order.
324
- * @param length - payload lenght (not including the udp header),
324
+ * @param length - payload length (not including the udp header),
325 325
  *                 in _host_ order.
326 326
  * @return the partial checksum in host order
327 327
  */
... ...
@@ -408,7 +468,11 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
408 408
 
409 409
 
410 410
 /** fill in an ip header.
411
- * Note: the checksum is _not_ computed
411
+ * Note: the checksum is _not_ computed.
412
+ * WARNING: The ip header length and offset might be filled in
413
+ * _host_ byte order or network byte order (depending on the OS, for example
414
+ *  freebsd needs host byte order for raw sockets with IPHDR_INC, while
415
+ *  linux needs network byte order).
412 416
  * @param iph - ip header that will be filled.
413 417
  * @param from - source ip v4 address (network byte order).
414 418
  * @param to -   destination ip v4 address (network byte order).
... ...
@@ -416,26 +480,30 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
416 416
  * @param proto - protocol.
417 417
  * @return 0 on success, < 0 on error.
418 418
  */
419
-inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from, 
419
+inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
420 420
 				struct in_addr* to, int payload_len, unsigned char proto)
421 421
 {
422 422
 	iph->ip_hl = sizeof(struct ip)/4;
423 423
 	iph->ip_v = 4;
424 424
 	iph->ip_tos = tos;
425
-	iph->ip_len = htons(payload_len);
426
-	iph->ip_id = 0;
425
+	/* on freebsd ip_len _must_ be in _host_ byte order instead
426
+	   of network byte order. On linux the length is ignored (it's filled
427
+	   automatically every time). */
428
+	iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
429
+	iph->ip_id = 0; /* 0 => will be filled automatically by the kernel */
427 430
 	iph->ip_off = 0; /* frag.: first 3 bits=flags=0, last 13 bits=offset */
428 431
 	iph->ip_ttl = cfg_get(core, core_cfg, udp4_raw_ttl);
429 432
 	iph->ip_p = proto;
430
-	iph->ip_sum = 0;
431 433
 	iph->ip_src = *from;
432 434
 	iph->ip_dst = *to;
435
+	iph->ip_sum = 0;
436
+
433 437
 	return 0;
434 438
 }
435 439
 
436 440
 
437 441
 
438
-/** send an udp packet over a raw socket.
442
+/** send an udp packet over a non-ip_hdrincl raw socket.
439 443
  * @param rsock - raw socket
440 444
  * @param buf - data
441 445
  * @param len - data len
... ...
@@ -451,7 +519,9 @@ int raw_udp4_send(int rsock, char* buf, unsigned int len,
451 451
 {
452 452
 	struct msghdr snd_msg;
453 453
 	struct cmsghdr* cmsg;
454
+#ifdef IP_PKTINFO
454 455
 	struct in_pktinfo* snd_pktinfo;
456
+#endif /* IP_PKTINFO */
455 457
 	struct iovec iov[2];
456 458
 	struct udphdr udp_hdr;
457 459
 	char msg_ctrl_snd_buf[1024];
... ...
@@ -473,11 +543,20 @@ int raw_udp4_send(int rsock, char* buf, unsigned int len,
473 473
 	/* init pktinfo cmsg */
474 474
 	cmsg=CMSG_FIRSTHDR(&snd_msg);
475 475
 	cmsg->cmsg_level=IPPROTO_IP;
476
+#ifdef IP_PKTINFO
476 477
 	cmsg->cmsg_type=IP_PKTINFO;
477 478
 	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
478 479
 	snd_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
479 480
 	snd_pktinfo->ipi_ifindex=0;
480 481
 	snd_pktinfo->ipi_spec_dst.s_addr=from->sin.sin_addr.s_addr;
482
+#elif defined (IP_SENDSRCADDR)
483
+	cmsg->cmsg_type=IP_SENDSRCADDR;
484
+	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
485
+	memcpy(CMSG_DATA(cmsg), &from->sin.sin_addr.s_addr,
486
+							sizeof(struct in_addr));
487
+#else
488
+#error "no method of setting the source ip supported"
489
+#endif /* IP_PKTINFO / IP_SENDSRCADDR */
481 490
 	snd_msg.msg_controllen=cmsg->cmsg_len;
482 491
 	snd_msg.msg_flags=0;
483 492
 	ret=sendmsg(rsock, &snd_msg, 0);
... ...
@@ -515,12 +594,14 @@ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
515 515
 		struct udphdr udp;
516 516
 	} hdr;
517 517
 	unsigned int totlen;
518
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
518 519
 	unsigned int ip_frag_size; /* fragment size */
519 520
 	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
520 521
 	unsigned int ip_payload;
521 522
 	unsigned int last_frag_offs;
522 523
 	void* last_frag_start;
523 524
 	int frg_no;
525
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
524 526
 	int ret;
525 527
 
526 528
 	totlen = len + sizeof(hdr);
... ...
@@ -544,10 +625,13 @@ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
544 544
 	/* packets are fragmented if mtu has a valid value (at least an
545 545
 	   IP header + UDP header fit in it) and if the total length is greater
546 546
 	   then the mtu */
547
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
547 548
 	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
549
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
548 550
 		iov[1].iov_base=buf;
549 551
 		iov[1].iov_len=len;
550 552
 		ret=sendmsg(rsock, &snd_msg, 0);
553
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
551 554
 	} else {
552 555
 		ip_payload = len + sizeof(hdr.udp);
553 556
 		/* a fragment offset must be a multiple of 8 => its size must
... ...
@@ -570,8 +654,8 @@ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
570 570
 		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
571 571
 		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
572 572
 		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
573
-		hdr.ip.ip_len = htons(ip_frag_size);
574
-		hdr.ip.ip_off = htons(0x2000); /* set MF */
573
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
574
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
575 575
 		ret=sendmsg(rsock, &snd_msg, 0);
576 576
 		if (unlikely(ret < 0))
577 577
 			goto end;
... ...
@@ -581,11 +665,11 @@ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
581 581
 		/* fragments between the first and the last */
582 582
 		while(unlikely(iov[1].iov_base < last_frag_start)) {
583 583
 			iov[1].iov_len = ip_frag_size;
584
-			hdr.ip.ip_len = htons(iov[1].iov_len);
584
+			hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
585 585
 			/* set MF  */
586
-			hdr.ip.ip_off = htons( (unsigned short)
586
+			hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
587 587
 									(((char*)iov[1].iov_base - (char*)buf +
588
-										sizeof(hdr.udp)) / 8) | 0x2000);
588
+										sizeof(hdr.udp)) / 8) | 0x2000 );
589 589
 			ret=sendmsg(rsock, &snd_msg, 0);
590 590
 			if (unlikely(ret < 0))
591 591
 				goto end;
... ...
@@ -593,16 +677,17 @@ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
593 593
 		}
594 594
 		/* last fragment */
595 595
 		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
596
-		hdr.ip.ip_len = htons(iov[1].iov_len);
596
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
597 597
 		/* don't set MF (last fragment) */
598
-		hdr.ip.ip_off = htons( (unsigned short)
599
-								(((char*)iov[1].iov_base - (char*)buf +
600
-									sizeof(hdr.udp)) / 8) );
598
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
599
+									(((char*)iov[1].iov_base - (char*)buf +
600
+										sizeof(hdr.udp)) / 8) );
601 601
 		ret=sendmsg(rsock, &snd_msg, 0);
602 602
 		if (unlikely(ret < 0))
603 603
 			goto end;
604 604
 	}
605 605
 end:
606
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
606 607
 	return ret;
607 608
 }
608 609
 
... ...
@@ -602,8 +602,8 @@ raw_again:
602 602
 								mtu);
603 603
 		if (unlikely(n==-1)){
604 604
 			su2ip_addr(&ip, &dst->to);
605
-			LOG(L_ERR, "ERROR: raw_udp4_send(sock,%p,%u,...,%s:%d,%d):"
606
-					" %s(%d)\n", buf,len, ip_addr2a(&ip),
605
+			LOG(L_ERR, "ERROR: raw_iphdr_udp4_send(%d,%p,%u,...,%s:%d,%d):"
606
+					" %s(%d)\n", raw_udp4_send_sock, buf,len, ip_addr2a(&ip),
607 607
 					su_getport(&dst->to), mtu, strerror(errno), errno);
608 608
 			if (errno==EINTR) goto raw_again;
609 609
 		}