Browse code

raw sockets: build ip header & fragmentation support

Support for sending on raw sockets opened with IP_HDRINCL or IPPROTO_RAW,
which require the sender to build the IP header and to perform fragmentation itself.

Andrei Pelinescu-Onciul authored on 15/06/2010 14:27:55
Showing 2 changed files
... ...
@@ -24,11 +24,12 @@
24 24
  * History:
25 25
  * --------
26 26
  *  2010-06-07  initial version (from older code) andrei
27
+ *  2010-06-15  IP_HDRINCL raw socket support, including on-send
28
+ *               fragmentation (andrei)
27 29
  */
28 30
 /*
29 31
  * FIXME: IP_PKTINFO & IP_HDRINCL - linux specific
30 32
  * FIXME: linux specific iphdr and udphdr
31
- * FIXME: send support for IP_HDRINCL
32 33
  */
33 34
 
34 35
 #ifdef USE_RAW_SOCKS
... ...
@@ -37,7 +38,8 @@
37 37
 #include "ip_addr.h"
38 38
 #include "dprint.h"
39 39
 #include "str.h"
40
-#include "ut.h"
40
+#include "rand/fastrand.h"
41
+#include "globals.h"
41 42
 
42 43
 #include <errno.h>
43 44
 #include <string.h>
... ...
@@ -99,7 +101,7 @@ int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
99 99
 	}
100 100
 	t=IP_PMTUDISC_DONT;
101 101
 	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
102
-		LOG(L_ERR, "raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
102
+		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
103 103
 				strerror(errno));
104 104
 		goto error;
105 105
 	}
... ...
@@ -275,7 +277,7 @@ int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
275 275
 			n=-3;
276 276
 			goto error;
277 277
 		}else{
278
-			LOG(L_ERR, "udp length too small: %d/%d\n",
278
+			ERR("udp length too small: %d/%d\n",
279 279
 					(int)udp_len, (int)(end-udph_start));
280 280
 			n=-3;
281 281
 			goto error;
... ...
@@ -290,7 +292,7 @@ int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
290 290
 	dst_ip.u.addr32[0]=iph.daddr;
291 291
 	/* fill dst_port */
292 292
 	dst_port=ntohs(udph.dest);
293
-	ip_addr2su(to, &dst_ip, port);
293
+	ip_addr2su(to, &dst_ip, dst_port);
294 294
 	/* fill src_port */
295 295
 	src_port=ntohs(udph.source);
296 296
 	su_setport(from, src_port);
... ...
@@ -403,6 +405,34 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
403 403
 
404 404
 
405 405
 
406
+/** fill in an ip header.
407
+ * Note: the checksum is _not_ computed
408
+ * @param iph - ip header that will be filled.
409
+ * @param from - source ip v4 address (network byte order).
410
+ * @param to -   destination ip v4 address (network byte order).
411
+ * @param payload len - payload length (not including the ip header).
412
+ * @param proto - protocol.
413
+ * @return 0 on success, < 0 on error.
414
+ */
415
+inline static int mk_ip_hdr(struct iphdr* iph, struct in_addr* from, 
416
+				struct in_addr* to, int payload_len, unsigned char proto)
417
+{
418
+	iph->ihl = sizeof(struct iphdr)/4;
419
+	iph->version = 4;
420
+	iph->tos = tos;
421
+	iph->tot_len = htons(payload_len);
422
+	iph->id = 0;
423
+	iph->frag_off = 0; /* first 3 bits = flags = 0, last 13 bits = offset */
424
+	iph->ttl = 63; /* FIXME: use some configured value */
425
+	iph->protocol = proto;
426
+	iph->check = 0;
427
+	iph->saddr = from->s_addr;
428
+	iph->daddr = to->s_addr;
429
+	return 0;
430
+}
431
+
432
+
433
+
406 434
 /** send an udp packet over a raw socket.
407 435
  * @param rsock - raw socket
408 436
  * @param buf - data
... ...
@@ -413,7 +443,8 @@ inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
413 413
  * @return  <0 on error (errno set too), number of bytes sent on success
414 414
  *          (including the udp header => on success len + udpheader size).
415 415
  */
416
-int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
416
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
417
+					union sockaddr_union* from,
417 418
 					union sockaddr_union* to)
418 419
 {
419 420
 	struct msghdr snd_msg;
... ...
@@ -453,4 +484,126 @@ int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
453 453
 
454 454
 
455 455
 
456
+/** send an udp packet over an IP_HDRINCL raw socket.
457
+ * If needed, send several fragments.
458
+ * @param rsock - raw socket
459
+ * @param buf - data
460
+ * @param len - data len
461
+ * @param from - source address:port (_must_ be non-null, but the ip address
462
+ *                can be 0, in which case it will be filled by the kernel).
463
+ * @param to - destination address:port
464
+ * @param mtu - maximum datagram size (including the ip header, excluding
465
+ *              link layer headers). Minimum allowed size is 28
466
+ *               (sizeof(ip_header + udp_header)). If mtu is lower, it will
467
+ *               be ignored (the packet will be sent un-fragmented).
468
+ *              0 can be used to disable fragmentation.
469
+ * @return  <0 on error (-2: datagram too big, -1: check errno),
470
+ *          number of bytes sent on success
471
+ *          (including the ip & udp headers =>
472
+ *               on success len + udpheader + ipheader size).
473
+ */
474
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
475
+						union sockaddr_union* from,
476
+						union sockaddr_union* to, unsigned short mtu)
477
+{
478
+	struct msghdr snd_msg;
479
+	struct iovec iov[2];
480
+	struct ip_udp_hdr {
481
+		struct iphdr ip;
482
+		struct udphdr udp;
483
+	} hdr;
484
+	unsigned int totlen;
485
+	unsigned int ip_frag_size; /* fragment size */
486
+	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
487
+	unsigned int ip_payload;
488
+	unsigned int last_frag_offs;
489
+	void* last_frag_start;
490
+	int frg_no;
491
+	int ret;
492
+
493
+	totlen = len + sizeof(hdr);
494
+	if (unlikely(totlen) > 65535)
495
+		return -2;
496
+	memset(&snd_msg, 0, sizeof(snd_msg));
497
+	snd_msg.msg_name=&to->sin;
498
+	snd_msg.msg_namelen=sockaddru_len(*to);
499
+	snd_msg.msg_iov=&iov[0];
500
+	/* prepare the udp & ip headers */
501
+	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
502
+	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
503
+				len + sizeof(hdr.udp), IPPROTO_UDP);
504
+	iov[0].iov_base=(char*)&hdr;
505
+	iov[0].iov_len=sizeof(hdr);
506
+	snd_msg.msg_iovlen=2;
507
+	snd_msg.msg_control=0;
508
+	snd_msg.msg_controllen=0;
509
+	snd_msg.msg_flags=0;
510
+	/* this part changes for different fragments */
511
+	/* packets are fragmented if mtu has a valid value (at least an
512
+	   IP header + UDP header fit in it) and if the total length is greater
513
+	   then the mtu */
514
+	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
515
+		iov[1].iov_base=buf;
516
+		iov[1].iov_len=len;
517
+		ret=sendmsg(rsock, &snd_msg, 0);
518
+	} else {
519
+		ip_payload = len + sizeof(hdr.udp);
520
+		/* a fragment offset must be a multiple of 8 => its size must
521
+		   also be a multiple of 8, except for the last fragment */
522
+		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
523
+		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
524
+		frg_no = ip_payload / ip_frag_size +
525
+				 ((ip_payload % ip_frag_size) > last_frag_extra);
526
+		/*ip_last_frag_size = ip_payload % frag_size +
527
+							((ip_payload % frag_size) <= last_frag_extra) *
528
+							ip_frag_size; */
529
+		last_frag_offs = (frg_no - 1) * ip_frag_size;
530
+		/* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
531
+		   => last_frag_offs >= sizeof(hdr.udp) */
532
+		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
533
+		hdr.ip.id = fastrand_max(65534) + 1; /* random id, should be != 0
534
+											  (if 0 the kernel will fill it) */
535
+		/* send the first fragment */
536
+		iov[1].iov_base=buf;
537
+		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
538
+		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
539
+		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
540
+		hdr.ip.tot_len = htons(ip_frag_size);
541
+		hdr.ip.frag_off = htons(0x2000); /* set MF */
542
+		ret=sendmsg(rsock, &snd_msg, 0);
543
+		if (unlikely(ret < 0))
544
+			goto end;
545
+		/* all the other fragments, include only the ip header */
546
+		iov[0].iov_len = sizeof(hdr.ip);
547
+		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
548
+		/* fragments between the first and the last */
549
+		while(unlikely(iov[1].iov_base < last_frag_start)) {
550
+			iov[1].iov_len = ip_frag_size;
551
+			hdr.ip.tot_len = htons(iov[1].iov_len);
552
+			/* set MF  */
553
+			hdr.ip.frag_off = htons( (unsigned short)
554
+									(((char*)iov[1].iov_base - (char*)buf +
555
+										sizeof(hdr.udp)) / 8) | 0x2000);
556
+			ret=sendmsg(rsock, &snd_msg, 0);
557
+			if (unlikely(ret < 0))
558
+				goto end;
559
+			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
560
+		}
561
+		/* last fragment */
562
+		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
563
+		hdr.ip.tot_len = htons(iov[1].iov_len);
564
+		/* don't set MF (last fragment) */
565
+		hdr.ip.frag_off = htons( (unsigned short)
566
+								(((char*)iov[1].iov_base - (char*)buf +
567
+									sizeof(hdr.udp)) / 8) );
568
+		ret=sendmsg(rsock, &snd_msg, 0);
569
+		if (unlikely(ret < 0))
570
+			goto end;
571
+	}
572
+end:
573
+	return ret;
574
+}
575
+
576
+
577
+
456 578
 #endif /* USE_RAW_SOCKS */
... ...
@@ -46,7 +46,11 @@ int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
46 46
 					union sockaddr_union* to);
47 47
 int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
48 48
 					union sockaddr_union* to, struct raw_filter* rf);
49
-int raw_udp4_send(int rsock, char* buf, int len, union sockaddr_union* from,
49
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
50
+					union sockaddr_union* from,
50 51
 					union sockaddr_union* to);
52
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
53
+						union sockaddr_union* from,
54
+						union sockaddr_union* to, unsigned short mtu);
51 55
 
52 56
 #endif /* _raw_sock_h */