
- tcp receiver converted to the new io_wait.h
- epoll: workaround for close() not always removing the fd from the epoll set
- sigio_rt: reset O_ASYNC (sigio bug workaround)
- more tcp related fixes

Andrei Pelinescu-Onciul authored on 05/07/2005 19:18:01
Showing 4 changed files
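The tcp receiver is converted below to the generic io_wait.h reactor: a process defines HANDLE_IO_INLINE, supplies its own handle_io() callback keyed on fd_map types, registers descriptors with io_watch_add() and then runs one of the io_wait_loop_*() variants. A minimal consumer sketch of that pattern, using only the signatures visible in this commit (F_LISTENER, example_loop and the handler body are illustrative only, not SER code):

/* Minimal sketch of an io_wait.h consumer, modeled on the tcp_read.c changes
 * in this commit. Assumes the signatures shown below: init_io_wait(h, max_fd,
 * poll_method), io_watch_add(h, fd, type, data), io_wait_loop_poll(h, t, repeat)
 * and a user supplied handle_io(). */
#define HANDLE_IO_INLINE
#include "io_wait.h"   /* must come before fcntl.h when SIGIO_RT is used */
#include <fcntl.h>

enum fd_types { F_NONE, F_LISTENER };  /* per-process fd "types" (example only) */

static io_wait_h io_w;                 /* io_wait handler for this process */

/* called from io_wait_loop_*() for every ready fd:
 * return <0 on error/close, 0 when nothing more is queued, >0 otherwise */
inline static int handle_io(struct fd_map* fm, int idx)
{
	switch(fm->type){
		case F_LISTENER:
			/* ... read from fm->fd, use fm->data ... */
			return 1;
		default:
			return -1;
	}
}

static void example_loop(int listen_fd)
{
	if (init_io_wait(&io_w, 1024 /* max fds */, POLL_POLL)<0)
		return;
	io_watch_add(&io_w, listen_fd, F_LISTENER, 0);
	while(1)
		io_wait_loop_poll(&io_w, 2 /* timeout, s */, 0);
}

tcp_read.c below follows the same shape, with F_TCPMAIN/F_TCPCONN types and a per-connection timeout sweep after every loop iteration.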
... ...
@@ -57,7 +57,7 @@ MAIN_NAME=ser
 VERSION = 0
 PATCHLEVEL = 10
 SUBLEVEL =   99
-EXTRAVERSION = -dev10
+EXTRAVERSION = -dev11-tcp
 
 RELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 OS = $(shell uname -s | sed -e s/SunOS/solaris/ | tr "[A-Z]" "[a-z]")
... ...
@@ -297,13 +297,19 @@ inline static int io_watch_add(	io_wait_h* h,
 	int n;
 	struct epoll_event ep_event;
 #endif
-#ifdef HAVE_SIGIO_RT
-	static char buf[65536];
-#endif
 #ifdef HAVE_DEVPOLL
 	struct pollfd pfd;
 #endif
+#if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
+	int idx;
+	int check_io;
+	struct pollfd pf;
 	
+	check_io=0; /* set to 1 if we need to check for pre-existing queued
+				   io/data on the fd */
+	idx=-1;
+#endif
+	e=0;
 	if (fd==-1){
 		LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
 		goto error;
... ...
@@ -344,6 +350,10 @@ inline static int io_watch_add(	io_wait_h* h,
 #ifdef HAVE_SIGIO_RT
 		case POLL_SIGIO_RT:
 			fd_array_setup;
+			/* re-set O_ASYNC might be needed, if not done from
+			 * io_watch_del (or if somebody wants to add a fd which has
+			 * already O_ASYNC/F_SETSIG set on a duplicate)
+			 */
 			/* set async & signal */
 			if (fcntl(fd, F_SETOWN, my_pid())==-1){
 				LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
... ...
@@ -362,9 +372,13 @@ inline static int io_watch_add(	io_wait_h* h,
 					fd,  h->signo, my_pid());
 #endif
 			/* empty socket receive buffer, if buffer is already full
-			 * (e.g. early media), no more space to put packets
-			 * => no more signals are ever generated -- andrei */
-			while(recv(fd, buf, sizeof(buf), 0)>=0);
+			 * no more space to put packets
+			 * => no more signals are ever generated
+			 * also when moving fds, the freshly moved fd might have
+			 *  already some bytes queued, we want to get them now
+			 *  and not later -- andrei */
+			idx=h->fd_no;
+			check_io=1;
 			break;
 #endif
 #ifdef HAVE_EPOLL
... ...
@@ -392,6 +406,8 @@ again2:
 					strerror(errno), errno);
 				goto error;
 			}
+			idx=-1;
+			check_io=1;
 			break;
 #endif
 #ifdef HAVE_KQUEUE
... ...
@@ -424,8 +440,23 @@ again_devpoll:
 	
 	h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
 				   has only informative value */
+#if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
+	if (check_io){
+		/* handle possible pre-existing events */
+		pf.fd=fd;
+		pf.events=POLLIN;
+check_io_again:
+		while( ((n=poll(&pf, 1, 0))>0) && (handle_io(e, idx)>0));
+		if (n==-1){
+			if (errno==EINTR) goto check_io_again;
+			LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
+						strerror(errno), errno);
+		}
+	}
+#endif
 	return 0;
 error:
+	if (e) unhash_fd_map(e);
 	return -1;
 #undef fd_array_setup
 #undef set_fd_flags 
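io_watch_add() now drains anything already queued on a freshly added fd: with sigio_rt (and epoll in edge-triggered mode) a signal/event is raised only for new data, so bytes queued before registration would never wake the process. The code above polls the fd with a zero timeout and feeds handle_io() until nothing is left. The same idea as a standalone, hedged sketch (drain_pending and on_readable are illustrative names, not io_wait.h API):

#include <poll.h>
#include <errno.h>

/* Sketch of the "check_io" idea: after registering an fd with an
 * edge/signal driven mechanism, poll it once with a 0 timeout and consume
 * whatever was queued before registration, otherwise no event may ever fire.
 * on_readable() stands in for handle_io() and is assumed to return >0 while
 * more data may be pending. Illustrative only. */
static int drain_pending(int fd, int (*on_readable)(int fd))
{
	struct pollfd pf;
	int n;

	pf.fd=fd;
	pf.events=POLLIN;
again:
	while (((n=poll(&pf, 1, 0))>0) && (on_readable(fd)>0))
		;
	if (n==-1){
		if (errno==EINTR) goto again;
		return -1;	/* real poll error */
	}
	return 0;	/* nothing (more) queued */
}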
... ...
@@ -469,14 +500,17 @@ inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
 #ifdef HAVE_DEVPOLL
 	struct pollfd pfd;
 #endif
+#ifdef HAVE_SIGIO_RT
+	int fd_flags;
+#endif
 	
 	if ((fd<0) || (fd>=h->max_fd_no)){
 		LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
 						fd, h->fd_no);
 		goto error;
 	}
-	DBG("DBG: io_watch_del (%p, %d, %d) fd_no=%d called\n",
-			h, fd, idx, h->fd_no);
+	DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
+			h, fd, idx, flags, h->fd_no);
 	e=get_fd_map(h, fd);
 	/* more sanity checks */
 	if (e==0){
... ...
@@ -509,25 +543,47 @@ inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
 #ifdef HAVE_SIGIO_RT
 		case POLL_SIGIO_RT:
 			fix_fd_array;
-			/* FIXME: re-set ASYNC? (not needed if the fd is/will be closed
-			 *        but might cause problems if the fd is "moved")
-			 *        update: probably not needed, the fd_map type!=0
-			 *        check should catch old queued signals or in-transit fd
-			 *        (so making another syscall to reset ASYNC is not 
-			 *         necessary)*/
+			/* the O_ASYNC flag must be reset all the time, the fd
+			 *  can be changed only if O_ASYNC is reset (if not and
+			 *  the fd is a duplicate, you will get signals from the dup. fd
+			 *  and not from the original, even if the dup. fd was closed
+			 *  and the signals re-set on the original) -- andrei
+			 */
+			/*if (!(flags & IO_FD_CLOSING)){*/
+				/* reset ASYNC */
+				fd_flags=fcntl(fd, F_GETFL); 
+				if (fd_flags==-1){ 
+					LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:" 
+							" %s [%d]\n", strerror(errno), errno); 
+					goto error; 
+				} 
+				if (fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1){ 
+					LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL" 
+								" failed: %s [%d]\n", strerror(errno), errno); 
+					goto error; 
+				} 
 			break;
 #endif
 #ifdef HAVE_EPOLL
 		case POLL_EPOLL_LT:
 		case POLL_EPOLL_ET:
+			/* epoll doesn't seem to automatically remove sockets,
+			 * if the socket is a duplicate/moved and the original
+			 * is still open. The fd is removed from the epoll set
+			 * only when the original (and all the copies?) is/are 
+			 * closed. This is probably a bug in epoll. --andrei */
+#ifdef EPOLL_NO_CLOSE_BUG
 			if (!(flags & IO_FD_CLOSING)){
+#endif
 				n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
 				if (n==-1){
 					LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
 							"list failed: %s [%d]\n", strerror(errno), errno);
 					goto error;
 				}
+#ifdef EPOLL_NO_CLOSE_BUG
 			}
+#endif
 			break;
 #endif
 #ifdef HAVE_KQUEUE
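io_watch_del() now unconditionally clears O_ASYNC, because a SIGIO-armed descriptor that was dup()ed keeps signalling through the duplicate; and under EPOLL_NO_CLOSE_BUG the EPOLL_CTL_DEL is issued even for fds about to be closed, since close() alone does not remove the fd from the epoll set while a duplicate stays open. The O_ASYNC reset step in isolation, as a hedged sketch (clear_async is an illustrative helper, not part of io_wait.h):

#include <fcntl.h>

/* Sketch of the O_ASYNC reset performed in io_watch_del above: fetch the
 * current file status flags and write them back without O_ASYNC, so a moved
 * or duplicated fd stops generating SIGIO/RT signals for this process.
 * Illustrative helper only. */
static int clear_async(int fd)
{
	int flags;

	flags=fcntl(fd, F_GETFL);
	if (flags==-1)
		return -1;
	if (fcntl(fd, F_SETFL, flags & ~O_ASYNC)==-1)
		return -1;
	return 0;
}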
... ...
@@ -659,8 +715,10 @@ again:
 		if (n==-1){
 			if (errno==EINTR) goto again; /* signal, ignore it */
 			else{
-				LOG(L_ERR, "ERROR:io_wait_loop_epoll: epoll_wait:"
-						" %s [%d]\n", strerror(errno), errno);
+				LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
+						"epoll_wait(%d, %p, %d, %d): %s [%d]\n", 
+						h->epfd, h->ep_array, h->fd_no, t*1000,
+						strerror(errno), errno);
 				goto error;
 			}
 		}
... ...
@@ -1174,10 +1174,11 @@ inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
 				tcpconn_destroy(tcpconn);
 				break;
 			}
-			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
 			/* update the timeout*/
 			tcpconn->timeout=get_ticks()+TCP_CON_TIMEOUT;
 			tcpconn_put(tcpconn);
+			/* must be after the de-ref*/
+			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
 			DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
 											tcpconn, tcpconn->refcnt);
 			break;
... ...
@@ -1309,9 +1310,9 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
 			tcpconn->s=fd;
 			/* add tcpconn to the list*/
 			tcpconn_add(tcpconn);
-			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
 			/* update the timeout*/
 			tcpconn->timeout=get_ticks()+TCP_CON_TIMEOUT;
+			io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
 			break;
 		default:
 			LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
... ...
@@ -33,6 +33,7 @@
  * 2003-07-01  tcp_read & friends take no a single tcp_connection 
  *              parameter & they set c->state to S_CONN_EOF on eof (andrei)
  * 2003-07-04  fixed tcp EOF handling (possible infinite loop) (andrei)
+ * 2005-07-05  migrated to the new io_wait code (andrei)
  */
 
 #ifdef USE_TCP
... ...
@@ -62,6 +63,17 @@
 #include "tls/tls_server.h"
 #endif
 
+#define HANDLE_IO_INLINE
+#include "io_wait.h"
+#include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
+
+/* types used in io_wait* */
+enum fd_types { F_NONE, F_TCPMAIN, F_TCPCONN };
+
+/* list of tcp connections handled by this process */
+static struct tcp_connection* tcp_conn_lst=0;
+static io_wait_h io_w; /* io_wait handler*/
+static int tcpmain_sock=-1;
 
 
 /* reads next available bytes
... ...
@@ -385,7 +397,7 @@ skip:
 
 
 
-int tcp_read_req(struct tcp_connection* con)
+int tcp_read_req(struct tcp_connection* con, int* bytes_read)
 {
 	int bytes;
 	int resp;
... ...
@@ -394,6 +406,7 @@ int tcp_read_req(struct tcp_connection* con)
 	int s;
 	char c;
 		
+		bytes=-1;
 		resp=CONN_RELEASE;
 		s=con->fd;
 		req=&con->req;
... ...
@@ -520,7 +533,7 @@ again:
 		
 		
 	end_req:
-		
+		if (bytes_read) *bytes_read=bytes;
 		return resp;
 }
 
... ...
@@ -543,7 +556,8 @@ void release_tcpconn(struct tcp_connection* c, long state, int unix_sock)
 }
 
 
-
+#ifdef DEBUG_TCP_RECEIVE
+/* old code known to work, kept around for debugging */
 void tcp_receive_loop(int unix_sock)
 {
 	struct tcp_connection* list; /* list with connections in use */
... ...
@@ -680,15 +694,229 @@ skip:
 		
 	}
 }
+#else /* DEBUG_TCP_RECEIVE */
+
+
+
+/* handle io routine, based on the fd_map type
+ * (it will be called from io_wait_loop* )
+ * params:  fm  - pointer to a fd hash entry
+ *          idx - index in the fd_array (or -1 if not known)
+ * return: -1 on error, or when we are not interested any more in reads
+ *            from this fd (e.g.: we are closing it)
+ *          0 on EAGAIN or when by some other way it is known that no more
+ *            io events are queued on the fd (the receive buffer is empty).
+ *            Useful to detect when there are no more io events queued for
+ *            sigio_rt, epoll_et, kqueue.
+ *         >0 on successful read from the fd (when there might be more io
+ *            queued -- the receive buffer might still be non-empty)
+ */
+inline static int handle_io(struct fd_map* fm, int idx)
+{	
+	int ret;
+	int n;
+	struct tcp_connection* con;
+	int s;
+	long resp;
+	
+	switch(fm->type){
+		case F_TCPMAIN:
+again:
+			ret=n=receive_fd(fm->fd, &con, sizeof(con), &s, 0);
+			DBG("received n=%d con=%p, fd=%d\n", n, con, s);
+			if (n<0){
+				if (errno == EWOULDBLOCK || errno == EAGAIN){
+					ret=0;
+					break;
+				}else if (errno == EINTR) goto again;
+				else{
+					LOG(L_CRIT,"BUG: tcp_receive: handle_io: read_fd: %s \n",
+							strerror(errno));
+					abort(); /* big error*/
+				}
+			}
+			if (n==0){
+				LOG(L_ERR, "WARNING: tcp_receive: handle_io: 0 bytes read\n");
+				break;
+			}
+			if (con==0){
+				LOG(L_CRIT, "BUG: tcp_receive: handle_io null pointer\n");
+				break;
+			}
+			con->fd=s;
+			if (s==-1) {
+				LOG(L_ERR, "ERROR: tcp_receive: handle_io: read_fd:"
+									"no fd read\n");
+				goto con_error;
+			}
+			if (con==tcp_conn_lst){
+				LOG(L_CRIT, "BUG: tcp_receive: handle_io: duplicate"
+							" connection received: %p, id %d, fd %d, refcnt %d"
+							" state %d (n=%d)\n", con, con->id, con->fd,
+							con->refcnt, con->state, n);
+				release_tcpconn(con, CONN_ERROR, tcpmain_sock);
+				break; /* try to recover */
+			}
+			/* must be before io_watch_add, io_watch_add might catch some
+			 * already existing events => might call handle_io and
+			 * handle_io might decide to del. the new connection =>
+			 * must be in the list */
+			tcpconn_listadd(tcp_conn_lst, con, c_next, c_prev);
+			con->timeout=get_ticks()+TCP_CHILD_TIMEOUT;
+			if (io_watch_add(&io_w, s, F_TCPCONN, con)<0){
+				LOG(L_CRIT, "ERROR: tcp_receive: handle_io: failed to add"
+						" new socket to the fd list\n");
+				tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
+				goto con_error;
+			}
+			break;
+		case F_TCPCONN:
+			con=(struct tcp_connection*)fm->data;
+			resp=tcp_read_req(con, &ret);
+			if (resp<0){
+				ret=-1; /* some error occurred */
+				io_watch_del(&io_w, con->fd, idx, IO_FD_CLOSING);
+				tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
+				con->state=S_CONN_BAD;
+				release_tcpconn(con, resp, tcpmain_sock);
+			}else{
+				/* update timeout */
+				con->timeout=get_ticks()+TCP_CHILD_TIMEOUT;
+			}
+			break;
+		case F_NONE:
+			LOG(L_CRIT, "BUG: handle_io: empty fd map %p (%d): "
+						"{%d, %d, %p}\n", fm, (int)(fm-io_w.fd_hash),
+						fm->fd, fm->type, fm->data);
+			goto error;
+		default:
+			LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
+			goto error;
+	}
+	
+	return ret;
+con_error:
+	con->state=S_CONN_BAD;
+	release_tcpconn(con, CONN_ERROR, fm->fd);
+	return ret;
+error:
+	return -1;
+}
 
 
-#if 0
-int main(int argv, char** argc )
+
+/* releases expired connections and cleans up bad ones (state<0) */
+static inline void tcp_receive_timeout()
 {
-	printf("starting tests\n");
-	tcp_receive_loop();
+	struct tcp_connection* con;
+	struct tcp_connection* next;
+	int ticks;
+	
+	ticks=get_ticks();
+	for (con=tcp_conn_lst; con; con=next){
+		next=con->c_next; /* safe for removing */
+		if (con->state<0){   /* kill bad connections */ 
+			/* S_CONN_BAD or S_CONN_ERROR, remove it */
+			/* fd will be closed in release_tcpconn */
+			io_watch_del(&io_w, con->fd, -1, IO_FD_CLOSING);
+			tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
+			con->state=S_CONN_BAD;
+			release_tcpconn(con, CONN_ERROR, tcpmain_sock);
+			continue;
+		}
+		if (con->timeout<=ticks){
+			/* expired, return to "tcp main" */
+			DBG("tcp_receive_loop: %p expired (%d, %d)\n",
+					con, con->timeout, ticks);
+			/* fd will be closed in release_tcpconn */
+			io_watch_del(&io_w, con->fd, -1, IO_FD_CLOSING);
+			tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
+			release_tcpconn(con, CONN_RELEASE, tcpmain_sock);
+		}
+	}
 }
 
-#endif
 
+
+void tcp_receive_loop(int unix_sock)
+{
+	
+	/* init */
+	tcpmain_sock=unix_sock; /* init com. socket */
+	if (init_io_wait(&io_w, tcp_max_fd_no, tcp_poll_method)<0)
+		goto error;
+	/* add the unix socket */
+	if (io_watch_add(&io_w, tcpmain_sock, F_TCPMAIN, 0)<0){
+		LOG(L_CRIT, "ERROR: tcp_receive_loop: init: failed to add socket "
+							" to the fd list\n");
+		goto error;
+	}
+	/* main loop */
+	switch(io_w.poll_method){
+		case POLL_POLL:
+				while(1){
+					io_wait_loop_poll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
+					tcp_receive_timeout();
+				}
+				break;
+#ifdef HAVE_SELECT
+		case POLL_SELECT:
+			while(1){
+				io_wait_loop_select(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
+				tcp_receive_timeout();
+			}
+			break;
+#endif
+#ifdef HAVE_SIGIO_RT
+		case POLL_SIGIO_RT:
+			while(1){
+				io_wait_loop_sigio_rt(&io_w, TCP_CHILD_SELECT_TIMEOUT);
+				tcp_receive_timeout();
+			}
+			break;
+#endif
+#ifdef HAVE_EPOLL
+		case POLL_EPOLL_LT:
+			while(1){
+				io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
+				tcp_receive_timeout();
+			}
+			break;
+		case POLL_EPOLL_ET:
+			while(1){
+				io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 1);
+				tcp_receive_timeout();
+			}
+			break;
 #endif
+#ifdef HAVE_KQUEUE
+		case POLL_KQUEUE:
+			while(1){
+				io_wait_loop_kqueue(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
+				tcp_receive_timeout();
+			}
+			break;
+#endif
+#ifdef HAVE_DEVPOLL
+		case POLL_DEVPOLL:
+			while(1){
+				io_wait_loop_devpoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
+				tcp_receive_timeout();
+			}
+			break;
+#endif
+		default:
+			LOG(L_CRIT, "BUG: tcp_receive_loop: no support for poll method "
+					" %s (%d)\n", 
+					poll_method_name(io_w.poll_method), io_w.poll_method);
+			goto error;
+	}
+error:
+	destroy_io_wait(&io_w);
+	LOG(L_CRIT, "ERROR: tcp_receive_loop: exiting...");
+	exit(-1);
+}
+
+#endif /* DEBUG_TCP_RECEIVE */
+
+#endif /* USE_TCP */
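
The -1/0/>0 return convention documented for handle_io() above is what makes the edge-triggered and signal-driven loops work: 0 must mean "the fd is drained", otherwise events can be lost. A hedged sketch of a read helper obeying that convention on a non-blocking socket (read_some is illustrative, not SER code):

#include <errno.h>
#include <unistd.h>

/* Sketch of the handle_io() return convention for a plain non-blocking fd:
 * >0 after a successful read (more data may still be queued),
 *  0 on EAGAIN/EWOULDBLOCK (buffer drained, safe for edge-triggered modes),
 * -1 on error or EOF (caller should stop watching the fd). Illustrative only. */
static int read_some(int fd, char* buf, int len)
{
	int n;

	do{
		n=read(fd, buf, len);
	}while(n==-1 && errno==EINTR);
	if (n>0)
		return 1;	/* read something; more may still be queued */
	if (n==0)
		return -1;	/* EOF: not interested in this fd any more */
	if (errno==EAGAIN || errno==EWOULDBLOCK)
		return 0;	/* receive buffer drained, nothing queued */
	return -1;		/* real error */
}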