Browse code

- avoid extra read syscalls (detect short reads) in the poll_et or sigio_rt case (should improve performance) - if POLLRDHUP or EPOLLRDHUP are supported (linux >= 2.6.17) use them and avoid an extra syscall on EOF - on write error try to see if there's still some data in the socket read buffer and try to process it first (if there's no more data do a quick connection destroy)

Andrei Pelinescu-Onciul authored on 05/02/2008 21:47:29
Showing 3 changed files
... ...
@@ -75,6 +75,10 @@
75 75
 #define F_CONN_FD_CLOSED   32 /* fd was already closed */
76 76
 #define F_CONN_PENDING     64 /* pending connect  (fd not known yet in main) */
77 77
 #define F_CONN_MAIN_TIMER 128 /* timer active in the tcp_main process */
78
+#define F_CONN_EOF_SEEN   256 /* FIN or RST have been received */
79
+#define F_CONN_FORCE_EOF  512 /* act as if an EOF was received */
80
+#define F_CONN_OOB_DATA  1024 /* out of band data on the connection */
81
+#define F_CONN_WR_ERROR  2048 /* write error on the fd */
78 82
 
79 83
 
80 84
 enum tcp_req_errors {	TCP_REQ_INIT, TCP_REQ_OK, TCP_READ_ERROR,
... ...
@@ -93,6 +93,10 @@
93 93
  *                linked into the hash tables (was 0) (andrei)
94 94
  *  2007-12-21  support for pending connects (connections are added to the
95 95
  *               hash immediately and writes on them are buffered) (andrei)
96
+ *  2008-02-05  handle POLLRDHUP (if supported), POLLERR and
97
+ *               POLLHUP (andrei)
98
+ *              on write error check if there's still data in the socket 
99
+ *               read buffer and process it first (andrei)
96 100
  */
97 101
 
98 102
 
... ...
@@ -103,10 +107,15 @@
103 103
 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
104 104
 #endif
105 105
 
106
+#define HANDLE_IO_INLINE
107
+#include "io_wait.h" /* include first to make sure the needed features are
108
+						turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
109
+
106 110
 #include <sys/time.h>
107 111
 #include <sys/types.h>
108 112
 #include <sys/select.h>
109 113
 #include <sys/socket.h>
114
+#include <sys/ioctl.h>  /* ioctl() used on write error */
110 115
 #include <netinet/in.h>
111 116
 #include <netinet/in_systm.h>
112 117
 #include <netinet/ip.h>
... ...
@@ -157,8 +166,6 @@
157 157
 #define local_malloc pkg_malloc
158 158
 #define local_free   pkg_free
159 159
 
160
-#define HANDLE_IO_INLINE
161
-#include "io_wait.h"
162 160
 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
163 161
 
164 162
 
... ...
@@ -2553,7 +2560,7 @@ inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
2553 2553
 			local_timer_add(&tcp_main_ltimer, &tcpconn->timer, crt_timeout, t);
2554 2554
 			/* must be after the de-ref*/
2555 2555
 			tcpconn->flags|=F_CONN_MAIN_TIMER;
2556
-			tcpconn->flags&=~(F_CONN_REMOVED|F_CONN_READER);
2556
+			tcpconn->flags&=~(F_CONN_REMOVED|F_CONN_READER|F_CONN_OOB_DATA);
2557 2557
 #ifdef TCP_BUF_WRITE
2558 2558
 			if (unlikely(tcpconn->flags & F_CONN_WRITE_W))
2559 2559
 				n=io_watch_chg(&io_h, tcpconn->s, POLLIN| POLLOUT, -1);
... ...
@@ -3080,6 +3087,7 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
3080 3080
 {
3081 3081
 #ifdef TCP_BUF_WRITE
3082 3082
 	int empty_q;
3083
+	int bytes;
3083 3084
 #endif /* TCP_BUF_WRITE */
3084 3085
 	/*  is refcnt!=0 really necessary? 
3085 3086
 	 *  No, in fact it's a bug: I can have the following situation: a send only
... ...
@@ -3100,13 +3108,36 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
3100 3100
 #endif
3101 3101
 	/* pass it to child, so remove it from the io watch list  and the local
3102 3102
 	 *  timer */
3103
-	DBG("handle_tcpconn_ev: ev (%0x) on %p %d\n", ev, tcpconn, tcpconn->s);
3104 3103
 #ifdef TCP_BUF_WRITE
3105
-	if (unlikely((ev & POLLOUT) && (tcpconn->flags & F_CONN_WRITE_W))){
3106
-		if (unlikely(wbufq_run(tcpconn->s, tcpconn, &empty_q)<0)){
3107
-			io_watch_del(&io_h, tcpconn->s, fd_i, 0);
3108
-			tcpconn->flags|=F_CONN_REMOVED;
3104
+	if (unlikely((ev & (POLLOUT|POLLERR|POLLHUP)) &&
3105
+					(tcpconn->flags & F_CONN_WRITE_W))){
3106
+		if (unlikely((ev & (POLLERR|POLLHUP)) || 
3107
+					(wbufq_run(tcpconn->s, tcpconn, &empty_q)<0))){
3108
+			if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
3109
+				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(1) failed:"
3110
+							" for %p, fd %d\n", tcpconn, tcpconn->s);
3111
+			}
3112
+			if (!(tcpconn->flags & F_CONN_REMOVED) && (ev & POLLIN)){
3113
+				/* connection is watched for read and there is a read event
3114
+				 * (unfortunately if we have POLLIN here we don't know if 
3115
+				 * there's really any data in the read buffer or the POLLIN
3116
+				 * was generated by the error or EOF => to avoid losing
3117
+				 *  data it's safer to either directly check the read buffer 
3118
+				 *  or try a read) */
3119
+				/* in most cases the read buffer will be empty, so in general
3120
+				 * is cheaper to check it here and then send the 
3121
+				 * conn.  to a child only if needed (another syscall + at
3122
+				 * least 2 syscalls in the reader + ...) */
3123
+				if ((ioctl(tcpconn->s, FIONREAD, &bytes)>=0) && (bytes>0)){
3124
+					tcpconn->flags&=~F_CONN_WRITE_W;
3125
+					tcpconn->flags|=F_CONN_REMOVED;
3126
+					tcpconn->flags|=F_CONN_FORCE_EOF|F_CONN_WR_ERROR;
3127
+					goto send_to_child;
3128
+				}
3129
+				/* if bytes==0 or ioctl failed, destroy the connection now */
3130
+			}
3109 3131
 			tcpconn->flags&=~F_CONN_WRITE_W;
3132
+			tcpconn->flags|=F_CONN_REMOVED;
3110 3133
 			if (unlikely(!tcpconn_try_unhash(tcpconn))){
3111 3134
 				LOG(L_CRIT, "BUG: tcpconn_ev: unhashed connection %p\n",
3112 3135
 							tcpconn);
... ...
@@ -3116,12 +3147,20 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
3116 3116
 		}
3117 3117
 		if (empty_q){
3118 3118
 			if (tcpconn->flags & F_CONN_REMOVED){
3119
-				if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1))
3119
+				if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
3120
+					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(2)"
3121
+								" failed:" " for %p, fd %d\n",
3122
+								tcpconn, tcpconn->s);
3120 3123
 					goto error;
3124
+				}
3121 3125
 			}else{
3122 3126
 				if (unlikely(io_watch_chg(&io_h, tcpconn->s,
3123
-											POLLIN, fd_i)==-1))
3127
+											POLLIN, fd_i)==-1)){
3128
+					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(1)"
3129
+								" failed:" " for %p, fd %d\n",
3130
+								tcpconn, tcpconn->s);
3124 3131
 					goto error;
3132
+				}
3125 3133
 			}
3126 3134
 			tcpconn->flags&=~F_CONN_WRITE_W;
3127 3135
 		}
... ...
@@ -3133,15 +3172,33 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
3133 3133
 		 * child and stop watching it for input (but continue watching for
3134 3134
 		 *  writes if needed): */
3135 3135
 		if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
3136
-			if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1))
3136
+			if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1)){
3137
+				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(2)"
3138
+							" failed:" " for %p, fd %d\n",
3139
+							tcpconn, tcpconn->s);
3137 3140
 				goto error;
3141
+			}
3138 3142
 		}else
3139 3143
 #else
3140 3144
 	{
3141 3145
 #endif /* TCP_BUF_WRITE */
3142
-			if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1))
3146
+			if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
3147
+				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(3)"
3148
+							" failed:" " for %p, fd %d\n",
3149
+							tcpconn, tcpconn->s);
3143 3150
 				goto error;
3151
+			}
3152
+#ifdef TCP_BUF_WRITE
3153
+send_to_child:
3154
+#endif
3144 3155
 		DBG("tcp: DBG: sendig to child, events %x\n", ev);
3156
+#ifdef POLLRDHUP
3157
+		tcpconn->flags|=((int)!(ev & (POLLRDHUP|POLLHUP|POLLERR)) -1) & 
3158
+							F_CONN_EOF_SEEN;
3159
+#else /* POLLRDHUP */
3160
+		tcpconn->flags|=((int)!(ev & (POLLHUP|POLLERR)) -1) & F_CONN_EOF_SEEN;
3161
+#endif /* POLLRDHUP */
3162
+		tcpconn->flags|= ((int)!(ev & POLLPRI) -1)  & F_CONN_OOB_DATA;
3145 3163
 		tcpconn->flags|=F_CONN_REMOVED|F_CONN_READER;
3146 3164
 		local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
3147 3165
 		tcpconn->flags&=~F_CONN_MAIN_TIMER;
... ...
@@ -3151,7 +3208,11 @@ inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
3151 3151
 			tcpconn->flags&=~F_CONN_READER;
3152 3152
 #ifdef TCP_BUF_WRITE
3153 3153
 			if (tcpconn->flags & F_CONN_WRITE_W){
3154
-				io_watch_del(&io_h, tcpconn->s, fd_i, 0);
3154
+				if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
3155
+					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(4)"
3156
+							" failed:" " for %p, fd %d\n",
3157
+							tcpconn, tcpconn->s);
3158
+				}
3155 3159
 				tcpconn->flags&=~F_CONN_WRITE_W;
3156 3160
 			}
3157 3161
 #endif /* TCP_BUF_WRITE */
... ...
@@ -38,6 +38,8 @@
38 38
  * 2006-10-13  added STUN support - state machine for TCP (vlada)
39 39
  * 2007-02-20  fixed timeout calc. bug (andrei)
40 40
  * 2007-11-26  improved tcp timers: switched to local_timer (andrei)
41
+ * 2008-02-04  optimizations: handle POLLRDHUP (if supported), detect short
42
+ *              reads (sock. buffer empty) (andrei)
41 43
  */
42 44
 
43 45
 #ifdef USE_TCP
... ...
@@ -86,6 +88,10 @@ int is_msg_complete(struct tcp_req* r);
86 86
 
87 87
 #define TCPCONN_TIMEOUT_MIN_RUN  1 /* run the timers each new tick */
88 88
 
89
+#define RD_CONN_SHORT_READ	1
90
+#define RD_CONN_EOF		2
91
+#define RD_CONN_FORCE_EOF	65536
92
+
89 93
 /* types used in io_wait* */
90 94
 enum fd_types { F_NONE, F_TCPMAIN, F_TCPCONN };
91 95
 
... ...
@@ -99,11 +105,19 @@ static ticks_t tcp_reader_prev_ticks;
99 99
 
100 100
 
101 101
 /* reads next available bytes
102
+ *   c- tcp connection used for reading, tcp_read changes also c->state on
103
+ *      EOF and c->req.error on read error
104
+ *   * flags - value/result - used to signal a seen or "forced" EOF on the 
105
+ *     connection (when it is known that no more data will come after the 
106
+ *     current socket buffer is emptied )=> return/signal EOF on the first 
107
+ *     short read (=> don't use it on POLLPRI, as OOB data will cause short
108
+ *      reads even if there are still remaining bytes in the socket buffer)
102 109
  * return number of bytes read, 0 on EOF or -1 on error,
103
- * on EOF it also sets c->state to S_CONN_EOF
110
+ * on EOF it also sets c->state to S_CONN_EOF.
104 111
  * (to distinguish from reads that would block which could return 0)
112
+ * RD_CONN_SHORT_READ is also set in *flags for short reads.
105 113
  * sets also r->error */
106
-int tcp_read(struct tcp_connection *c)
114
+int tcp_read(struct tcp_connection *c, int* flags)
107 115
 {
108 116
 	int bytes_free, bytes_read;
109 117
 	struct tcp_req *r;
... ...
@@ -121,19 +135,26 @@ int tcp_read(struct tcp_connection *c)
121 121
 again:
122 122
 	bytes_read=read(fd, r->pos, bytes_free);
123 123
 
124
-	if(bytes_read==-1){
125
-		if (errno == EWOULDBLOCK || errno == EAGAIN){
126
-			return 0; /* nothing has been read */
127
-		}else if (errno == EINTR) goto again;
128
-		else{
129
-			LOG(L_ERR, "ERROR: tcp_read: error reading: %s\n",strerror(errno));
130
-			r->error=TCP_READ_ERROR;
131
-			return -1;
124
+	if (likely(bytes_read!=bytes_free)){
125
+		if(unlikely(bytes_read==-1)){
126
+			if (errno == EWOULDBLOCK || errno == EAGAIN){
127
+				bytes_read=0; /* nothing has been read */
128
+			}else if (errno == EINTR) goto again;
129
+			else{
130
+				LOG(L_ERR, "ERROR: tcp_read: error reading: %s (%d)\n",
131
+							strerror(errno), errno);
132
+				r->error=TCP_READ_ERROR;
133
+				return -1;
134
+			}
135
+		}else if (unlikely((bytes_read==0) || 
136
+					(*flags & RD_CONN_FORCE_EOF))){
137
+			c->state=S_CONN_EOF;
138
+			*flags|=RD_CONN_EOF;
139
+			DBG("tcp_read: EOF on %p, FD %d\n", c, fd);
132 140
 		}
133
-	}else if (bytes_read==0){
134
-		c->state=S_CONN_EOF;
135
-		DBG("tcp_read: EOF on %p, FD %d\n", c, fd);
136
-	}
141
+		/* short read */
142
+		*flags|=RD_CONN_SHORT_READ;
143
+	} /* else normal full read */
137 144
 #ifdef EXTRA_DEBUG
138 145
 	DBG("tcp_read: read %d bytes:\n%.*s\n", bytes_read, bytes_read, r->pos);
139 146
 #endif
... ...
@@ -152,7 +173,7 @@ again:
152 152
  * when either r->body!=0 or r->state==H_BODY =>
153 153
  * all headers have been read. It should be called in a while loop.
154 154
  * returns < 0 if error or 0 if EOF */
155
-int tcp_read_headers(struct tcp_connection *c)
155
+int tcp_read_headers(struct tcp_connection *c, int* read_flags)
156 156
 {
157 157
 	int bytes, remaining;
158 158
 	char *p;
... ...
@@ -206,15 +227,15 @@ int tcp_read_headers(struct tcp_connection *c)
206 206
 
207 207
 	r=&c->req;
208 208
 	/* if we still have some unparsed part, parse it first, don't do the read*/
209
-	if (r->parsed<r->pos){
209
+	if (unlikely(r->parsed<r->pos)){
210 210
 		bytes=0;
211 211
 	}else{
212 212
 #ifdef USE_TLS
213
-		if (c->type==PROTO_TLS)
214
-			bytes=tls_read(c);
213
+		if (unlikely(c->type==PROTO_TLS))
214
+			bytes=tls_read(c); /* FIXME: read_flags support */
215 215
 		else
216 216
 #endif
217
-			bytes=tcp_read(c);
217
+			bytes=tcp_read(c, read_flags);
218 218
 		if (bytes<=0) return bytes;
219 219
 	}
220 220
 	p=r->parsed;
... ...
@@ -511,7 +532,7 @@ skip:
511 511
 
512 512
 
513 513
 
514
-int tcp_read_req(struct tcp_connection* con, int* bytes_read)
514
+int tcp_read_req(struct tcp_connection* con, int* bytes_read, int* read_flags)
515 515
 {
516 516
 	int bytes;
517 517
 	int total_bytes;
... ...
@@ -538,8 +559,8 @@ int tcp_read_req(struct tcp_connection* con, int* bytes_read)
538 538
 #endif
539 539
 
540 540
 again:
541
-		if(req->error==TCP_REQ_OK){
542
-			bytes=tcp_read_headers(con);
541
+		if (likely(req->error==TCP_REQ_OK)){
542
+			bytes=tcp_read_headers(con, read_flags);
543 543
 #ifdef EXTRA_DEBUG
544 544
 						/* if timeout state=0; goto end__req; */
545 545
 			DBG("read= %d bytes, parsed=%d, state=%d, error=%d\n",
... ...
@@ -549,7 +570,7 @@ again:
549 549
 					*(req->parsed-1), (int)(req->parsed-req->start),
550 550
 					req->start);
551 551
 #endif
552
-			if (bytes==-1){
552
+			if (unlikely(bytes==-1)){
553 553
 				LOG(L_ERR, "ERROR: tcp_read_req: error reading \n");
554 554
 				resp=CONN_ERROR;
555 555
 				goto end_req;
... ...
@@ -560,14 +581,14 @@ again:
560 560
 			 * if req. is complete we might have a second unparsed
561 561
 			 * request after it, so postpone release_with_eof
562 562
 			 */
563
-			if ((con->state==S_CONN_EOF) && (req->complete==0)) {
563
+			if (unlikely((con->state==S_CONN_EOF) && (req->complete==0))) {
564 564
 				DBG( "tcp_read_req: EOF\n");
565 565
 				resp=CONN_EOF;
566 566
 				goto end_req;
567 567
 			}
568 568
 		
569 569
 		}
570
-		if (req->error!=TCP_REQ_OK){
570
+		if (unlikely(req->error!=TCP_REQ_OK)){
571 571
 			LOG(L_ERR,"ERROR: tcp_read_req: bad request, state=%d, error=%d "
572 572
 					  "buf:\n%.*s\nparsed:\n%.*s\n", req->state, req->error,
573 573
 					  (int)(req->pos-req->buf), req->buf,
... ...
@@ -577,7 +598,7 @@ again:
577 577
 			resp=CONN_ERROR;
578 578
 			goto end_req;
579 579
 		}
580
-		if (req->complete){
580
+		if (likely(req->complete)){
581 581
 #ifdef EXTRA_DEBUG
582 582
 			DBG("tcp_read_req: end of header part\n");
583 583
 			DBG("- received from: port %d\n", con->rcv.src_port);
... ...
@@ -585,7 +606,7 @@ again:
585 585
 			DBG("tcp_read_req: headers:\n%.*s.\n",
586 586
 					(int)(req->body-req->start), req->start);
587 587
 #endif
588
-			if (req->has_content_len){
588
+			if (likely(req->has_content_len)){
589 589
 				DBG("tcp_read_req: content-length= %d\n", req->content_len);
590 590
 #ifdef EXTRA_DEBUG
591 591
 				DBG("tcp_read_req: body:\n%.*s\n", req->content_len,req->body);
... ...
@@ -637,26 +658,28 @@ again:
637 637
 			
638 638
 			/* prepare for next request */
639 639
 			size=req->pos-req->parsed;
640
-			if (size) memmove(req->buf, req->parsed, size);
641
-#ifdef EXTRA_DEBUG
642
-			DBG("tcp_read_req: preparing for new request, kept %ld bytes\n",
643
-					size);
644
-#endif
645
-			req->pos=req->buf+size;
646
-			req->parsed=req->buf;
647 640
 			req->start=req->buf;
648 641
 			req->body=0;
649 642
 			req->error=TCP_REQ_OK;
650 643
 			req->state=H_SKIP_EMPTY;
651 644
 			req->complete=req->content_len=req->has_content_len=0;
652 645
 			req->bytes_to_go=0;
653
-			/* if we still have some unparsed bytes, try to  parse them too*/
654
-			if (size) goto again;
655
-			else if (con->state==S_CONN_EOF){
646
+			req->pos=req->buf+size;
647
+			
648
+			if (unlikely(size)){ 
649
+				memmove(req->buf, req->parsed, size);
650
+				req->parsed=req->buf; /* fix req->parsed after using it */
651
+#ifdef EXTRA_DEBUG
652
+				DBG("tcp_read_req: preparing for new request, kept %ld"
653
+						" bytes\n", size);
654
+#endif
655
+				/*if we still have some unparsed bytes, try to parse them too*/
656
+				goto again;
657
+			} else if (unlikely(con->state==S_CONN_EOF)){
656 658
 				DBG( "tcp_read_req: EOF after reading complete request\n");
657 659
 				resp=CONN_EOF;
658 660
 			}
659
-			
661
+			req->parsed=req->buf; /* fix req->parsed */
660 662
 		}
661 663
 		
662 664
 		
... ...
@@ -732,6 +755,7 @@ inline static int handle_io(struct fd_map* fm, short events, int idx)
732 732
 {	
733 733
 	int ret;
734 734
 	int n;
735
+	int read_flags;
735 736
 	struct tcp_connection* con;
736 737
 	int s;
737 738
 	long resp;
... ...
@@ -787,7 +811,10 @@ again:
787 787
 			}
788 788
 			/* if we received the fd there is most likely data waiting to
789 789
 			 * be read => process it first to avoid extra sys calls */
790
-			resp=tcp_read_req(con, &n);
790
+			read_flags=((con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)) && 
791
+						!(con->flags & F_CONN_OOB_DATA))? RD_CONN_FORCE_EOF
792
+						:0;
793
+			resp=tcp_read_req(con, &n, &read_flags);
791 794
 			if (unlikely(resp<0)){
792 795
 				/* some error occurred, but on the new fd, not on the tcp
793 796
 				 * main fd, so keep the ret value */
... ...
@@ -829,7 +856,14 @@ again:
829 829
 							con, con->id, atomic_get(&con->refcnt));
830 830
 				goto read_error;
831 831
 			}
832
-			resp=tcp_read_req(con, &ret);
832
+#ifdef POLLRDHUP
833
+			read_flags=(((events & POLLRDHUP) | 
834
+							(con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)))
835
+						&& !(events & POLLPRI))? RD_CONN_FORCE_EOF: 0;
836
+#else /* POLLRDHUP */
837
+			read_flags=0;
838
+#endif /* POLLRDHUP */
839
+			resp=tcp_read_req(con, &ret, &read_flags);
833 840
 			if (unlikely(resp<0)){
834 841
 read_error:
835 842
 				ret=-1; /* some error occured */
... ...
@@ -849,6 +883,10 @@ read_error:
849 849
 			}else{
850 850
 				/* update timeout */
851 851
 				con->timeout=get_ticks_raw()+S_TO_TICKS(TCP_CHILD_TIMEOUT);
852
+				/* ret= 0 (read the whole socket buffer) if short read & 
853
+				 *  !POLLPRI,  bytes read otherwise */
854
+				ret&=(((read_flags & RD_CONN_SHORT_READ) && 
855
+						!(events & POLLPRI)) - 1);
852 856
 			}
853 857
 			break;
854 858
 		case F_NONE: