Browse code

io_wait: kqueue: handle ENOENT and more robust error handling

- also handle ENOENT (along with EBADF) when kevent fails due to errors
in the changelist. ENOENT can be returned in the following valid
scenario: an fd is scheduled for delayed removal from the watched fd
list, the fd is closed (which automatically removes it from the
kqueue watched list), a newly opened fd gets the same number, and
then the delayed changes are applied (kevent()).
- treat all the other kevent errors or EV_ERRORs in a similar way
but log them (at BUG() level).
- return POLLERR|POLLHUP for EV_EOF with non-zero fflags.

(only the kqueue backend is affected by this fix, i.e. *BSD and Darwin)

Andrei Pelinescu-Onciul authored on 18/06/2010 22:44:24
Showing 1 changed files
... ...
@@ -259,34 +259,33 @@ static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
259 259
 again:
260 260
 		n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
261 261
 		if (unlikely(n == -1)){
262
-			if (likely(errno == EBADF)) {
262
+			if (unlikely(errno == EINTR)) goto again;
263
+			else {
264
+				/* for a detailed explanation of what follows see
265
+				   io_wait_loop_kqueue EV_ERROR case */
266
+				if (unlikely(!(errno == EBADF || errno == ENOENT)))
267
+					BUG("kq_ev_change: kevent flush changes failed"
268
+							" (unexpected error): %s [%d]\n",
269
+							strerror(errno), errno);
270
+					/* ignore error even if it's not a EBADF/ENOENT */
263 271
 				/* one of the file descriptors is bad, probably already
264 272
 				   closed => try to apply changes one-by-one */
265 273
 				for (r = 0; r < h->kq_nchanges; r++) {
266 274
 retry2:
267 275
 					n = kevent(h->kq_fd, &h->kq_changes[r], 1, 0, 0, &tspec);
268 276
 					if (n==-1) {
269
-						if (errno == EBADF)
270
-							continue; /* skip over it */
271
-						if (errno == EINTR)
277
+						if (unlikely(errno == EINTR))
272 278
 							goto retry2;
273
-						LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes"
274
-									" failed: %s [%d]\n",
275
-										strerror(errno), errno);
276
-						/* shift the array */
277
-						memmove(&h->kq_changes[0], &h->kq_changes[r+1],
278
-									sizeof(h->kq_changes[0])*
279
-										(h->kq_nchanges-r-1));
280
-						h->kq_nchanges-=(r+1);
281
-						return -1;
279
+					/* for a detailed explanation of what follows see
280
+						io_wait_loop_kqueue EV_ERROR case */
281
+						if (unlikely(!(errno == EBADF || errno == ENOENT)))
282
+							BUG("kq_ev_change: kevent flush changes failed:"
283
+									" (unexpected error) %s [%d] (%d/%d)\n",
284
+										strerror(errno), errno,
285
+										r, h->kq_nchanges);
286
+						continue; /* skip over it */
282 287
 					}
283 288
 				}
284
-			} else if (errno == EINTR) goto again;
285
-			else {
286
-				LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes"
287
-						" failed: %s [%d]\n", strerror(errno), errno);
288
-				h->kq_nchanges=0; /* reset changes array */
289
-				return -1;
290 289
 			}
291 290
 		}
292 291
 		h->kq_nchanges=0; /* changes array is empty */
... ...
@@ -1118,17 +1117,18 @@ again:
1118 1118
 		n=kevent(h->kq_fd, h->kq_changes, apply_changes,  h->kq_array,
1119 1119
 					h->fd_no, &tspec);
1120 1120
 		if (unlikely(n==-1)){
1121
-			if (errno==EINTR) goto again; /* signal, ignore it */
1122
-			else if (errno==EBADF) {
1121
+			if (unlikely(errno==EINTR)) goto again; /* signal, ignore it */
1122
+			else {
1123
+				/* for a detailed explanation of what follows see below
1124
+				   the EV_ERROR case */
1125
+				if (unlikely(!(errno==EBADF || errno==ENOENT)))
1126
+					BUG("io_wait_loop_kqueue: kevent: unexpected error"
1127
+						" %s [%d]\n", strerror(errno), errno);
1123 1128
 				/* some of the FDs in kq_changes are bad (already closed)
1124 1129
 				   and there is not enough space in kq_array to return all
1125 1130
 				   of them back */
1126 1131
 				apply_changes = h->fd_no;
1127 1132
 				goto again;
1128
-			}else{
1129
-				LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
1130
-						" %s [%d]\n", strerror(errno), errno);
1131
-				goto error;
1132 1133
 			}
1133 1134
 		}
1134 1135
 		/* remove applied changes */
... ...
@@ -1148,14 +1148,13 @@ again:
1148 1148
 					r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
1149 1149
 					h->kq_array[r].flags);
1150 1150
 #endif
1151
-			if (unlikely((h->kq_array[r].flags & EV_ERROR) &&
1152
-							(h->kq_array[r].data == EBADF ||
1153
-							 h->kq_array[r].udata == 0))){
1151
+			if (unlikely((h->kq_array[r].flags & EV_ERROR) ||
1152
+							 h->kq_array[r].udata == 0)){
1154 1153
 				/* error in changes: we ignore it if it has to do with a
1155 1154
 				   bad fd or update==0. It can be caused by trying to remove an
1156 1155
 				   already closed fd: race between adding something to the
1157
-				   changes array, close() and applying the changes.
1158
-				   E.g. for ser tcp: tcp_main sends a fd to child fore reading
1156
+				   changes array, close() and applying the changes (EBADF).
1157
+				   E.g. for ser tcp: tcp_main sends a fd to child for reading
1159 1158
 				    => deletes it from the watched fds => the changes array
1160 1159
 					will contain an EV_DELETE for it. Before the changes
1161 1160
 					are applied (they are at the end of the main io_wait loop,
... ...
@@ -1163,6 +1162,16 @@ again:
1163 1163
 					to tcp_main by a sender (send fail) is processed and causes
1164 1164
 					the fd to be closed. When the changes are applied =>
1165 1165
 					error for the EV_DELETE attempt of a closed fd.
1166
+					Something similar can happen when a fd is scheduled
1167
+					for removal, is close()'ed before being removed and
1168
+					re-opened(a new sock. get the same fd). When the
1169
+					watched fd changes will be applied the fd will be valid
1170
+					(so no EBADF), but it's not already watch => ENOENT.
1171
+					We report a BUG for the other errors (there's nothing
1172
+					constructive we can do if we get an error we don't know 
1173
+					how to handle), but apart from that we ignore it in the
1174
+					idea that it is better apply the rest of the changes,
1175
+					rather then dropping all of them.
1166 1176
 				*/
1167 1177
 				/*
1168 1178
 					example EV_ERROR for trying to delete a read watched fd,
... ...
@@ -1176,9 +1185,12 @@ again:
1176 1176
 						udata = 0x0
1177 1177
 					}
1178 1178
 				*/
1179
-				if (h->kq_array[r].data != EBADF)
1180
-					LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
1181
-							"fd %ld: %s [%ld]\n", (long)h->kq_array[r].ident,
1179
+				if (h->kq_array[r].data != EBADF &&
1180
+						h->kq_array[r].data != ENOENT)
1181
+					BUG("io_wait_loop_kqueue: kevent unexpected error on "
1182
+							"fd %ld udata %lx: %s [%ld]\n",
1183
+							(long)h->kq_array[r].ident,
1184
+							(long)h->kq_array[r].udata,
1182 1185
 							strerror(h->kq_array[r].data),
1183 1186
 							(long)h->kq_array[r].data);
1184 1187
 			}else{
... ...
@@ -1186,20 +1198,28 @@ again:
1186 1186
 				if (likely(h->kq_array[r].filter==EVFILT_READ)){
1187 1187
 					revents=POLLIN |
1188 1188
 						(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1189
-						(((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1189
+						(((int)!((h->kq_array[r].flags & EV_EOF) &&
1190
+								 	h->kq_array[r].fflags != 0) - 1)&POLLERR);
1190 1191
 					while(fm->type && (fm->events & revents) && 
1191 1192
 							(handle_io(fm, revents, -1)>0) && repeat);
1192 1193
 				}else if (h->kq_array[r].filter==EVFILT_WRITE){
1193 1194
 					revents=POLLOUT |
1194 1195
 						(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1195
-						(((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1196
+						(((int)!((h->kq_array[r].flags & EV_EOF) &&
1197
+								 	h->kq_array[r].fflags != 0) - 1)&POLLERR);
1196 1198
 					while(fm->type && (fm->events & revents) && 
1197 1199
 							(handle_io(fm, revents, -1)>0) && repeat);
1200
+				}else{
1201
+					BUG("io_wait_loop_kqueue: unknown filter: kqueue: event "
1202
+							"%d/%d: fd=%d, filter=%d, flags=0x%x, fflags=0x%x,"
1203
+							" data=%lx, udata=%lx\n",
1204
+					r, n, h->kq_array[r].ident, h->kq_array[r].filter,
1205
+					h->kq_array[r].flags, h->kq_array[r].fflags, 
1206
+					(long)h->kq_array[r].data, (long)h->kq_array[r].udata);
1198 1207
 				}
1199 1208
 			}
1200 1209
 		}
1201 1210
 	} while(unlikely(orig_changes));
1202
-error:
1203 1211
 	return n;
1204 1212
 }
1205 1213
 #endif