Browse code

io_wait: kqueue: handle ENOENT and more robust error handling

- also handle ENOENT (along with EBADF) when kevent() fails due to errors
in the changelist. ENOENT can be returned in the following valid
scenario: an fd is scheduled for delayed removal from the watched fd
list, the fd is closed (which automatically removes it from the
kqueue watched list), a newly opened fd gets the same number, and
then the delayed changes are applied (kevent()).
- treat all other kevent() errors or EV_ERRORs the same way (ignore
them and continue), but log them at BUG() level.
- return POLLERR|POLLHUP for EV_EOF with non-zero fflags.

(only kqueue, meaning *bsd and darwin are affected by this fix)

Andrei Pelinescu-Onciul authored on 18/06/2010 22:44:24
Showing 1 changed files
... ...
@@ -259,34 +259,33 @@ static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
259 259
 again:
260 260
 		n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
261 261
 		if (unlikely(n == -1)){
262
-			if (likely(errno == EBADF)) {
262
+			if (unlikely(errno == EINTR)) goto again;
263
+			else {
264
+				/* for a detailed explanation of what follows see
265
+				   io_wait_loop_kqueue EV_ERROR case */
266
+				if (unlikely(!(errno == EBADF || errno == ENOENT)))
267
+					BUG("kq_ev_change: kevent flush changes failed"
268
+							" (unexpected error): %s [%d]\n",
269
+							strerror(errno), errno);
270
+					/* ignore error even if it's not a EBADF/ENOENT */
263 271
 				/* one of the file descriptors is bad, probably already
264 272
 				   closed => try to apply changes one-by-one */
265 273
 				for (r = 0; r < h->kq_nchanges; r++) {
266 274
 retry2:
267 275
 					n = kevent(h->kq_fd, &h->kq_changes[r], 1, 0, 0, &tspec);
268 276
 					if (n==-1) {
269
-						if (errno == EBADF)
270
-							continue; /* skip over it */
271
-						if (errno == EINTR)
277
+						if (unlikely(errno == EINTR))
272 278
 							goto retry2;
273
-						LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes"
274
-									" failed: %s [%d]\n",
275
-										strerror(errno), errno);
276
-						/* shift the array */
277
-						memmove(&h->kq_changes[0], &h->kq_changes[r+1],
278
-									sizeof(h->kq_changes[0])*
279
-										(h->kq_nchanges-r-1));
280
-						h->kq_nchanges-=(r+1);
281
-						return -1;
279
+					/* for a detailed explanation of what follows see
280
+						io_wait_loop_kqueue EV_ERROR case */
281
+						if (unlikely(!(errno == EBADF || errno == ENOENT)))
282
+							BUG("kq_ev_change: kevent flush changes failed:"
283
+									" (unexpected error) %s [%d] (%d/%d)\n",
284
+										strerror(errno), errno,
285
+										r, h->kq_nchanges);
286
+						continue; /* skip over it */
282 287
 					}
283 288
 				}
284
-			} else if (errno == EINTR) goto again;
285
-			else {
286
-				LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes"
287
-						" failed: %s [%d]\n", strerror(errno), errno);
288
-				h->kq_nchanges=0; /* reset changes array */
289
-				return -1;
290 289
 			}
291 290
 		}
292 291
 		h->kq_nchanges=0; /* changes array is empty */
... ...
@@ -1118,17 +1117,18 @@ again:
1118 1117
 		n=kevent(h->kq_fd, h->kq_changes, apply_changes,  h->kq_array,
1119 1118
 					h->fd_no, &tspec);
1120 1119
 		if (unlikely(n==-1)){
1121
-			if (errno==EINTR) goto again; /* signal, ignore it */
1122
-			else if (errno==EBADF) {
1120
+			if (unlikely(errno==EINTR)) goto again; /* signal, ignore it */
1121
+			else {
1122
+				/* for a detailed explanation of what follows see below
1123
+				   the EV_ERROR case */
1124
+				if (unlikely(!(errno==EBADF || errno==ENOENT)))
1125
+					BUG("io_wait_loop_kqueue: kevent: unexpected error"
1126
+						" %s [%d]\n", strerror(errno), errno);
1123 1127
 				/* some of the FDs in kq_changes are bad (already closed)
1124 1128
 				   and there is not enough space in kq_array to return all
1125 1129
 				   of them back */
1126 1130
 				apply_changes = h->fd_no;
1127 1131
 				goto again;
1128
-			}else{
1129
-				LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
1130
-						" %s [%d]\n", strerror(errno), errno);
1131
-				goto error;
1132 1132
 			}
1133 1133
 		}
1134 1134
 		/* remove applied changes */
... ...
@@ -1148,14 +1148,13 @@ again:
1148 1148
 					r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
1149 1149
 					h->kq_array[r].flags);
1150 1150
 #endif
1151
-			if (unlikely((h->kq_array[r].flags & EV_ERROR) &&
1152
-							(h->kq_array[r].data == EBADF ||
1153
-							 h->kq_array[r].udata == 0))){
1151
+			if (unlikely((h->kq_array[r].flags & EV_ERROR) ||
1152
+							 h->kq_array[r].udata == 0)){
1154 1153
 				/* error in changes: we ignore it if it has to do with a
1155 1154
 				   bad fd or udata==0. It can be caused by trying to remove an
1156 1155
 				   already closed fd: race between adding something to the
1157
-				   changes array, close() and applying the changes.
1158
-				   E.g. for ser tcp: tcp_main sends a fd to child fore reading
1156
+				   changes array, close() and applying the changes (EBADF).
1157
+				   E.g. for ser tcp: tcp_main sends a fd to child for reading
1159 1158
 				    => deletes it from the watched fds => the changes array
1160 1159
 					will contain an EV_DELETE for it. Before the changes
1161 1160
 					are applied (they are at the end of the main io_wait loop,
... ...
@@ -1163,6 +1162,16 @@ again:
1163 1162
 					to tcp_main by a sender (send fail) is processed and causes
1164 1163
 					the fd to be closed. When the changes are applied =>
1165 1164
 					error for the EV_DELETE attempt of a closed fd.
1165
+					Something similar can happen when a fd is scheduled
1166
+					for removal, is close()'ed before being removed and
1167
+					re-opened (a new sock. gets the same fd). When the
1168
+					watched fd changes will be applied the fd will be valid
1169
+					(so no EBADF), but it's not already watched => ENOENT.
1170
+					We report a BUG for the other errors (there's nothing
1171
+					constructive we can do if we get an error we don't know 
1172
+					how to handle), but apart from that we ignore it in the
1173
+					idea that it is better to apply the rest of the changes,
1174
+					rather than dropping all of them.
1166 1175
 				*/
1167 1176
 				/*
1168 1177
 					example EV_ERROR for trying to delete a read watched fd,
... ...
@@ -1176,9 +1185,12 @@ again:
1176 1185
 						udata = 0x0
1177 1186
 					}
1178 1187
 				*/
1179
-				if (h->kq_array[r].data != EBADF)
1180
-					LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
1181
-							"fd %ld: %s [%ld]\n", (long)h->kq_array[r].ident,
1188
+				if (h->kq_array[r].data != EBADF &&
1189
+						h->kq_array[r].data != ENOENT)
1190
+					BUG("io_wait_loop_kqueue: kevent unexpected error on "
1191
+							"fd %ld udata %lx: %s [%ld]\n",
1192
+							(long)h->kq_array[r].ident,
1193
+							(long)h->kq_array[r].udata,
1182 1194
 							strerror(h->kq_array[r].data),
1183 1195
 							(long)h->kq_array[r].data);
1184 1196
 			}else{
... ...
@@ -1186,20 +1198,28 @@ again:
1186 1198
 				if (likely(h->kq_array[r].filter==EVFILT_READ)){
1187 1199
 					revents=POLLIN |
1188 1200
 						(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1189
-						(((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1201
+						(((int)!((h->kq_array[r].flags & EV_EOF) &&
1202
+								 	h->kq_array[r].fflags != 0) - 1)&POLLERR);
1190 1203
 					while(fm->type && (fm->events & revents) && 
1191 1204
 							(handle_io(fm, revents, -1)>0) && repeat);
1192 1205
 				}else if (h->kq_array[r].filter==EVFILT_WRITE){
1193 1206
 					revents=POLLOUT |
1194 1207
 						(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1195
-						(((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1208
+						(((int)!((h->kq_array[r].flags & EV_EOF) &&
1209
+								 	h->kq_array[r].fflags != 0) - 1)&POLLERR);
1196 1210
 					while(fm->type && (fm->events & revents) && 
1197 1211
 							(handle_io(fm, revents, -1)>0) && repeat);
1212
+				}else{
1213
+					BUG("io_wait_loop_kqueue: unknown filter: kqueue: event "
1214
+							"%d/%d: fd=%d, filter=%d, flags=0x%x, fflags=0x%x,"
1215
+							" data=%lx, udata=%lx\n",
1216
+					r, n, h->kq_array[r].ident, h->kq_array[r].filter,
1217
+					h->kq_array[r].flags, h->kq_array[r].fflags, 
1218
+					(long)h->kq_array[r].data, (long)h->kq_array[r].udata);
1198 1219
 				}
1199 1220
 			}
1200 1221
 		}
1201 1222
 	} while(unlikely(orig_changes));
1202
-error:
1203 1223
 	return n;
1204 1224
 }
1205 1225
 #endif