Browse code

- fork_process & fork_tcp_process fixes - reverted to the old fork()-in-parallel behaviour (comment out the FORK_DONT_WAIT define for the "serial" fork()).

Andrei Pelinescu-Onciul authored on 02/10/2006 17:29:23
Showing 5 changed files
... ...
@@ -67,7 +67,7 @@ MAIN_NAME=ser
67 67
 VERSION = 0
68 68
 PATCHLEVEL = 10
69 69
 SUBLEVEL =   99
70
-EXTRAVERSION = -dev46-dns_cache
70
+EXTRAVERSION = -dev47-dns_cache
71 71
 
72 72
 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
73 73
 			$(SUBLEVEL) )
... ...
@@ -46,15 +46,19 @@ modules:
46 46
                          (failure_route only).
47 47
                        - t_branch_replied() -- returns true if the failure 
48 48
                          route is executed for a branch that did receive at
49
-                         least one reply (failure_route only).. It can be used
49
+                         least one reply in the past (the current reply 
50
+                          is not taken into account). It can be used
50 51
                          together with t_branch_timeout() to distinguish 
51 52
                          between a remote side that doesn't respond (some 
52 53
                          provisional reply received) and one that is completely
53
-                          dead.
54
+                          dead. (failure_route only)
54 55
                        - t_any_timeout() -- returns true if any of the current
55 56
                          transaction branches did timeout.
56 57
                        - t_any_replied() -- returns true if at least one branch
57
-                          of the current transaction received one reply.
58
+                          of the current transaction received one reply in the
59
+                          past. If called from a failure_route or an
60
+                          onreply_route, the "current" reply is not taken into
61
+                          account.
58 62
                        - t_is_canceled() -- returns true if the current 
59 63
                          transaction  has been canceled.
60 64
                        - new t_set_fr(timeout_fr_inv, timeout_fr) -- allows
... ...
@@ -441,7 +441,8 @@ failure_route[0]{
441 441
 	</title>
442 442
 	<para>
443 443
 		Returns true if the failure route is executed for a branch that did
444
-		receive at least one reply. It can be used only from the 
444
+		receive at least one reply in the past (the "current" reply is not 
445
+		taken into account). It can be used only from the 
445 446
 		<emphasis>failure_route</emphasis>.
446 447
 	</para>
447 448
 	<example>
... ...
@@ -491,7 +492,8 @@ failure_route[0]{
491 491
 	</title>
492 492
 	<para>
493 493
 		Returns true if at least one of the current transaction's branches
494
-		did receive some reply.
494
+		did receive some reply in the past. If called from a failure or
495
+		onreply route, the "current" reply is not taken into account.
495 496
 	</para>
496 497
 	<example>
497 498
 	    <title><function>t_any_replied</function> usage</title>
... ...
@@ -41,6 +41,12 @@
41 41
 #include "sr_module.h"
42 42
 
43 43
 #include <stdio.h>
44
+
45
+#define FORK_DONT_WAIT  /* child doesn't wait for parent before starting 
46
+						   => faster startup, but the child should not assume
47
+						   the parent fixed the pt[] entry for it */
48
+
49
+
44 50
 #ifdef PROFILING
45 51
 #include <sys/gmon.h>
46 52
 
... ...
@@ -108,6 +114,8 @@ int my_pid()
108 108
 	return pt ? pt[process_no].pid : getpid();
109 109
 }
110 110
 
111
+
112
+
111 113
 /**
112 114
  * Forks a new process.
113 115
  * @param child_id - rank, if equal to PROC_NOCHLDINIT init_child will not be
... ...
@@ -118,77 +126,100 @@ int my_pid()
118 118
  */
119 119
 int fork_process(int child_id, char *desc, int make_sock)
120 120
 {
121
-	int pid,old_process_no;
121
+	int pid, child_process_no;
122
+	int ret;
122 123
 #ifdef USE_TCP
123 124
 	int sockfd[2];
124 125
 #endif
125 126
 
126
-	lock_get(process_lock);	
127
-	if (*process_count>=estimated_proc_no) {
128
-		LOG(L_CRIT, "ERROR: fork_process(): Process limit of %d exceeded."
129
-					" Will simulate fork fail.\n", estimated_proc_no);
130
-		lock_release(process_lock);
131
-		return -1;
132
-	}	
133
-	
127
+	ret=-1;
134 128
 	#ifdef USE_TCP
129
+		sockfd[0]=sockfd[1]=-1;
135 130
 		if(make_sock && !tcp_disable){
136 131
 			 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd)<0){
137 132
 				LOG(L_ERR, "ERROR: fork_process(): socketpair failed: %s\n",
138
-					strerror(errno));
139
-				return -1;
133
+							strerror(errno));
134
+				goto error;
140 135
 			}
141 136
 		}
142 137
 	#endif
138
+	lock_get(process_lock);
139
+	if (*process_count>=estimated_proc_no) {
140
+		LOG(L_CRIT, "ERROR: fork_process(): Process limit of %d exceeded."
141
+					" Will simulate fork fail.\n", estimated_proc_no);
142
+		lock_release(process_lock);
143
+		goto error;
144
+	}	
143 145
 	
144
-	old_process_no = process_no;
145
-	process_no = *process_count;
146
+	
147
+	child_process_no = *process_count;
146 148
 	pid = fork();
147 149
 	if (pid<0) {
148 150
 		lock_release(process_lock);
149
-		return pid;
150
-	}
151
-	if (pid==0){
151
+		ret=pid;
152
+		goto error;
153
+	}else if (pid==0){
152 154
 		/* child */
155
+		process_no=child_process_no;
153 156
 #ifdef PROFILING
154 157
 		monstartup((u_long) &_start, (u_long) &etext);
155 158
 #endif
159
+#ifdef FORK_DONT_WAIT
160
+		/* record pid twice to avoid the child using it, before
161
+		 * parent gets a chance to set it*/
162
+		pt[process_no].pid=getpid();
163
+#else
156 164
 		/* wait for parent to get out of critical zone.
157 165
 		 * this is actually relevant as the parent updates
158 166
 		 * the pt & process_count. */
159 167
 		lock_get(process_lock);
168
+		lock_release(process_lock);	
169
+#endif
160 170
 		#ifdef USE_TCP
161 171
 			if (make_sock && !tcp_disable){
162 172
 				close(sockfd[0]);
163 173
 				unix_tcp_sock=sockfd[1];
164 174
 			}
165 175
 		#endif		
166
-		lock_release(process_lock);	
167 176
 		if ((child_id!=PROC_NOCHLDINIT) && (init_child(child_id) < 0)) {
168
-			LOG(L_ERR, "ERROR: fork_process(): init_child failed for %s\n",
169
-						pt[process_no].desc);
177
+			LOG(L_ERR, "ERROR: fork_process(): init_child failed for "
178
+					" process %d, pid %d, \"%s\"\n", process_no,
179
+					pt[process_no].pid, pt[process_no].desc);
170 180
 			return -1;
171 181
 		}
172 182
 		return pid;
173 183
 	} else {
174 184
 		/* parent */
175
-		process_no = old_process_no;
185
+		(*process_count)++;
186
+#ifdef FORK_DONT_WAIT
187
+		lock_release(process_lock);
188
+#endif
176 189
 		/* add the process to the list in shm */
177
-		pt[*process_count].pid=pid;
190
+		pt[child_process_no].pid=pid;
178 191
 		if (desc){
179
-			strncpy(pt[*process_count].desc, desc, MAX_PT_DESC);
192
+			strncpy(pt[child_process_no].desc, desc, MAX_PT_DESC);
180 193
 		}
181 194
 		#ifdef USE_TCP
182 195
 			if (make_sock && !tcp_disable){
183 196
 				close(sockfd[1]);
184
-				pt[*process_count].unix_sock=sockfd[0];
185
-				pt[*process_count].idx=-1; /* this is not "tcp" process*/
197
+				pt[child_process_no].unix_sock=sockfd[0];
198
+				pt[child_process_no].idx=-1; /* this is not "tcp" process*/
186 199
 			}
187
-		#endif		
188
-		*process_count = (*process_count) +1;
200
+		#endif
201
+#ifdef FORK_DONT_WAIT
202
+#else
189 203
 		lock_release(process_lock);
190
-		return pid;
204
+#endif
205
+		ret=pid;
206
+		goto end;
191 207
 	}
208
+error:
209
+#ifdef USE_TCP
210
+	if (sockfd[0]!=-1) close(sockfd[0]);
211
+	if (sockfd[1]!=-1) close(sockfd[1]);
212
+#endif
213
+end:
214
+	return ret;
192 215
 }
193 216
 
194 217
 /**
... ...
@@ -199,31 +230,27 @@ int fork_process(int child_id, char *desc, int make_sock)
199 199
  * @returns the pid of the new process
200 200
  */
201 201
 #ifdef USE_TCP
202
-int fork_tcp_process(int child_id,char *desc,int r,int *reader_fd_1)
202
+int fork_tcp_process(int child_id, char *desc, int r, int *reader_fd_1)
203 203
 {
204
-	int pid,old_process_no;
204
+	int pid, child_process_no;
205 205
 	int sockfd[2];
206 206
 	int reader_fd[2]; /* for comm. with the tcp children read  */
207
-
208
-
207
+	int ret;
209 208
 	
210
-	lock_get(process_lock);
211
-	/* set the local process_no */
212
-	if (*process_count>=estimated_proc_no) {
213
-		LOG(L_CRIT, "ERROR: fork_tcp_process(): Process limit of %d exceeded."
214
-					" Simulating fork fail\n", estimated_proc_no);
215
-		return -1;
216
-	}	
209
+	/* init */
210
+	sockfd[0]=sockfd[1]=-1;
211
+	reader_fd[0]=reader_fd[1]=-1;
212
+	ret=-1;
217 213
 	
218 214
 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd)<0){
219 215
 		LOG(L_ERR, "ERROR: fork_tcp_process(): socketpair failed: %s\n",
220 216
 					strerror(errno));
221
-		return -1;
217
+		goto error;
222 218
 	}
223 219
 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, reader_fd)<0){
224 220
 		LOG(L_ERR, "ERROR: fork_tcp_process(): socketpair failed: %s\n",
225 221
 					strerror(errno));
226
-		return -1;
222
+		goto error;
227 223
 	}
228 224
 	if (tcp_fix_child_sockets(reader_fd)<0){
229 225
 		LOG(L_ERR, "ERROR: fork_tcp_process(): failed to set non blocking"
... ...
@@ -231,54 +258,85 @@ int fork_tcp_process(int child_id,char *desc,int r,int *reader_fd_1)
231 231
 		/* continue, it's not critical (it will go slower under
232 232
 		 * very high connection rates) */
233 233
 	}
234
+	lock_get(process_lock);
235
+	/* set the local process_no */
236
+	if (*process_count>=estimated_proc_no) {
237
+		LOG(L_CRIT, "ERROR: fork_tcp_process(): Process limit of %d exceeded."
238
+					" Simulating fork fail\n", estimated_proc_no);
239
+		lock_release(process_lock);
240
+		goto error;
241
+	}
242
+	
234 243
 	
235
-	old_process_no = process_no;
236
-	process_no = *process_count;
244
+	child_process_no = *process_count;
237 245
 	pid = fork();
238 246
 	if (pid<0) {
239 247
 		lock_release(process_lock);
240
-		return pid;
248
+		ret=pid;
249
+		goto end;
241 250
 	}
242 251
 	if (pid==0){
252
+		process_no=child_process_no;
243 253
 #ifdef PROFILING
244 254
 		monstartup((u_long) &_start, (u_long) &etext);
245 255
 #endif
256
+#ifdef FORK_DONT_WAIT
257
+		/* record pid twice to avoid the child using it, before
258
+-		 * parent gets a chance to set it*/
259
+		pt[process_no].pid=getpid();
260
+#else
246 261
 		/* wait for parent to get out of critical zone */
247 262
 		lock_get(process_lock);
248
-			close(sockfd[0]);
249
-			unix_tcp_sock=sockfd[1];
250
-			if (reader_fd_1) *reader_fd_1=reader_fd[1];
251 263
 		lock_release(process_lock);
252
-		if (init_child(child_id) < 0) {
264
+#endif
265
+		close(sockfd[0]);
266
+		unix_tcp_sock=sockfd[1];
267
+		close(reader_fd[0]);
268
+		if (reader_fd_1) *reader_fd_1=reader_fd[1];
269
+		if ((child_id!=PROC_NOCHLDINIT) && (init_child(child_id) < 0)) {
253 270
 			LOG(L_ERR, "ERROR: fork_tcp_process(): init_child failed for "
254
-					"%s\n", pt[process_no].desc);
271
+					"process %d, pid %d, \"%s\"\n", process_no, 
272
+					pt[process_no].pid, pt[process_no].desc);
255 273
 			return -1;
256 274
 		}
257 275
 		return pid;
258 276
 	} else {
259
-		/* parent */		
260
-		process_no = old_process_no;
277
+		/* parent */
278
+		(*process_count)++;
279
+#ifdef FORK_DONT_WAIT
280
+		lock_release(process_lock);
281
+#endif
261 282
 		/* add the process to the list in shm */
262
-		pt[*process_count].pid=pid;
263
-		pt[*process_count].unix_sock=sockfd[0];
264
-		pt[*process_count].idx=r; 	
283
+		pt[child_process_no].pid=pid;
284
+		pt[child_process_no].unix_sock=sockfd[0];
285
+		pt[child_process_no].idx=r;
265 286
 		if (desc){
266
-			snprintf(pt[*process_count].desc, MAX_PT_DESC, "%s child=%d", 
287
+			snprintf(pt[child_process_no].desc, MAX_PT_DESC, "%s child=%d", 
267 288
 						desc, r);
268 289
 		}
290
+#ifdef FORK_DONT_WAIT
291
+#else
292
+		lock_release(process_lock);
293
+#endif
269 294
 		
270 295
 		close(sockfd[1]);
271 296
 		close(reader_fd[1]);
272 297
 		
273 298
 		tcp_children[r].pid=pid;
274
-		tcp_children[r].proc_no=process_no;
299
+		tcp_children[r].proc_no=child_process_no;
275 300
 		tcp_children[r].busy=0;
276 301
 		tcp_children[r].n_reqs=0;
277 302
 		tcp_children[r].unix_sock=reader_fd[0];
278 303
 		
279
-		*process_count = (*process_count) +1;
280
-		lock_release(process_lock);
281
-		return pid;
304
+		ret=pid;
305
+		goto end;
282 306
 	}
307
+error:
308
+	if (sockfd[0]!=-1) close(sockfd[0]);
309
+	if (sockfd[1]!=-1) close(sockfd[1]);
310
+	if (reader_fd[0]!=-1) close(reader_fd[0]);
311
+	if (reader_fd[1]!=-1) close(reader_fd[1]);
312
+end:
313
+	return ret;
283 314
 }
284 315
 #endif
... ...
@@ -1986,7 +1986,7 @@ int tcp_init_children()
1986 1986
 	/* fork children & create the socket pairs*/
1987 1987
 	for(r=0; r<tcp_children_no; r++){
1988 1988
 		child_rank++;
1989
-		pid=fork_tcp_process(child_rank,"tcp receiver",1,&reader_fd_1);
1989
+		pid=fork_tcp_process(child_rank, "tcp receiver", r, &reader_fd_1);
1990 1990
 		if (pid<0){
1991 1991
 			LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
1992 1992
 					strerror(errno));