Browse code

dispatcher: congestion detection load balancing

Thanks to Amy Meyers for her help !

Julien Chavanton authored on 29/03/2018 23:14:21
Showing 4 changed files
... ...
@@ -269,6 +269,9 @@ int ds_set_attrs(ds_dest_t *dest, str *attrs)
269 269
 	for(pit = params_list; pit; pit = pit->next) {
270 270
 		if(pit->name.len == 4 && strncasecmp(pit->name.s, "duid", 4) == 0) {
271 271
 			dest->attrs.duid = pit->body;
272
+		} else if(pit->name.len == 2
273
+				  && strncasecmp(pit->name.s, "cc", 2) == 0) {
274
+			str2sint(&pit->body, &dest->attrs.congestion_control);
272 275
 		} else if(pit->name.len == 6
273 276
 				  && strncasecmp(pit->name.s, "weight", 6) == 0) {
274 277
 			str2sint(&pit->body, &dest->attrs.weight);
... ...
@@ -520,6 +523,7 @@ int dp_init_relative_weights(ds_set_t *dset)
520 520
 	if(dset == NULL || dset->dlist == NULL)
521 521
 		return -1;
522 522
 
523
+	lock_get(&dset->lock);
523 524
 	int rw_sum = 0;
524 525
 	/* find the sum of relative weights*/
525 526
 	for(j = 0; j < dset->nr; j++) {
... ...
@@ -529,6 +533,7 @@ int dp_init_relative_weights(ds_set_t *dset)
529 529
 	}
530 530
 
531 531
 	if(rw_sum == 0) {
532
+		lock_release(&dset->lock);
532 533
 		return 0;
533 534
 	}
534 535
 
... ...
@@ -540,11 +545,13 @@ int dp_init_relative_weights(ds_set_t *dset)
540 540
 
541 541
 		int current_slice =
542 542
 				dset->dlist[j].attrs.rweight * 100 / rw_sum; //truncate here;
543
+		LM_DBG("rw_sum[%d][%d][%d]\n",j, rw_sum, current_slice);
543 544
 		for(k = 0; k < current_slice; k++) {
544 545
 			dset->rwlist[t] = (unsigned int)j;
545 546
 			t++;
546 547
 		}
547 548
 	}
549
+
548 550
 	/* if the array was not completely filled (i.e., the sum of rweights is
549 551
 	 * less than 100 due to truncated), then use last address to fill the rest */
550 552
 	unsigned int last_insert =
... ...
@@ -557,7 +564,7 @@ int dp_init_relative_weights(ds_set_t *dset)
557 557
 	 * sending first 20 calls to it, but ensure that within a 100 calls,
558 558
 	 * 20 go to first address */
559 559
 	shuffle_uint100array(dset->rwlist);
560
-
560
+	lock_release(&dset->lock);
561 561
 	return 0;
562 562
 }
563 563
 
... ...
@@ -2290,6 +2297,8 @@ static inline void latency_stats_update(ds_latency_stats_t *latency_stats, int l
2290 2290
 		latency_stats->average = latency;
2291 2291
 		latency_stats->estimate = latency;
2292 2292
 	}
2293
+	/* train the average if stable after 10 samples */
2294
+	if (latency_stats->count > 10 && latency_stats->stdev < 0.5) latency_stats->count = 500000;
2293 2295
 	if (latency_stats->min > latency)
2294 2296
 		latency_stats->min = latency;
2295 2297
 	if (latency_stats->max < latency)
... ...
@@ -2329,29 +2338,81 @@ int ds_update_latency(int group, str *address, int code)
2329 2329
 		LM_ERR("destination set [%d] not found\n", group);
2330 2330
 		return -1;
2331 2331
 	}
2332
-
2333
-	while(i < idx->nr) {
2334
-		if(idx->dlist[i].uri.len == address->len
2335
-				&& strncasecmp(idx->dlist[i].uri.s, address->s, address->len)
2336
-						   == 0) {
2337
-
2338
-			/* destination address found */
2339
-			state = idx->dlist[i].flags;
2340
-			ds_latency_stats_t *latency_stats = &idx->dlist[i].latency_stats;
2341
-			if (code == 408 && latency_stats->timeout < UINT32_MAX) {
2332
+	int apply_rweights = 0;
2333
+	int all_gw_congested = 1;
2334
+	int total_congestion_ms = 0;
2335
+	lock_get(&idx->lock);
2336
+	while (i < idx->nr) {
2337
+		ds_dest_t *ds_dest = &idx->dlist[i];
2338
+		ds_latency_stats_t *latency_stats = &ds_dest->latency_stats;
2339
+		if (ds_dest->uri.len == address->len
2340
+				&& strncasecmp(ds_dest->uri.s, address->s, address->len) == 0) {
2341
+			/* Destination address found, this is the gateway that was pinged. */
2342
+			state = ds_dest->flags;
2343
+			if (code == 408 && latency_stats->timeout < UINT32_MAX)
2342 2344
 				latency_stats->timeout++;
2343
-			} else {
2344
-				struct timeval now;
2345
-				gettimeofday(&now, NULL);
2346
-				int latency_ms = (now.tv_sec - latency_stats->start.tv_sec)*1000
2347
-			            + (now.tv_usec - latency_stats->start.tv_usec)/1000;
2348
-				latency_stats_update(latency_stats, latency_ms);
2349
-				LM_DBG("[%d]latency[%d]avg[%.2f][%.*s]code[%d]\n", latency_stats->count, latency_ms,
2350
-					 latency_stats->average, address->len, address->s, code);
2345
+			struct timeval now;
2346
+			gettimeofday(&now, NULL);
2347
+			int latency_ms = (now.tv_sec - latency_stats->start.tv_sec)*1000
2348
+		            + (now.tv_usec - latency_stats->start.tv_usec)/1000;
2349
+			latency_stats_update(latency_stats, latency_ms);
2350
+
2351
+			int congestion_ms = latency_stats->estimate - latency_stats->average;
2352
+			if (congestion_ms < 0) congestion_ms = 0;
2353
+			total_congestion_ms += congestion_ms;
2354
+
2355
+			/* Adjusting weight using congestion detection based on latency estimator. */
2356
+			if (ds_dest->attrs.congestion_control && ds_dest->attrs.weight) {
2357
+				int active_weight = ds_dest->attrs.weight - congestion_ms;
2358
+				if (active_weight <= 0) {
2359
+					active_weight = 0;
2360
+				} else {
2361
+					all_gw_congested = 0;
2362
+				}
2363
+				if (ds_dest->attrs.rweight != active_weight) {
2364
+					apply_rweights = 1;
2365
+					ds_dest->attrs.rweight = active_weight;
2366
+				}
2367
+				LM_DBG("[%d]latency[%d]avg[%.2f][%.*s]code[%d]rweight[%d]cms[%d]\n",
2368
+					latency_stats->count, latency_ms,
2369
+					latency_stats->average, address->len, address->s,
2370
+					code, ds_dest->attrs.rweight, congestion_ms);
2351 2371
 			}
2352
-		}
2372
+		} else {
2373
+			/* Another gateway in the set, we verify if it is congested. */
2374
+			int congestion_ms = latency_stats->estimate - latency_stats->average;
2375
+			if (congestion_ms < 0) congestion_ms = 0;
2376
+			total_congestion_ms += congestion_ms;
2377
+			int active_weight = ds_dest->attrs.weight - congestion_ms;
2378
+			if (active_weight > 0) all_gw_congested = 0;
2379
+		}
2380
+		if (!ds_dest->attrs.congestion_control) all_gw_congested = 0;
2353 2381
 		i++;
2354 2382
 	}
2383
+	/* All the GWs are above their congestion threshold, load distribution will now be based on
2384
+	 * the ratio of congestion_ms each GW is facing. */
2385
+	if (all_gw_congested) {
2386
+		i = 0;
2387
+		while (i < idx->nr) {
2388
+			ds_dest_t *ds_dest = &idx->dlist[i];
2389
+			ds_latency_stats_t *latency_stats = &ds_dest->latency_stats;
2390
+			int congestion_ms = latency_stats->estimate - latency_stats->average;
2391
+			/* We multiply by 2^4 to keep enough precision */
2392
+			int active_weight = (total_congestion_ms << 4) / congestion_ms;
2393
+			if (ds_dest->attrs.rweight != active_weight) {
2394
+				apply_rweights = 1;
2395
+				ds_dest->attrs.rweight = active_weight;
2396
+			}
2397
+			LM_DBG("all gw congested[%d][%d]latency_avg[%.2f][%.*s]code[%d]rweight[%d/%d:%d]cms[%d]\n",
2398
+				        total_congestion_ms, latency_stats->count, latency_stats->average,
2399
+				        address->len, address->s, code, total_congestion_ms, congestion_ms,
2400
+				        ds_dest->attrs.rweight, congestion_ms);
2401
+		i++;
2402
+		}
2403
+	}
2404
+
2405
+	lock_release(&idx->lock);
2406
+	if (apply_rweights) dp_init_relative_weights(idx);
2355 2407
 	return state;
2356 2408
 }
2357 2409
 
... ...
@@ -3099,7 +3160,7 @@ ds_set_t *ds_avl_insert(ds_set_t **root, int id, int *setn)
3099 3099
 		node->id = id;
3100 3100
 		node->longer = AVL_NEITHER;
3101 3101
 		*root = node;
3102
-
3102
+		lock_init(&node->lock);
3103 3103
 		avl_rebalance(rotation_top, id);
3104 3104
 
3105 3105
 		(*setn)++;
... ...
@@ -155,6 +155,7 @@ typedef struct _ds_attrs {
155 155
 	int maxload;
156 156
 	int weight;
157 157
 	int rweight;
158
+	int congestion_control;
158 159
 } ds_attrs_t;
159 160
 
160 161
 typedef struct _ds_latency_stats {
... ...
@@ -195,6 +196,7 @@ typedef struct _ds_set {
195 195
 	unsigned int rwlist[100];
196 196
 	struct _ds_set *next[2];
197 197
 	int longer;
198
+	gen_lock_t lock;
198 199
 } ds_set_t;
199 200
 /* clang-format on */
200 201
 
... ...
@@ -81,7 +81,7 @@
81 81
             <holder>Alessandro Arrichiello, Hewlett Packard</holder>
82 82
         </copyright>
83 83
 	<copyright>
84
-            <year>2017</year>
84
+            <year>2017, 2018</year>
85 85
             <holder>Julien chavanton, Flowroute</holder>
86 86
         </copyright>
87 87
    </bookinfo>
... ...
@@ -1110,6 +1110,19 @@ end
1110 1110
 				will be distributed as 25/50/25. After third host failing
1111 1111
 				distribution will be changed to 33/67/0.
1112 1112
 				</para>
1113
+				<para>
1114
+				Using this algorithm, you can also enable congestion control by setting the
1115
+				attibute 'cc=1', when 'cc' is enabled the 'rweight' attribute will also be
1116
+				used to control congestion tolerance. When facing congestion the weight of
1117
+				a gateway is lowered by 1 for every ms of estimated congestion, a 'rweight'
1118
+				value of 50 is recommended. See the example "configuring load balancing with
1119
+				congestion detection" bellow.
1120
+				</para>
1121
+				<para>
1122
+				The congestion estimation is done using an EWMA (see ds_latency_estimator_alpha).
1123
+				If all the gateways in a set are above their congestion threshold(weight), the
1124
+				load distribution is instead done using the ratio of estimated congestion ms.
1125
+				</para>
1113 1126
 			</listitem>
1114 1127
 			<listitem>
1115 1128
 				<para>
... ...
@@ -1152,6 +1165,48 @@ ds_select_dst("1", "4", "3");
1152 1152
 ...
1153 1153
 </programlisting>
1154 1154
 		</example>
1155
+		<example>
1156
+		<title>configuring load balancing with congestion detection</title>
1157
+		<programlisting format="linespecific">
1158
+...
1159
+# sample of SQL provisionning statements
1160
+INSERT INTO "dispatcher" 
1161
+VALUES(1,1,'sip:192.168.0.1:5060',0,12,'rweight=50;weight=50;cc=1;','');
1162
+INSERT INTO "dispatcher" 
1163
+VALUES(2,1,'sip:192.168.0.2:5060',0,12,'rweight=50;weight=50;cc=1;','');
1164
+...
1165
+modparam("dispatcher", "ds_ping_interval", 1) # ping gateways once/second
1166
+modparam("dispatcher", "ds_ping_latency_stats", 1) # update congestion metrics
1167
+# configure the latency estimator
1168
+modparam("dispatcher", "ds_latency_estimator_alpha", 900)
1169
+...
1170
+if (!ds_select_dst("1", "11")) { # use relative weight based load distribution
1171
+...
1172
+# sample of output from 'kamcmd dispatcher.list'
1173
+DEST: {
1174
+	URI: sip:192.168.0.1:5060
1175
+	FLAGS: AP
1176
+	PRIORITY: 12
1177
+	ATTRS: {
1178
+		BODY: rweight=50;weight=50;cc=1 # configuration values
1179
+		DUID: 
1180
+		MAXLOAD: 0
1181
+		WEIGHT: 50
1182
+		RWEIGHT: 50
1183
+		SOCKET: 
1184
+	}
1185
+	LATENCY: {
1186
+		AVG: 20.104000
1187
+		STD: 1.273000
1188
+		# estimated congestion is currently 25ms = 45ms(EST) -20ms(AVG)
1189
+		EST: 45.005000
1190
+		MAX: 132
1191
+		TIMEOUT: 3
1192
+	}
1193
+}
1194
+...
1195
+</programlisting>
1196
+		</example>
1155 1197
 	</section>
1156 1198
   	<section id="dispatcher.f.ds_select_domain">
1157 1199
  		<title>