Browse code

dns: srv load balancing distribution fix

The load distribution when doing SRV based load balancing was not
completely fair. The first listed server always had an extra
(sum_of_weights-weight)/(sum_of_weights*(sum_of_weights+1)) chance
of being selected.
For example, for 2 servers both with weight 50, the first one
will have an extra (100-50)/(100*101) chance of being selected =>
an extra 0.495%, which gives a 50.495% - 49.505% distribution
instead of 50% - 50%. For large weight values this doesn't make a big
difference (e.g. for 500-500 => 50.049% instead of 50%), but for
small values it becomes very noticeable (e.g. for 1-1 =>
66% - 33% distribution instead of 50% - 50%).

The chance of selecting a 0-weight record was also changed. It was
1/(sum_of_weights+1) and is now 1/(sum_of_weights*1000).

Reported-by: Michal Matyska michal.matyska iptel org

Andrei Pelinescu-Onciul authored on 09/03/2012 18:12:59
Showing 1 changed files
... ...
@@ -100,6 +100,8 @@
100 100
 #define DNS_HE_MAX_ADDR 10  /* maxium addresses returne in a hostent struct */
101 101
 #define MAX_CNAME_CHAIN  10
102 102
 #define SPACE_FORMAT "    " /* format of view output */
103
+#define DNS_SRV_ZERO_W_CHANCE	1000 /* one in a 1000*weight_sum chance for
104
+										selecting a 0-weight record */
103 105
 
104 106
 int dns_cache_init=1;	/* if 0, the DNS cache is not initialized at startup */
105 107
 static gen_lock_t* dns_hash_lock=0;
... ...
@@ -2286,6 +2288,7 @@ inline static struct dns_rr* dns_srv_get_nxt_rr(struct dns_hash_entry* e,
2286 2286
 	unsigned rand_w;
2287 2287
 	int found;
2288 2288
 	int saved_idx;
2289
+	int zero_weight; /* number of records with 0 weight */
2289 2290
 	int i, idx;
2290 2291
 	struct r_sums_entry{
2291 2292
 			unsigned r_sum;
... ...
@@ -2307,6 +2310,7 @@ retry:
2307 2307
 	prio=((struct srv_rdata*)start_grp->rdata)->priority;
2308 2308
 	sum=0;
2309 2309
 	saved_idx=-1;
2310
+	zero_weight = 0;
2310 2311
 	found=0;
2311 2312
 	for (idx=0;rr && (prio==((struct srv_rdata*)rr->rdata)->priority) &&
2312 2313
 						(idx < MAX_SRV_GRP_IDX); idx++, rr=rr->next){
... ...
@@ -2328,6 +2332,7 @@ retry:
2328 2328
 		if ((saved_idx==-1) || (((struct srv_rdata*)rr->rdata)->weight==0)){
2329 2329
 			saved_idx=idx;
2330 2330
 		}
2331
+		zero_weight += (((struct srv_rdata*)rr->rdata)->weight == 0);
2331 2332
 		sum+=((struct srv_rdata*)rr->rdata)->weight;
2332 2333
 		r_sums[idx].r_sum=sum;
2333 2334
 		r_sums[idx].rr=rr;
... ...
@@ -2338,10 +2343,15 @@ retry:
2338 2338
 		n+=idx; /* next group start idx, last rr */
2339 2339
 		srv_reset_tried(tried);
2340 2340
 		goto retry;
2341
-	}else if ((found==1) || ((rand_w=dns_srv_random(sum))==0)){
2342
-		/* 1. if only one found, avoid a useless random() call or
2343
-		 * 2. if rand_w==0, immediately select a 0 weight record if present,
2344
-		 *     or else the first record found
2341
+	}else if ((found==1) || (sum==0) ||
2342
+				(((rand_w=(dns_srv_random(sum-1)+1))==1) && zero_weight &&
2343
+					(dns_srv_random(DNS_SRV_ZERO_W_CHANCE)==0))){
2344
+		/* 1. if only one found, avoid a useless random() call
2345
+		      and select it (saved_idx will point to it).
2346
+		 * 2. if the sum of weights is 0 (all have 0 weight) or
2347
+		 * 3. rand_w==1 and there are records with 0 weight and
2348
+		 *    random(probab. of selecting a 0-weight)
2349
+		 *     immediately select a 0 weight record.
2345 2350
 		 *  (this takes care of the 0-weight at the beginning requirement) */
2346 2351
 		i=saved_idx; /* saved idx contains either first 0 weight or first
2347 2352
 						valid record */