Browse code

- fix: incorrect iteration through A & AAAA records - support for SRV weight based load balancing as described in rfc2782 (as opposed to simple failover)

Andrei Pelinescu-Onciul authored on 14/06/2007 23:29:31
Showing 2 changed files
... ...
@@ -30,13 +30,20 @@
30 30
  * --------
31 31
  *  2006-07-13  created by andrei
32 32
  *  2006-10-06  port fix (andrei)
33
+ *  2007-06-14  dns iterate through A & AAAA records fix (andrei)
34
+ *  2007-06-15  srv rr weight based load balancing support (andrei)
33 35
  */
34 36
 
35 37
 #ifdef USE_DNS_CACHE
36 38
 
39
+#ifdef DNS_SRV_LB
40
+#include <stdlib.h> /* FIXME: rand() */
41
+#endif
42
+
37 43
 #include "globals.h"
38 44
 #include "dns_cache.h"
39 45
 #include "dns_wrappers.h"
46
+#include "compiler_opt.h"
40 47
 #include "mem/shm_mem.h"
41 48
 #include "hashes.h"
42 49
 #include "clist.h"
... ...
@@ -47,6 +54,7 @@
47 47
 #include "timer_ticks.h"
48 48
 #include "error.h"
49 49
 #include "rpc.h"
50
+#include "rand/fastrand.h"
50 51
 
51 52
 
52 53
 
... ...
@@ -80,6 +88,7 @@ unsigned int dns_cache_min_ttl=DEFAULT_DNS_CACHE_MIN_TTL; /* minimum ttl */
80 80
 unsigned int dns_timer_interval=DEFAULT_DNS_TIMER_INTERVAL; /* in s */
81 81
 int dns_flags=0; /* default flags used for the  dns_*resolvehost 
82 82
                     (compatibility wrappers) */
83
+int dns_srv_lb=0; /* SRV weight based load balancing: off by default */
83 84
 
84 85
 #define LOCK_DNS_HASH()		lock_get(dns_hash_lock)
85 86
 #define UNLOCK_DNS_HASH()	lock_release(dns_hash_lock)
... ...
@@ -276,7 +285,14 @@ int init_dns_cache()
276 276
 	if (dns_flags & DNS_IPV4_ONLY){
277 277
 		dns_flags&=~(DNS_IPV6_ONLY|DNS_IPV6_FIRST);
278 278
 	}
279
-			;
279
+	if (dns_srv_lb){
280
+#ifdef DNS_SRV_LB
281
+		dns_flags|=DNS_SRV_RR_LB;
282
+#else
283
+		LOG(L_WARN, "WARING: dns_cache_init: SRV loadbalaning is set, but"
284
+					" support for it is not compiled -- ignoring\n");
285
+#endif
286
+	}
280 287
 	dns_timer_h=timer_alloc();
281 288
 	if (dns_timer_h==0){
282 289
 		ret=E_OUT_OF_MEM;
... ...
@@ -1526,7 +1542,7 @@ end:
1526 1526
 /* tries to lookup (name, type) in the hash and if not found tries to make
1527 1527
  *  a dns request
1528 1528
  *  return: 0 on error, pointer to a dns_hash_entry on success
1529
- *  WARNING: when *   not needed anymore dns_hash_put() must be called! */
1529
+ *  WARNING: when not needed anymore dns_hash_put() must be called! */
1530 1530
 inline static struct dns_hash_entry* dns_get_entry(str* name, int type)
1531 1531
 {
1532 1532
 	int h;
... ...
@@ -1614,6 +1630,138 @@ inline static struct dns_rr* dns_entry_get_rr(	struct dns_hash_entry* e,
1614 1614
 }
1615 1615
 
1616 1616
 
1617
+#ifdef DNS_SRV_LB
1618
+
1619
+#define srv_reset_tried(p)	(*(p)=0)
1620
+#define srv_marked(p, i)	(*(p)&(1UL<<(i)))
1621
+#define srv_mark_tried(p, i)	\
1622
+	do{ \
1623
+		(*(p)|=(1UL<<(i))); \
1624
+	}while(0)
1625
+
1626
+#define srv_next_rr(n, f, i) srv_mark_tried(f, i)
1627
+
1628
+/* returns a random number between 0 and max inclusive (0<=r<=max) */
1629
+inline static unsigned dns_srv_random(unsigned max)
1630
+{
1631
+	return fastrand_max(max);
1632
+}
1633
+
1634
+/* for a SRV record it will return the next entry to be tried according
1635
+ * to the RFC2782 server selection mechanism
1636
+ * params:
1637
+ *    e     is a dns srv hash entry
1638
+ *    no    is the start index of the current group (a group is a set of SRV 
1639
+ *          rrs with the same priority)
1640
+ *    tried is a bitmap where the tried srv rrs of the same priority are 
1641
+ *          marked
1642
+ *    now - current time/ticks value
1643
+ * returns pointer to the rr on success and sets *no to the start index of
1644
+ *         the rr's priority group; 0 if there are no more rrs left to try
1645
+ * WARNING: unlike dns_entry_get_rr() this will always return
1646
+ *           another rr automatically (*no must not be incremented)
1647
+ *
1648
+ * Example usage:
1649
+ * list all non-expired, non-bad-marked, never tried before srv records
1650
+ * using the rfc2782 algo:
1651
+ * e=dns_get_entry(name, T_SRV);
1652
+ * if (e){
1653
+ *    no=0;
1654
+ *    srv_reset_tried(&tried);
1655
+ *    now=get_ticks_raw();
1656
+ *    while((rr=dns_srv_get_nxt_rr(e, &tried, &no, now))){
1657
+ *       DBG("address %d\n", no);
1658
+ *     }
1659
+ *  }
1660
+ *
1661
+ */
1662
+inline static struct dns_rr* dns_srv_get_nxt_rr(struct dns_hash_entry* e,
1663
+											 srv_flags_t* tried,
1664
+											 unsigned char* no, ticks_t now)
1665
+{
1666
+#define MAX_SRV_GRP_IDX		(sizeof(srv_flags_t)*8) 
1667
+	struct dns_rr* rr;
1668
+	struct dns_rr* start_grp;
1669
+	int n;
1670
+	unsigned sum;
1671
+	unsigned prio;
1672
+	unsigned rand_w;
1673
+	int found;
1674
+	int saved_idx;
1675
+	int i, idx;
1676
+	struct r_sums_entry{
1677
+			unsigned r_sum;
1678
+			struct dns_rr* rr;
1679
+			}r_sums[MAX_SRV_GRP_IDX];
1680
+	
1681
+	rand_w=0;
1682
+	for(rr=e->rr_lst, n=0;rr && (n<*no);rr=rr->next, n++);/* skip *no records*/
1683
+	
1684
+retry:
1685
+	if (unlikely(rr==0))
1686
+		goto no_more_rrs;
1687
+	start_grp=rr;
1688
+	prio=((struct srv_rdata*)start_grp->rdata)->priority;
1689
+	sum=0;
1690
+	saved_idx=-1;
1691
+	found=0;
1692
+	for (idx=0;rr && (prio==((struct srv_rdata*)rr->rdata)->priority) &&
1693
+						(idx < MAX_SRV_GRP_IDX); idx++, rr=rr->next){
1694
+		if ( ((s_ticks_t)(now-rr->expire)>=0) /* expired entry */ ||
1695
+				(rr->err_flags) /* bad rr */ ||
1696
+				(srv_marked(tried, idx)) ) /* already tried */{
1697
+			r_sums[idx].r_sum=0; /* 0 sum, to skip over it */
1698
+			r_sums[idx].rr=0;    /* debug: mark it as unused */
1699
+			continue;
1700
+		}
1701
+		/* special case, 0 weight records should be "first":
1702
+		 * remember the first rr in the "virtual" list: A 0 weight must
1703
+		 *  come first if present, else get the first one */
1704
+		if ((saved_idx==-1) || (((struct srv_rdata*)rr->rdata)->weight==0)){
1705
+			saved_idx=idx;
1706
+		}
1707
+		sum+=((struct srv_rdata*)rr->rdata)->weight;
1708
+		r_sums[idx].r_sum=sum;
1709
+		r_sums[idx].rr=rr;
1710
+		found++;
1711
+	}
1712
+	if (found==0){
1713
+		/* try in the next priority group */
1714
+		n+=idx; /* next group start idx, last rr */
1715
+		srv_reset_tried(tried);
1716
+		goto retry;
1717
+	}else if ((found==1) || ((rand_w=dns_srv_random(sum))==0)){
1718
+		/* 1. if only one found, avoid a useless random() call or
1719
+		 * 2. if rand_w==0, immediately select a 0 weight record if present, 
1720
+		 *     or else the first record found
1721
+		 *  (this takes care of the 0-weight at the beginning requirement) */
1722
+		i=saved_idx; /* saved idx contains either first 0 weight or first
1723
+						valid record */
1724
+		goto found;
1725
+	}
1726
+	/* if we are here => rand_w is not 0 and we have at least 2 valid options
1727
+	 * => we can safely iterate on the whole r_sums[] without any other
1728
+	 * extra checks */
1729
+	for (i=0; (i<idx) && (r_sums[i].r_sum<rand_w); i++);
1730
+found:
1731
+#ifdef DNS_CACHE_DEBUG
1732
+	DBG("dns_srv_get_nxt_rr(%p, %lx, %d, %u): selected %d/%d in grp. %d"
1733
+			" (rand_w=%d, rr=%p p=%d w=%d rsum=%d)\n",
1734
+		e, (unsigned long)*tried, *no, now, i, idx, n, rand_w, r_sums[i].rr,
1735
+		((struct srv_rdata*)r_sums[i].rr->rdata)->priority,
1736
+		((struct srv_rdata*)r_sums[i].rr->rdata)->weight, r_sums[i].r_sum);
1737
+#endif
1738
+	/* i is the winner */
1739
+	*no=n; /* grp. start */
1740
+	srv_mark_tried(tried, i); /* mark it */
1741
+	return r_sums[i].rr;
1742
+no_more_rrs:
1743
+	*no=n;
1744
+	return 0;
1745
+}
1746
+#endif /* DNS_SRV_LB */
1747
+
1748
+
1617 1749
 
1618 1750
 /* gethostbyname compatibility: converts a dns_hash_entry structure 
1619 1751
  * to a statical internal hostent structure
... ...
@@ -1930,7 +2078,7 @@ skip_srv:
1930 1930
  *                  returned ip
1931 1931
  * returns 0 on success, <0 on error (see the error codes),
1932 1932
  *         fills e, ip and rr_no
1933
- *          On end of records (when use to iterate on all the ips) it
1933
+ *          On end of records (when used to iterate on all the ips) it
1934 1934
  *          will return E_DNS_EOR (you should not log an error for this
1935 1935
  *          value, is just a signal that the address list end has been reached)
1936 1936
  * WARNING: dns_hash_put(*e) must be called when you don't need
... ...
@@ -2043,17 +2191,45 @@ int dns_ip_resolve(struct dns_hash_entry** e, unsigned char* rr_no,
2043 2043
 {
2044 2044
 	int ret;
2045 2045
 	
2046
-	if ((flags&(DNS_IPV6_FIRST|DNS_IPV6_ONLY))){
2047
-		ret=dns_aaaa_resolve(e, rr_no, name, ip);
2048
-		if (ret>=0) return ret;
2049
-	}else{
2046
+	ret=-E_DNS_NO_IP; 
2047
+	if (*e==0){ /* first call */
2048
+		if ((flags&(DNS_IPV6_FIRST|DNS_IPV6_ONLY))){
2049
+			ret=dns_aaaa_resolve(e, rr_no, name, ip);
2050
+			if (ret>=0) return ret;
2051
+		}else{
2052
+			ret=dns_a_resolve(e, rr_no, name, ip);
2053
+			if (ret>=0) return ret;
2054
+		}
2055
+		if (flags&DNS_IPV6_FIRST){
2056
+			ret=dns_a_resolve(e, rr_no, name, ip);
2057
+		}else if (!(flags&(DNS_IPV6_ONLY|DNS_IPV4_ONLY))){
2058
+			ret=dns_aaaa_resolve(e, rr_no, name, ip);
2059
+		}
2060
+	}else if ((*e)->type==T_A){
2061
+		/* continue A resolving */
2050 2062
 		ret=dns_a_resolve(e, rr_no, name, ip);
2051 2063
 		if (ret>=0) return ret;
2052
-	}
2053
-	if (flags&DNS_IPV6_FIRST){
2054
-		ret=dns_a_resolve(e, rr_no, name, ip);
2055
-	}else if (!(flags&(DNS_IPV6_ONLY|DNS_IPV4_ONLY))){
2064
+		if (!(flags&(DNS_IPV6_ONLY|DNS_IPV6_FIRST|DNS_IPV4_ONLY))){
2065
+			/* not found, try with AAAA */
2066
+			dns_hash_put(*e);
2067
+			*e=0;
2068
+			*rr_no=0;
2069
+			ret=dns_aaaa_resolve(e, rr_no, name, ip);
2070
+		}
2071
+	}else if ((*e)->type==T_AAAA){
2072
+		/* continue AAAA resolving */
2056 2073
 		ret=dns_aaaa_resolve(e, rr_no, name, ip);
2074
+		if (ret>=0) return ret;
2075
+		if ((flags&DNS_IPV6_FIRST) && !(flags&DNS_IPV6_ONLY)){
2076
+			/* not found, try with A */
2077
+			dns_hash_put(*e);
2078
+			*e=0;
2079
+			*rr_no=0;
2080
+			ret=dns_a_resolve(e, rr_no, name, ip);
2081
+		}
2082
+	}else{
2083
+		LOG(L_CRIT, "BUG: dns_ip_resolve: invalid record type %d\n", 
2084
+					(*e)->type);
2057 2085
 	}
2058 2086
 	return ret;
2059 2087
 }
... ...
@@ -2061,10 +2237,23 @@ int dns_ip_resolve(struct dns_hash_entry** e, unsigned char* rr_no,
2061 2061
 
2062 2062
 
2063 2063
 /*  gets the first srv record starting at rr_no
2064
- *  (similar to dns_a_resolve but for srv, sets host, port)
2064
+ *  Next call will return the next record a.s.o.
2065
+ *  (similar to dns_a_resolve but for srv, sets host, port and automatically
2066
+ *   switches to the next record in the future)
2067
+ *
2068
+ *   if DNS_SRV_LB and tried!=NULL will do random weight based selection
2069
+ *   for choosing between SRV RRs with the same priority (as described in
2070
+ *    RFC2782).
2071
+ *   If tried==NULL or DNS_SRV_LB is not defined => always returns next
2072
+ *    record in the priority order and for records with the same priority
2073
+ *     the record with the higher weight (from the remaining ones)
2065 2074
  */
2066
-int dns_srv_resolve(struct dns_hash_entry** e, unsigned char* rr_no,
2067
-					str* name, str* host, unsigned short* port)
2075
+int dns_srv_resolve_nxt(struct dns_hash_entry** e,
2076
+#ifdef DNS_SRV_LB
2077
+						srv_flags_t* tried,
2078
+#endif
2079
+						unsigned char* rr_no,
2080
+						str* name, str* host, unsigned short* port)
2068 2081
 {
2069 2082
 	struct dns_rr* rr;
2070 2083
 	int ret;
... ...
@@ -2077,10 +2266,22 @@ int dns_srv_resolve(struct dns_hash_entry** e, unsigned char* rr_no,
2077 2077
 			goto error;
2078 2078
 		/* found it */
2079 2079
 		*rr_no=0;
2080
+#ifdef DNS_SRV_LB
2081
+		if (tried)
2082
+			srv_reset_tried(tried);
2083
+#endif
2080 2084
 		ret=-E_DNS_BAD_SRV_ENTRY;
2081 2085
 	}
2082 2086
 	now=get_ticks_raw();
2083
-	rr=dns_entry_get_rr(*e, rr_no, now);
2087
+#ifdef DNS_SRV_LB
2088
+	if (tried){
2089
+		rr=dns_srv_get_nxt_rr(*e, tried, rr_no, now);
2090
+	}else
2091
+#endif
2092
+	{
2093
+		rr=dns_entry_get_rr(*e, rr_no, now);
2094
+		(*rr_no)++; /* try next record next time */
2095
+	}
2084 2096
 	if (rr){
2085 2097
 		host->s=((struct srv_rdata*)rr->rdata)->name;
2086 2098
 		host->len=((struct srv_rdata*)rr->rdata)->name_len;
... ...
@@ -2111,29 +2312,38 @@ int dns_srv_resolve_ip(struct dns_srv_handle* h,
2111 2111
 	host.len=0;
2112 2112
 	host.s=0;
2113 2113
 	do{
2114
-		if (h->a==0){ 
2115
-			if ((ret=dns_srv_resolve(&h->srv, &h->srv_no,
2116
-													name, &host, port))<0)
2114
+		if (h->a==0){
2115
+#ifdef DNS_SRV_LB
2116
+			if ((ret=dns_srv_resolve_nxt(&h->srv, 
2117
+								(flags & DNS_SRV_RR_LB)?&h->srv_tried_rrs:0,
2118
+								&h->srv_no,
2119
+								name, &host, port))<0)
2117 2120
 				goto error;
2121
+#else
2122
+			if ((ret=dns_srv_resolve_nxt(&h->srv, &h->srv_no,
2123
+								name, &host, port))<0)
2124
+				goto error;
2125
+#endif
2118 2126
 			h->port=*port; /* store new port */
2119 2127
 		}else{
2120 2128
 			*port=h->port; /* return the stored port */
2121 2129
 		}
2122 2130
 		if ((ret=dns_ip_resolve(&h->a, &h->ip_no, &host, ip, flags))<0){
2123 2131
 			/* couldn't find any good ip for this record, try the next one */
2124
-			h->srv_no++;
2125 2132
 			if (h->a){
2126 2133
 				dns_hash_put(h->a);
2127 2134
 				h->a=0;
2128 2135
 			}
2129 2136
 		}else if (h->a==0){
2130 2137
 			/* this was an ip, try the next srv record in the future */
2131
-			h->srv_no++;
2132 2138
 		}
2133 2139
 	}while(ret<0);
2134 2140
 error:
2141
+#ifdef DNS_CACHE_DEBUG
2135 2142
 	DBG("dns_srv_resolve_ip(\"%.*s\", %d, %d), ret=%d, ip=%s\n", 
2136
-			name->len, name->s, h->srv_no, h->ip_no, ret, ip_addr2a(ip));
2143
+			name->len, name->s, h->srv_no, h->ip_no, ret, 
2144
+			ip?ZSW(ip_addr2a(ip)):"");
2145
+#endif
2137 2146
 	return ret;
2138 2147
 }
2139 2148
 
... ...
@@ -2143,11 +2353,11 @@ error:
2143 2143
  * if *port!=0.
2144 2144
  * when performing SRV lookup (*port==0) it will use proto to look for
2145 2145
  * tcp or udp hosts, otherwise proto is unused; if proto==0 => no SRV lookup
2146
- * dns_res_h must be initialized prior to  calling this function and can be
2147
- * used to get the subsequent ips
2146
+ * h must be initialized prior to  calling this function and can be used to 
2147
+ * get the subsequent ips
2148 2148
  * returns:  <0 on error
2149 2149
  *            0 on success and it fills *ip, *port, dns_sip_resolve_h
2150
- * WARNING: when finished, dns_sip_resolve_put(dns_res_h) must be called!
2150
+ * WARNING: when finished, dns_sip_resolve_put(h) must be called!
2151 2151
  */
2152 2152
 int dns_sip_resolve(struct dns_srv_handle* h,  str* name,
2153 2153
 						struct ip_addr* ip, unsigned short* port, int proto,
... ...
@@ -2172,7 +2382,7 @@ int dns_sip_resolve(struct dns_srv_handle* h,  str* name,
2172 2172
 		return -E_DNS_NO_SRV;
2173 2173
 	}
2174 2174
 	len=0;
2175
-	if ((h->srv==0) && (h->a==0)){
2175
+	if ((h->srv==0) && (h->a==0)){ /* first call */
2176 2176
 		h->port=(proto==PROTO_TLS)?SIPS_PORT:SIP_PORT; /* just in case we
2177 2177
 														don't find another */
2178 2178
 		if (port){
... ...
@@ -2234,8 +2444,10 @@ int dns_sip_resolve(struct dns_srv_handle* h,  str* name,
2234 2234
 					if ((ret=dns_srv_resolve_ip(h, &srv_name, ip,
2235 2235
 															port, flags))>=0)
2236 2236
 					{
2237
+#ifdef DNS_CACHE_DEBUG
2237 2238
 						DBG("dns_sip_resolve(%.*s, %d, %d), srv0, ret=%d\n", 
2238 2239
 							name->len, name->s, h->srv_no, h->ip_no, ret);
2240
+#endif
2239 2241
 						return ret;
2240 2242
 					}
2241 2243
 				}
... ...
@@ -46,6 +46,9 @@
46 46
 #error "DNS FAILOVER requires DNS CACHE support (define USE_DNS_CACHE)"
47 47
 #endif
48 48
 
49
+/* comment out the define below to disable SRV weight based load balancing */
50
+#define DNS_SRV_LB
51
+
49 52
 #define DNS_LU_LST
50 53
 
51 54
 /* dns functions return them as negative values (e.g. return -E_DNS_NO_IP)
... ...
@@ -76,6 +79,7 @@ enum dns_errors{
76 76
 
77 77
 
78 78
 extern int dns_flags; /* default flags used for dns lookup */
79
+extern int dns_srv_lb; /* default SRV LB support value */
79 80
 
80 81
 /* return a short string, printable error description (err <=0) */
81 82
 const char* dns_strerror(int err);
... ...
@@ -88,6 +92,7 @@ const char* dns_strerror(int err);
88 88
 #define DNS_IPV4_ONLY	1
89 89
 #define DNS_IPV6_ONLY	2
90 90
 #define DNS_IPV6_FIRST	4
91
+#define DNS_SRV_RR_LB		8  /* SRV RR weight based load balancing */
91 92
 
92 93
 
93 94
 /* ip blacklist error flags */
... ...
@@ -136,10 +141,14 @@ struct dns_hash_entry{
136 136
 };
137 137
 
138 138
 
139
+typedef unsigned int srv_flags_t;
139 140
 
140 141
 struct dns_srv_handle{
141 142
 	struct dns_hash_entry* srv; /* srv entry */
142 143
 	struct dns_hash_entry* a;   /* a or aaaa current entry */
144
+#ifdef DNS_SRV_LB
145
+	srv_flags_t srv_tried_rrs;
146
+#endif
143 147
 	unsigned short port; /* current port */
144 148
 	unsigned char srv_no; /* current record no. in the srv entry */
145 149
 	unsigned char ip_no;   /* current record no. in the a/aaaa entry */
... ...
@@ -232,6 +241,9 @@ inline static void dns_srv_handle_init(struct dns_srv_handle* h)
232 232
 {
233 233
 	h->srv=h->a=0;
234 234
 	h->srv_no=h->ip_no=0;
235
+#ifdef DNS_SRV_LB
236
+	h->srv_tried_rrs=0;
237
+#endif
235 238
 }
236 239
 
237 240