Browse code

core: dns slow queries reporting

Julien Chavanton authored on 29/01/2019 18:08:18
Showing 6 changed files
... ...
@@ -309,6 +309,7 @@ DNS_TCP_PREF	dns_tcp_pref|dns_tcp_preference
309 309
 DNS_TLS_PREF	dns_tls_pref|dns_tls_preference
310 310
 DNS_SCTP_PREF	dns_sctp_pref|dns_sctp_preference
311 311
 DNS_RETR_TIME	dns_retr_time
312
+DNS_SLOW_QUERY_MS	dns_slow_query_ms
312 313
 DNS_RETR_NO		dns_retr_no
313 314
 DNS_SERVERS_NO	dns_servers_no
314 315
 DNS_USE_SEARCH	dns_use_search_list
... ...
@@ -723,6 +724,8 @@ IMPORTFILE      "import_file"
723 724
 								return DNS_SCTP_PREF; }
724 725
 <INITIAL>{DNS_RETR_TIME}	{ count(); yylval.strval=yytext;
725 726
 								return DNS_RETR_TIME; }
727
+<INITIAL>{DNS_SLOW_QUERY_MS}	{ count(); yylval.strval=yytext;
728
+								return DNS_SLOW_QUERY_MS; }
726 729
 <INITIAL>{DNS_RETR_NO}	{ count(); yylval.strval=yytext;
727 730
 								return DNS_RETR_NO; }
728 731
 <INITIAL>{DNS_SERVERS_NO}	{ count(); yylval.strval=yytext;
... ...
@@ -337,6 +337,7 @@ extern char *default_routename;
337 337
 %token DNS_TLS_PREF
338 338
 %token DNS_SCTP_PREF
339 339
 %token DNS_RETR_TIME
340
+%token DNS_SLOW_QUERY_MS
340 341
 %token DNS_RETR_NO
341 342
 %token DNS_SERVERS_NO
342 343
 %token DNS_USE_SEARCH
... ...
@@ -827,6 +828,8 @@ assign_stm:
827 828
 	| DNS_SCTP_PREF error { yyerror("number expected"); }
828 829
 	| DNS_RETR_TIME EQUAL NUMBER   { default_core_cfg.dns_retr_time=$3; }
829 830
 	| DNS_RETR_TIME error { yyerror("number expected"); }
831
+	| DNS_SLOW_QUERY_MS EQUAL NUMBER   { default_core_cfg.dns_slow_query_ms=$3; }
832
+	| DNS_SLOW_QUERY_MS error { yyerror("number expected"); }
830 833
 	| DNS_RETR_NO EQUAL NUMBER   { default_core_cfg.dns_retr_no=$3; }
831 834
 	| DNS_RETR_NO error { yyerror("number expected"); }
832 835
 	| DNS_SERVERS_NO EQUAL NUMBER   { default_core_cfg.dns_servers_no=$3; }
... ...
@@ -78,6 +78,7 @@ struct cfg_group_core default_core_cfg = {
78 78
 	10,  /*!< tls transport preference (for naptr) */
79 79
 	20,  /*!< sctp transport preference (for naptr) */
80 80
 	-1, /*!< dns_retr_time */
81
+	0,  /*!< dns_slow_query_ms */
81 82
 	-1, /*!< dns_retr_no */
82 83
 	-1, /*!< dns_servers_no */
83 84
 	1,  /*!< dns_search_list */
... ...
@@ -224,6 +225,8 @@ cfg_def_t core_cfg_def[] = {
224 225
 		"sctp protocol preference when doing NAPTR lookups"},
225 226
 	{"dns_retr_time",	CFG_VAR_INT,	0, 0, 0, resolv_reinit,
226 227
 		"time in s before retrying a dns request"},
228
+	{"dns_slow_query_ms",	CFG_VAR_INT,	0, 0, 0, resolv_reinit,
229
+		"max time in ms before a dns request is considered slow"},
227 230
 	{"dns_retr_no",		CFG_VAR_INT,	0, 0, 0, resolv_reinit,
228 231
 		"number of dns retransmissions before giving up"},
229 232
 	{"dns_servers_no",	CFG_VAR_INT,	0, 0, 0, resolv_reinit,
... ...
@@ -67,6 +67,7 @@ struct cfg_group_core {
67 67
 	int dns_tls_pref;
68 68
 	int dns_sctp_pref;
69 69
 	int dns_retr_time;
70
+	int dns_slow_query_ms;
70 71
 	int dns_retr_no;
71 72
 	int dns_servers_no;
72 73
 	int dns_search_list;
... ...
@@ -51,6 +51,8 @@ struct dns_counters_h dns_cnts_h;
51 51
 counter_def_t dns_cnt_defs[] =  {
52 52
 	{&dns_cnts_h.failed_dns_req, "failed_dns_request", 0, 0, 0,
53 53
 		"incremented each time a DNS request has failed."},
54
+	{&dns_cnts_h.slow_dns_req, "slow_dns_request", 0, 0, 0,
55
+		"incremented each time a DNS request took longer than dns_slow_query_ms."},
54 56
 	{0, 0, 0, 0, 0, 0 }
55 57
 };
56 58
 
... ...
@@ -714,6 +716,8 @@ struct rdata* get_record(char* name, int type, int flags)
714 716
 	int name_len;
715 717
 	struct rdata* fullname_rd;
716 718
 	char c;
719
+	struct timeval start, stop;
720
+	int slow_query_ms = cfg_get(core, core_cfg, dns_slow_query_ms);
717 721
 
718 722
 	name_len=strlen(name);
719 723
 
... ...
@@ -735,8 +739,21 @@ struct rdata* get_record(char* name, int type, int flags)
735 739
 	}
736 740
 	fullname_rd=0;
737 741
 
742
+	if (slow_query_ms > 0)
743
+		gettimeofday(&start, NULL);
744
+
738 745
 	size=dns_func.sr_res_search(name, C_IN, type, buff.buff, sizeof(buff));
739 746
 
747
+	if (slow_query_ms > 0) {
748
+		gettimeofday(&stop, NULL);
749
+		int latency_ms = (stop.tv_sec - start.tv_sec)*1000
750
+                + (stop.tv_usec - start.tv_usec)/1000;
751
+		if (slow_query_ms < latency_ms) {
752
+			LOG(cfg_get(core, core_cfg, latency_log), "res_search[%d][%s]elapsed[%dms]\n", type, name, latency_ms);
753
+			counter_inc(dns_cnts_h.slow_dns_req);
754
+		}
755
+	}
756
+
740 757
 	if (unlikely(size<0)) {
741 758
 		LM_DBG("lookup(%s, %d) failed\n", name, type);
742 759
 		goto not_found;
... ...
@@ -65,10 +65,11 @@
65 65
 #define RES_ONLY_TYPE 1   /* return only the specified type records */
66 66
 #define RES_AR		  2   /* return also the additional records */
67 67
 
68
-/* counter for failed DNS requests
68
+/* counter for failed/slow DNS requests
69 69
 */
70 70
 struct dns_counters_h {
71 71
     counter_handle_t failed_dns_req;
72
+    counter_handle_t slow_dns_req;
72 73
 };
73 74
 
74 75
 extern struct dns_counters_h dns_cnts_h;