blob: 5faf7ca51ab36d624b948a409af76677b0d3ed1f [file] [log] [blame]
Bernie Innocenti318ed2d2018-08-30 04:05:20 +09001/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Ken Chen5471dca2019-04-15 15:25:35 +080016#define LOG_TAG "resolv"
Bernie Innocenti9f05f5e2018-09-12 23:20:10 +090017
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090018#include <arpa/nameser.h>
Bernie Innocenti8ad893f2018-08-31 14:09:46 +090019#include <stdbool.h>
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090020#include <string.h>
21
Bernie Innocenti9f05f5e2018-09-12 23:20:10 +090022#include <android-base/logging.h>
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090023
Bernie Innocentiac18b122018-10-01 23:10:18 +090024#include "netd_resolv/stats.h"
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090025
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090026
// Calculate the round-trip time, in milliseconds, from start time t0 to end time t1.
//
// Each timestamp is truncated to whole milliseconds before the subtraction, so the result is
// (t1 truncated to ms) - (t0 truncated to ms). It is negative when t1 is earlier than t0.
//
// The seconds are subtracted *before* being scaled to milliseconds. Scaling an absolute tv_sec
// first (tv_sec * 1000) overflows a 32-bit long for any timestamp past ~24.8 days, which is
// undefined behavior for signed arithmetic; the difference between two nearby timestamps is
// small and scales safely.
int _res_stats_calculate_rtt(const struct timespec* t1, const struct timespec* t0) {
    const long long delta_sec = (long long) t1->tv_sec - (long long) t0->tv_sec;
    // Divide ns by one million to get ms; done per timestamp to keep the truncation behavior.
    const long delta_sub_ms = t1->tv_nsec / 1000000 - t0->tv_nsec / 1000000;
    return (int) (delta_sec * 1000 + delta_sub_ms);
}
34
Bernie Innocenti8fca66a2018-09-25 14:23:19 +090035// Create a sample for calculating server reachability statistics.
Bernie Innocentiac18b122018-10-01 23:10:18 +090036void _res_stats_set_sample(res_sample* sample, time_t now, int rcode, int rtt) {
chenbruceacb832c2019-02-20 19:45:50 +080037 LOG(INFO) << __func__ << ": rcode = " << rcode << ", sec = " << rtt;
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090038 sample->at = now;
39 sample->rcode = rcode;
40 sample->rtt = rtt;
41}
42
43/* Clears all stored samples for the given server. */
Bernie Innocentiac18b122018-10-01 23:10:18 +090044void _res_stats_clear_samples(res_stats* stats) {
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090045 stats->sample_count = stats->sample_next = 0;
46}
47
48/* Aggregates the reachability statistics for the given server based on on the stored samples. */
Bernie Innocentiac18b122018-10-01 23:10:18 +090049void android_net_res_stats_aggregate(res_stats* stats, int* successes, int* errors, int* timeouts,
50 int* internal_errors, int* rtt_avg, time_t* last_sample_time) {
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090051 int s = 0; // successes
52 int e = 0; // errors
53 int t = 0; // timouts
54 int ie = 0; // internal errors
55 long rtt_sum = 0;
56 time_t last = 0;
57 int rtt_count = 0;
Bernie Innocenti8ad893f2018-08-31 14:09:46 +090058 for (int i = 0; i < stats->sample_count; ++i) {
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090059 // Treat everything as an error that the code in send_dg() already considers a
60 // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN
61 // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section
62 // is not treated as an error here either. FORMERR seems to sometimes be returned by
63 // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses
64 // as an indication of a broken server is unclear, though. For now treat such responses,
65 // as well as unknown codes as errors.
66 switch (stats->samples[i].rcode) {
Bernie Innocenti8ad893f2018-08-31 14:09:46 +090067 case NOERROR:
68 case NOTAUTH:
69 case NXDOMAIN:
70 ++s;
71 rtt_sum += stats->samples[i].rtt;
72 ++rtt_count;
73 break;
74 case RCODE_TIMEOUT:
75 ++t;
76 break;
77 case RCODE_INTERNAL_ERROR:
78 ++ie;
79 break;
80 case SERVFAIL:
81 case NOTIMP:
82 case REFUSED:
83 default:
84 ++e;
85 break;
Bernie Innocenti318ed2d2018-08-30 04:05:20 +090086 }
87 }
88 *successes = s;
89 *errors = e;
90 *timeouts = t;
91 *internal_errors = ie;
92 /* If there was at least one successful sample, calculate average RTT. */
93 if (rtt_count) {
94 *rtt_avg = rtt_sum / rtt_count;
95 } else {
96 *rtt_avg = -1;
97 }
98 /* If we had at least one sample, populate last sample time. */
99 if (stats->sample_count > 0) {
100 if (stats->sample_next > 0) {
101 last = stats->samples[stats->sample_next - 1].at;
102 } else {
103 last = stats->samples[stats->sample_count - 1].at;
104 }
105 }
106 *last_sample_time = last;
107}
108
waynema37255182019-03-18 13:22:56 +0800109// Returns true if the server is considered usable, i.e. if the success rate is not lower than the
Bernie Innocenti8fca66a2018-09-25 14:23:19 +0900110// threshold for the stored stored samples. If not enough samples are stored, the server is
111// considered usable.
Bernie Innocenti758005f2019-02-19 18:08:36 +0900112static bool res_stats_usable_server(const res_params* params, res_stats* stats) {
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900113 int successes = -1;
114 int errors = -1;
115 int timeouts = -1;
116 int internal_errors = -1;
117 int rtt_avg = -1;
118 time_t last_sample_time = 0;
119 android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors,
Bernie Innocenti8ad893f2018-08-31 14:09:46 +0900120 &rtt_avg, &last_sample_time);
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900121 if (successes >= 0 && errors >= 0 && timeouts >= 0) {
122 int total = successes + errors + timeouts;
Ken Chenffc224a2019-03-19 17:41:28 +0800123 LOG(INFO) << __func__ << ": NS stats: S " << successes << " + E " << errors << " + T "
124 << timeouts << " + I " << internal_errors << " = " << total
125 << ", rtt = " << rtt_avg << ", min_samples = " << unsigned(params->min_samples);
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900126 if (total >= params->min_samples && (errors > 0 || timeouts > 0)) {
127 int success_rate = successes * 100 / total;
Ken Chenffc224a2019-03-19 17:41:28 +0800128 LOG(INFO) << __func__ << ": success rate " << success_rate;
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900129 if (success_rate < params->success_threshold) {
Bernie Innocentiafaacf72018-08-30 07:34:37 +0900130 time_t now = time(NULL);
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900131 if (now - last_sample_time > params->sample_validity) {
132 // Note: It might be worth considering to expire old servers after their expiry
133 // date has been reached, however the code for returning the ring buffer to its
134 // previous non-circular state would induce additional complexity.
Ken Chenffc224a2019-03-19 17:41:28 +0800135 LOG(INFO) << __func__ << ": samples stale, retrying server";
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900136 _res_stats_clear_samples(stats);
137 } else {
Ken Chenffc224a2019-03-19 17:41:28 +0800138 LOG(INFO) << __func__ << ": too many resolution errors, ignoring server";
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900139 return 0;
140 }
141 }
142 }
143 }
144 return 1;
145}
146
Luke Huang70931aa2019-01-31 11:57:41 +0800147int android_net_res_stats_get_usable_servers(const res_params* params, res_stats stats[],
148 int nscount, bool usable_servers[]) {
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900149 unsigned usable_servers_found = 0;
150 for (int ns = 0; ns < nscount; ns++) {
Bernie Innocenti8fca66a2018-09-25 14:23:19 +0900151 bool usable = res_stats_usable_server(params, &stats[ns]);
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900152 if (usable) {
153 ++usable_servers_found;
154 }
155 usable_servers[ns] = usable;
156 }
157 // If there are no usable servers, consider all of them usable.
158 // TODO: Explore other possibilities, such as enabling only the best N servers, etc.
159 if (usable_servers_found == 0) {
160 for (int ns = 0; ns < nscount; ns++) {
161 usable_servers[ns] = true;
162 }
163 }
Luke Huang70931aa2019-01-31 11:57:41 +0800164 return (usable_servers_found == 0) ? nscount : usable_servers_found;
Bernie Innocenti318ed2d2018-08-30 04:05:20 +0900165}