blob: d14fdeb1e2af9d4df1e5b5879985c20c814724b3 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Ken Chen5471dca2019-04-15 15:25:35 +080017#define LOG_TAG "resolv"
Mike Yubab3daa2018-10-19 22:11:43 +080018
Bernie Innocentiec4219b2019-01-30 11:16:36 +090019#include "DnsTlsDispatcher.h"
Mike Yue655b1d2019-08-28 17:49:59 +080020
lifr94981782019-05-17 21:15:19 +080021#include <netdutils/Stopwatch.h>
Mike Yue655b1d2019-08-28 17:49:59 +080022
Bernie Innocentiec4219b2019-01-30 11:16:36 +090023#include "DnsTlsSocketFactory.h"
Mike Yu82ae84b2020-12-02 21:04:40 +080024#include "Experiments.h"
25#include "PrivateDnsConfiguration.h"
Mike Yue655b1d2019-08-28 17:49:59 +080026#include "resolv_cache.h"
lifr94981782019-05-17 21:15:19 +080027#include "resolv_private.h"
28#include "stats.pb.h"
Mike Yubab3daa2018-10-19 22:11:43 +080029
chenbruceaff85842019-05-31 15:46:42 +080030#include <android-base/logging.h>
Mike Yubab3daa2018-10-19 22:11:43 +080031
32namespace android {
33namespace net {
34
Mike Yue655b1d2019-08-28 17:49:59 +080035using android::netdutils::IPSockAddr;
lifr94981782019-05-17 21:15:19 +080036using android::netdutils::Stopwatch;
Mike Yubab3daa2018-10-19 22:11:43 +080037using netdutils::Slice;
38
39// static
40std::mutex DnsTlsDispatcher::sLock;
41
42DnsTlsDispatcher::DnsTlsDispatcher() {
43 mFactory.reset(new DnsTlsSocketFactory());
44}
45
Mike Yu9e8cf8d2020-10-26 19:04:33 +080046DnsTlsDispatcher& DnsTlsDispatcher::getInstance() {
47 static DnsTlsDispatcher instance;
48 return instance;
49}
50
Mike Yu82ae84b2020-12-02 21:04:40 +080051std::list<DnsTlsServer> DnsTlsDispatcher::getOrderedAndUsableServerList(
52 const std::list<DnsTlsServer>& tlsServers, unsigned netId, unsigned mark) {
Mike Yubab3daa2018-10-19 22:11:43 +080053 // Our preferred DnsTlsServer order is:
54 // 1) reuse existing IPv6 connections
55 // 2) reuse existing IPv4 connections
56 // 3) establish new IPv6 connections
57 // 4) establish new IPv4 connections
58 std::list<DnsTlsServer> existing6;
59 std::list<DnsTlsServer> existing4;
60 std::list<DnsTlsServer> new6;
61 std::list<DnsTlsServer> new4;
62
63 // Pull out any servers for which we might have existing connections and
64 // place them at the from the list of servers to try.
65 {
66 std::lock_guard guard(sLock);
67
68 for (const auto& tlsServer : tlsServers) {
69 const Key key = std::make_pair(mark, tlsServer);
Mike Yu82ae84b2020-12-02 21:04:40 +080070 if (const Transport* xport = getTransport(key); xport != nullptr) {
71 // DoT revalidation specific feature.
72 if (!xport->usable()) {
73 // Don't use this xport. It will be removed after timeout
74 // (IDLE_TIMEOUT minutes).
75 LOG(DEBUG) << "Skip using DoT server " << tlsServer.toIpString() << " on "
76 << netId;
77 continue;
78 }
79
Mike Yubab3daa2018-10-19 22:11:43 +080080 switch (tlsServer.ss.ss_family) {
81 case AF_INET:
82 existing4.push_back(tlsServer);
83 break;
84 case AF_INET6:
85 existing6.push_back(tlsServer);
86 break;
87 }
88 } else {
89 switch (tlsServer.ss.ss_family) {
90 case AF_INET:
91 new4.push_back(tlsServer);
92 break;
93 case AF_INET6:
94 new6.push_back(tlsServer);
95 break;
96 }
97 }
98 }
99 }
100
101 auto& out = existing6;
102 out.splice(out.cend(), existing4);
103 out.splice(out.cend(), new6);
104 out.splice(out.cend(), new4);
105 return out;
106}
107
lifr94981782019-05-17 21:15:19 +0800108DnsTlsTransport::Response DnsTlsDispatcher::query(const std::list<DnsTlsServer>& tlsServers,
109 res_state statp, const Slice query,
110 const Slice ans, int* resplen) {
Mike Yu82ae84b2020-12-02 21:04:40 +0800111 const std::list<DnsTlsServer> servers(
112 getOrderedAndUsableServerList(tlsServers, statp->netid, statp->_mark));
Mike Yubab3daa2018-10-19 22:11:43 +0800113
Mike Yu82ae84b2020-12-02 21:04:40 +0800114 if (servers.empty()) LOG(WARNING) << "No usable DnsTlsServers";
Mike Yubab3daa2018-10-19 22:11:43 +0800115
116 DnsTlsTransport::Response code = DnsTlsTransport::Response::internal_error;
lifr94981782019-05-17 21:15:19 +0800117 int serverCount = 0;
Mike Yu82ae84b2020-12-02 21:04:40 +0800118 for (const auto& server : servers) {
lifr94981782019-05-17 21:15:19 +0800119 DnsQueryEvent* dnsQueryEvent =
120 statp->event->mutable_dns_query_events()->add_dns_query_event();
Mike Yucb2bb7c2019-11-22 20:42:13 +0800121
122 bool connectTriggered = false;
lifrd4d9fbb2019-07-31 20:18:35 +0800123 Stopwatch queryStopwatch;
Mike Yu82ae84b2020-12-02 21:04:40 +0800124 code = this->query(server, statp->netid, statp->_mark, query, ans, resplen,
125 &connectTriggered);
lifr94981782019-05-17 21:15:19 +0800126
lifrd4d9fbb2019-07-31 20:18:35 +0800127 dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
lifr94981782019-05-17 21:15:19 +0800128 dnsQueryEvent->set_dns_server_index(serverCount++);
129 dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(server.ss.ss_family));
130 dnsQueryEvent->set_protocol(PROTO_DOT);
131 dnsQueryEvent->set_type(getQueryType(query.base(), query.size()));
Mike Yucb2bb7c2019-11-22 20:42:13 +0800132 dnsQueryEvent->set_connected(connectTriggered);
lifr94981782019-05-17 21:15:19 +0800133
Mike Yubab3daa2018-10-19 22:11:43 +0800134 switch (code) {
135 // These response codes are valid responses and not expected to
136 // change if another server is queried.
137 case DnsTlsTransport::Response::success:
lifr94981782019-05-17 21:15:19 +0800138 dnsQueryEvent->set_rcode(
139 static_cast<NsRcode>(reinterpret_cast<HEADER*>(ans.base())->rcode));
Mike Yue655b1d2019-08-28 17:49:59 +0800140 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
lifrd4d9fbb2019-07-31 20:18:35 +0800141 return code;
Mike Yubab3daa2018-10-19 22:11:43 +0800142 case DnsTlsTransport::Response::limit_error:
lifrd4d9fbb2019-07-31 20:18:35 +0800143 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
Mike Yue655b1d2019-08-28 17:49:59 +0800144 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
Mike Yubab3daa2018-10-19 22:11:43 +0800145 return code;
Mike Yubab3daa2018-10-19 22:11:43 +0800146 // These response codes might differ when trying other servers, so
147 // keep iterating to see if we can get a different (better) result.
148 case DnsTlsTransport::Response::network_error:
lifr94981782019-05-17 21:15:19 +0800149 // Sync from res_tls_send in res_send.cpp
150 dnsQueryEvent->set_rcode(NS_R_TIMEOUT);
Mike Yue655b1d2019-08-28 17:49:59 +0800151 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
152 break;
Mike Yubab3daa2018-10-19 22:11:43 +0800153 case DnsTlsTransport::Response::internal_error:
lifrd4d9fbb2019-07-31 20:18:35 +0800154 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
Mike Yue655b1d2019-08-28 17:49:59 +0800155 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
156 break;
Mike Yubab3daa2018-10-19 22:11:43 +0800157 // No "default" statement.
158 }
159 }
160
161 return code;
162}
163
Mike Yu82ae84b2020-12-02 21:04:40 +0800164DnsTlsTransport::Response DnsTlsDispatcher::query(const DnsTlsServer& server, unsigned netId,
165 unsigned mark, const Slice query, const Slice ans,
166 int* resplen, bool* connectTriggered) {
Mike Yue9b78d82020-05-20 20:58:49 +0800167 // TODO: This can cause the resolver to create multiple connections to the same DoT server
168 // merely due to different mark, such as the bit explicitlySelected unset.
169 // See if we can save them and just create one connection for one DoT server.
Mike Yubab3daa2018-10-19 22:11:43 +0800170 const Key key = std::make_pair(mark, server);
171 Transport* xport;
172 {
173 std::lock_guard guard(sLock);
Mike Yu82ae84b2020-12-02 21:04:40 +0800174 if (xport = getTransport(key); xport == nullptr) {
175 xport = addTransport(server, mark);
Mike Yubab3daa2018-10-19 22:11:43 +0800176 }
177 ++xport->useCount;
178 }
179
Mike Yu568ed6c2020-07-01 12:02:14 +0800180 // Don't call this function and hold sLock at the same time because of the following reason:
181 // TLS handshake requires a lock which is also needed by this function, if the handshake gets
182 // stuck, this function also gets blocked.
183 const int connectCounter = xport->transport.getConnectCounter();
184
Mike Yu08b2f2b2020-12-16 11:45:36 +0800185 const auto& result = queryInternal(*xport, query);
Mike Yu568ed6c2020-07-01 12:02:14 +0800186 *connectTriggered = (xport->transport.getConnectCounter() > connectCounter);
187
Mike Yubab3daa2018-10-19 22:11:43 +0800188 DnsTlsTransport::Response code = result.code;
189 if (code == DnsTlsTransport::Response::success) {
190 if (result.response.size() > ans.size()) {
chenbruceaff85842019-05-31 15:46:42 +0800191 LOG(DEBUG) << "Response too large: " << result.response.size() << " > " << ans.size();
Mike Yubab3daa2018-10-19 22:11:43 +0800192 code = DnsTlsTransport::Response::limit_error;
193 } else {
chenbruceaff85842019-05-31 15:46:42 +0800194 LOG(DEBUG) << "Got response successfully";
Mike Yubab3daa2018-10-19 22:11:43 +0800195 *resplen = result.response.size();
196 netdutils::copy(ans, netdutils::makeSlice(result.response));
197 }
198 } else {
chenbruceaff85842019-05-31 15:46:42 +0800199 LOG(DEBUG) << "Query failed: " << (unsigned int)code;
Mike Yubab3daa2018-10-19 22:11:43 +0800200 }
201
202 auto now = std::chrono::steady_clock::now();
203 {
204 std::lock_guard guard(sLock);
Mike Yubab3daa2018-10-19 22:11:43 +0800205 --xport->useCount;
206 xport->lastUsed = now;
Mike Yu82ae84b2020-12-02 21:04:40 +0800207
208 // DoT revalidation specific feature.
209 if (xport->checkRevalidationNecessary(code)) {
210 // Even if the revalidation passes, it doesn't guarantee that DoT queries
211 // to the xport can stop failing because revalidation creates a new connection
212 // to probe while the xport still uses an existing connection. So far, there isn't
213 // a feasible way to force the xport to disconnect the connection. If the case
214 // happens, the xport will be marked as unusable and DoT queries won't be sent to
215 // it anymore. Eventually, after IDLE_TIMEOUT, the xport will be destroyed, and
216 // a new xport will be created.
217 const auto result =
218 PrivateDnsConfiguration::getInstance().requestValidation(netId, server, mark);
219 LOG(WARNING) << "Requested validation for " << server.toIpString() << " with mark 0x"
220 << std::hex << mark << ", "
221 << (result.ok() ? "succeeded" : "failed: " + result.error().message());
222 }
223
Mike Yubab3daa2018-10-19 22:11:43 +0800224 cleanup(now);
225 }
226 return code;
227}
228
Mike Yu08b2f2b2020-12-16 11:45:36 +0800229DnsTlsTransport::Result DnsTlsDispatcher::queryInternal(Transport& xport,
230 const netdutils::Slice query) {
231 LOG(DEBUG) << "Sending query of length " << query.size();
232
233 // If dot_async_handshake is not set, the call might block in some cases; otherwise,
234 // the call should return very soon.
235 auto res = xport.transport.query(query);
236 LOG(DEBUG) << "Awaiting response";
237
238 if (xport.timeout().count() == -1) {
239 // Infinite timeout.
240 return res.get();
241 }
242
243 const auto status = res.wait_for(xport.timeout());
244 if (status == std::future_status::timeout) {
Mike Yu1f663fc2021-02-20 17:22:24 +0800245 // TODO(b/186613628): notify the Transport to remove this query.
Mike Yu08b2f2b2020-12-16 11:45:36 +0800246 LOG(WARNING) << "DoT query timed out after " << xport.timeout().count() << " ms";
247 return DnsTlsTransport::Result{
248 .code = DnsTlsTransport::Response::network_error,
249 .response = {},
250 };
251 }
252
253 return res.get();
254}
255
Mike Yubab3daa2018-10-19 22:11:43 +0800256// This timeout effectively controls how long to keep SSL session tickets.
257static constexpr std::chrono::minutes IDLE_TIMEOUT(5);
258void DnsTlsDispatcher::cleanup(std::chrono::time_point<std::chrono::steady_clock> now) {
259 // To avoid scanning mStore after every query, return early if a cleanup has been
260 // performed recently.
261 if (now - mLastCleanup < IDLE_TIMEOUT) {
262 return;
263 }
264 for (auto it = mStore.begin(); it != mStore.end();) {
265 auto& s = it->second;
266 if (s->useCount == 0 && now - s->lastUsed > IDLE_TIMEOUT) {
267 it = mStore.erase(it);
268 } else {
269 ++it;
270 }
271 }
272 mLastCleanup = now;
273}
274
Mike Yu82ae84b2020-12-02 21:04:40 +0800275DnsTlsDispatcher::Transport* DnsTlsDispatcher::addTransport(const DnsTlsServer& server,
276 unsigned mark) {
277 const Key key = std::make_pair(mark, server);
278 Transport* ret = getTransport(key);
279 if (ret != nullptr) return ret;
280
281 const Experiments* const instance = Experiments::getInstance();
282 int triggerThr =
283 instance->getFlag("dot_revalidation_threshold", Transport::kDotRevalidationThreshold);
284 int unusableThr = instance->getFlag("dot_xport_unusable_threshold",
285 Transport::kDotXportUnusableThreshold);
Mike Yu08b2f2b2020-12-16 11:45:36 +0800286 int queryTimeout = instance->getFlag("dot_query_timeout_ms", Transport::kDotQueryTimeoutMs);
Mike Yu82ae84b2020-12-02 21:04:40 +0800287
288 // Check and adjust the parameters if they are improperly set.
289 bool revalidationEnabled = false;
290 const bool isForOpportunisticMode = server.name.empty();
291 if (triggerThr > 0 && unusableThr > 0 && isForOpportunisticMode) {
292 revalidationEnabled = true;
293 } else {
294 triggerThr = -1;
295 unusableThr = -1;
296 }
Mike Yu08b2f2b2020-12-16 11:45:36 +0800297 if (queryTimeout < 0) {
298 queryTimeout = -1;
299 } else if (queryTimeout < 1000) {
300 queryTimeout = 1000;
301 }
Mike Yu82ae84b2020-12-02 21:04:40 +0800302
Mike Yu08b2f2b2020-12-16 11:45:36 +0800303 ret = new Transport(server, mark, mFactory.get(), revalidationEnabled, triggerThr, unusableThr,
304 queryTimeout);
305 LOG(DEBUG) << "Transport is initialized with { " << triggerThr << ", " << unusableThr << ", "
306 << queryTimeout << "ms }"
Mike Yu82ae84b2020-12-02 21:04:40 +0800307 << " for server { " << server.toIpString() << "/" << server.name << " }";
308
309 mStore[key].reset(ret);
310
311 return ret;
312}
313
314DnsTlsDispatcher::Transport* DnsTlsDispatcher::getTransport(const Key& key) {
315 auto it = mStore.find(key);
316 return (it == mStore.end() ? nullptr : it->second.get());
317}
318
319bool DnsTlsDispatcher::Transport::checkRevalidationNecessary(DnsTlsTransport::Response code) {
320 if (!revalidationEnabled) return false;
321
322 if (code == DnsTlsTransport::Response::network_error) {
323 continuousfailureCount++;
324 } else {
325 continuousfailureCount = 0;
326 }
327
328 // triggerThreshold must be greater than 0 because the value of revalidationEnabled is true.
329 if (usable() && continuousfailureCount == triggerThreshold) {
330 return true;
331 }
332 return false;
333}
334
335bool DnsTlsDispatcher::Transport::usable() const {
336 if (!revalidationEnabled) return true;
337
338 return continuousfailureCount < unusableThreshold;
339}
340
Mike Yubab3daa2018-10-19 22:11:43 +0800341} // end of namespace net
342} // end of namespace android