blob: bc8717a3906915005ace19c167e75ef8456a7233 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "resolv"
#include "DnsTlsDispatcher.h"
#include <netdutils/Stopwatch.h>
#include "DnsTlsSocketFactory.h"
#include "Experiments.h"
#include "PrivateDnsConfiguration.h"
#include "resolv_cache.h"
#include "resolv_private.h"
#include "stats.pb.h"
#include <android-base/logging.h>
namespace android {
namespace net {
using android::netdutils::IPSockAddr;
using android::netdutils::Stopwatch;
using netdutils::Slice;
// static
std::mutex DnsTlsDispatcher::sLock;
DnsTlsDispatcher::DnsTlsDispatcher() {
mFactory.reset(new DnsTlsSocketFactory());
}
DnsTlsDispatcher& DnsTlsDispatcher::getInstance() {
static DnsTlsDispatcher instance;
return instance;
}
std::list<DnsTlsServer> DnsTlsDispatcher::getOrderedAndUsableServerList(
const std::list<DnsTlsServer>& tlsServers, unsigned netId, unsigned mark) {
// Our preferred DnsTlsServer order is:
// 1) reuse existing IPv6 connections
// 2) reuse existing IPv4 connections
// 3) establish new IPv6 connections
// 4) establish new IPv4 connections
std::list<DnsTlsServer> existing6;
std::list<DnsTlsServer> existing4;
std::list<DnsTlsServer> new6;
std::list<DnsTlsServer> new4;
// Pull out any servers for which we might have existing connections and
// place them at the from the list of servers to try.
{
std::lock_guard guard(sLock);
for (const auto& tlsServer : tlsServers) {
const Key key = std::make_pair(mark, tlsServer);
if (const Transport* xport = getTransport(key); xport != nullptr) {
// DoT revalidation specific feature.
if (!xport->usable()) {
// Don't use this xport. It will be removed after timeout
// (IDLE_TIMEOUT minutes).
LOG(DEBUG) << "Skip using DoT server " << tlsServer.toIpString() << " on "
<< netId;
continue;
}
switch (tlsServer.ss.ss_family) {
case AF_INET:
existing4.push_back(tlsServer);
break;
case AF_INET6:
existing6.push_back(tlsServer);
break;
}
} else {
switch (tlsServer.ss.ss_family) {
case AF_INET:
new4.push_back(tlsServer);
break;
case AF_INET6:
new6.push_back(tlsServer);
break;
}
}
}
}
auto& out = existing6;
out.splice(out.cend(), existing4);
out.splice(out.cend(), new6);
out.splice(out.cend(), new4);
return out;
}
DnsTlsTransport::Response DnsTlsDispatcher::query(const std::list<DnsTlsServer>& tlsServers,
ResState* statp, const Slice query,
const Slice ans, int* resplen,
bool dotQuickFallback) {
const std::list<DnsTlsServer> servers(
getOrderedAndUsableServerList(tlsServers, statp->netid, statp->mark));
if (servers.empty()) LOG(WARNING) << "No usable DnsTlsServers";
DnsTlsTransport::Response code = DnsTlsTransport::Response::internal_error;
int serverCount = 0;
for (const auto& server : servers) {
DnsQueryEvent* dnsQueryEvent =
statp->event->mutable_dns_query_events()->add_dns_query_event();
bool connectTriggered = false;
Stopwatch queryStopwatch;
code = this->query(server, statp->netid, statp->mark, query, ans, resplen,
&connectTriggered);
dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
dnsQueryEvent->set_dns_server_index(serverCount++);
dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(server.ss.ss_family));
dnsQueryEvent->set_protocol(PROTO_DOT);
std::span<const uint8_t> msg(query.base(), query.size());
dnsQueryEvent->set_type(getQueryType(msg));
dnsQueryEvent->set_connected(connectTriggered);
switch (code) {
// These response codes are valid responses and not expected to
// change if another server is queried.
case DnsTlsTransport::Response::success:
dnsQueryEvent->set_rcode(
static_cast<NsRcode>(reinterpret_cast<HEADER*>(ans.base())->rcode));
resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
return code;
case DnsTlsTransport::Response::limit_error:
dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
return code;
// These response codes might differ when trying other servers, so
// keep iterating to see if we can get a different (better) result.
case DnsTlsTransport::Response::network_error:
// Sync from res_tls_send in res_send.cpp
dnsQueryEvent->set_rcode(NS_R_TIMEOUT);
resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
if (dotQuickFallback) {
return code;
}
break;
case DnsTlsTransport::Response::internal_error:
dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
break;
// No "default" statement.
}
}
return code;
}
DnsTlsTransport::Response DnsTlsDispatcher::query(const DnsTlsServer& server, unsigned netId,
unsigned mark, const Slice query, const Slice ans,
int* resplen, bool* connectTriggered) {
// TODO: This can cause the resolver to create multiple connections to the same DoT server
// merely due to different mark, such as the bit explicitlySelected unset.
// See if we can save them and just create one connection for one DoT server.
const Key key = std::make_pair(mark, server);
Transport* xport;
{
std::lock_guard guard(sLock);
if (xport = getTransport(key); xport == nullptr) {
xport = addTransport(server, mark, netId);
}
++xport->useCount;
}
// Don't call this function and hold sLock at the same time because of the following reason:
// TLS handshake requires a lock which is also needed by this function, if the handshake gets
// stuck, this function also gets blocked.
const int connectCounter = xport->transport.getConnectCounter();
const auto& result = queryInternal(*xport, query);
*connectTriggered = (xport->transport.getConnectCounter() > connectCounter);
DnsTlsTransport::Response code = result.code;
if (code == DnsTlsTransport::Response::success) {
if (result.response.size() > ans.size()) {
LOG(DEBUG) << "Response too large: " << result.response.size() << " > " << ans.size();
code = DnsTlsTransport::Response::limit_error;
} else {
LOG(DEBUG) << "Got response successfully";
*resplen = result.response.size();
netdutils::copy(ans, netdutils::makeSlice(result.response));
}
} else {
LOG(DEBUG) << "Query failed: " << (unsigned int)code;
}
auto now = std::chrono::steady_clock::now();
{
std::lock_guard guard(sLock);
--xport->useCount;
xport->lastUsed = now;
// DoT revalidation specific feature.
if (xport->checkRevalidationNecessary(code)) {
// Even if the revalidation passes, it doesn't guarantee that DoT queries
// to the xport can stop failing because revalidation creates a new connection
// to probe while the xport still uses an existing connection. So far, there isn't
// a feasible way to force the xport to disconnect the connection. If the case
// happens, the xport will be marked as unusable and DoT queries won't be sent to
// it anymore. Eventually, after IDLE_TIMEOUT, the xport will be destroyed, and
// a new xport will be created.
const auto result = PrivateDnsConfiguration::getInstance().requestValidation(
netId, PrivateDnsConfiguration::ServerIdentity{server}, mark);
LOG(WARNING) << "Requested validation for " << server.toIpString() << " with mark 0x"
<< std::hex << mark << ", "
<< (result.ok() ? "succeeded" : "failed: " + result.error().message());
}
cleanup(now);
}
return code;
}
void DnsTlsDispatcher::forceCleanup(unsigned netId) {
std::lock_guard guard(sLock);
forceCleanupLocked(netId);
}
DnsTlsTransport::Result DnsTlsDispatcher::queryInternal(Transport& xport,
const netdutils::Slice query) {
LOG(DEBUG) << "Sending query of length " << query.size();
// If dot_async_handshake is not set, the call might block in some cases; otherwise,
// the call should return very soon.
auto res = xport.transport.query(query);
LOG(DEBUG) << "Awaiting response";
if (xport.timeout().count() == -1) {
// Infinite timeout.
return res.get();
}
const auto status = res.wait_for(xport.timeout());
if (status == std::future_status::timeout) {
// TODO(b/186613628): notify the Transport to remove this query.
LOG(WARNING) << "DoT query timed out after " << xport.timeout().count() << " ms";
return DnsTlsTransport::Result{
.code = DnsTlsTransport::Response::network_error,
.response = {},
};
}
return res.get();
}
// This timeout effectively controls how long to keep SSL session tickets.
static constexpr std::chrono::minutes IDLE_TIMEOUT(5);
void DnsTlsDispatcher::cleanup(std::chrono::time_point<std::chrono::steady_clock> now) {
// To avoid scanning mStore after every query, return early if a cleanup has been
// performed recently.
if (now - mLastCleanup < IDLE_TIMEOUT) {
return;
}
for (auto it = mStore.begin(); it != mStore.end();) {
auto& s = it->second;
if (s->useCount == 0 && now - s->lastUsed > IDLE_TIMEOUT) {
it = mStore.erase(it);
} else {
++it;
}
}
mLastCleanup = now;
}
// TODO: unify forceCleanupLocked() and cleanup().
void DnsTlsDispatcher::forceCleanupLocked(unsigned netId) {
for (auto it = mStore.begin(); it != mStore.end();) {
auto& s = it->second;
if (s->useCount == 0 && s->mNetId == netId) {
it = mStore.erase(it);
} else {
++it;
}
}
}
DnsTlsDispatcher::Transport* DnsTlsDispatcher::addTransport(const DnsTlsServer& server,
unsigned mark, unsigned netId) {
const Key key = std::make_pair(mark, server);
Transport* ret = getTransport(key);
if (ret != nullptr) return ret;
const Experiments* const instance = Experiments::getInstance();
int triggerThr =
instance->getFlag("dot_revalidation_threshold", Transport::kDotRevalidationThreshold);
int unusableThr = instance->getFlag("dot_xport_unusable_threshold",
Transport::kDotXportUnusableThreshold);
int queryTimeout = instance->getFlag("dot_query_timeout_ms", Transport::kDotQueryTimeoutMs);
// Check and adjust the parameters if they are improperly set.
bool revalidationEnabled = false;
const bool isForOpportunisticMode = server.name.empty();
if (triggerThr > 0 && unusableThr > 0 && isForOpportunisticMode) {
revalidationEnabled = true;
} else {
triggerThr = -1;
unusableThr = -1;
}
if (queryTimeout < 0) {
queryTimeout = -1;
} else if (queryTimeout < 1000) {
queryTimeout = 1000;
}
ret = new Transport(server, mark, netId, mFactory.get(), revalidationEnabled, triggerThr,
unusableThr, queryTimeout);
LOG(DEBUG) << "Transport is initialized with { " << triggerThr << ", " << unusableThr << ", "
<< queryTimeout << "ms }"
<< " for server { " << server.toIpString() << "/" << server.name << " }";
mStore[key].reset(ret);
return ret;
}
DnsTlsDispatcher::Transport* DnsTlsDispatcher::getTransport(const Key& key) {
auto it = mStore.find(key);
return (it == mStore.end() ? nullptr : it->second.get());
}
bool DnsTlsDispatcher::Transport::checkRevalidationNecessary(DnsTlsTransport::Response code) {
if (!revalidationEnabled) return false;
if (code == DnsTlsTransport::Response::network_error) {
continuousfailureCount++;
} else {
continuousfailureCount = 0;
}
// triggerThreshold must be greater than 0 because the value of revalidationEnabled is true.
if (usable() && continuousfailureCount == triggerThreshold) {
return true;
}
return false;
}
bool DnsTlsDispatcher::Transport::usable() const {
if (!revalidationEnabled) return true;
return continuousfailureCount < unusableThreshold;
}
} // end of namespace net
} // end of namespace android