Merge "NetdEventListener: add rolling log of connect and dns stats"
am: 64ef2a4d28
Change-Id: I7f25f4ed42f0aecea7a1bb09b8c65a5d2e4a84cd
diff --git a/core/java/android/net/metrics/ConnectStats.java b/core/java/android/net/metrics/ConnectStats.java
index 30b2656..2495cab 100644
--- a/core/java/android/net/metrics/ConnectStats.java
+++ b/core/java/android/net/metrics/ConnectStats.java
@@ -20,6 +20,7 @@
import android.system.OsConstants;
import android.util.IntArray;
import android.util.SparseIntArray;
+
import com.android.internal.util.BitUtils;
import com.android.internal.util.TokenBucket;
@@ -43,6 +44,8 @@
public final TokenBucket mLatencyTb;
/** Maximum number of latency values recorded. */
public final int mMaxLatencyRecords;
+ /** Total count of events */
+ public int eventCount = 0;
/** Total count of successful connects. */
public int connectCount = 0;
/** Total count of successful connects done in blocking mode. */
@@ -57,12 +60,15 @@
mMaxLatencyRecords = maxLatencyRecords;
}
- public void addEvent(int errno, int latencyMs, String ipAddr) {
+ boolean addEvent(int errno, int latencyMs, String ipAddr) {
+ eventCount++;
if (isSuccess(errno)) {
countConnect(errno, ipAddr);
countLatency(errno, latencyMs);
+ return true;
} else {
countError(errno);
+ return false;
}
}
@@ -101,7 +107,7 @@
return (errno == 0) || isNonBlocking(errno);
}
- private static boolean isNonBlocking(int errno) {
+ static boolean isNonBlocking(int errno) {
// On non-blocking TCP sockets, connect() immediately returns EINPROGRESS.
// On non-blocking TCP sockets that are connecting, connect() immediately returns EALREADY.
return (errno == EINPROGRESS) || (errno == EALREADY);
@@ -117,6 +123,7 @@
for (int t : BitUtils.unpackBits(transports)) {
builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
}
+ builder.append(String.format("%d events, ", eventCount));
builder.append(String.format("%d success, ", connectCount));
builder.append(String.format("%d blocking, ", connectBlockingCount));
builder.append(String.format("%d IPv6 dst", ipv6ConnectCount));
diff --git a/core/java/android/net/metrics/DnsEvent.java b/core/java/android/net/metrics/DnsEvent.java
index a4970e4..81b098b 100644
--- a/core/java/android/net/metrics/DnsEvent.java
+++ b/core/java/android/net/metrics/DnsEvent.java
@@ -17,11 +17,13 @@
package android.net.metrics;
import android.net.NetworkCapabilities;
-import java.util.Arrays;
+
import com.android.internal.util.BitUtils;
+import java.util.Arrays;
+
/**
- * A DNS event recorded by NetdEventListenerService.
+ * A batch of DNS events recorded by NetdEventListenerService for a specific network.
* {@hide}
*/
final public class DnsEvent {
@@ -38,6 +40,8 @@
// the eventTypes, returnCodes, and latenciesMs arrays have the same length and the i-th event
// is spread across the three array at position i.
public int eventCount;
+ // The number of successful DNS queries recorded.
+ public int successCount;
// The types of DNS queries as defined in INetdEventListener.
public byte[] eventTypes;
// Current getaddrinfo codes go from 1 to EAI_MAX = 15. gethostbyname returns errno, but there
@@ -54,10 +58,11 @@
latenciesMs = new int[initialCapacity];
}
- public void addResult(byte eventType, byte returnCode, int latencyMs) {
+ boolean addResult(byte eventType, byte returnCode, int latencyMs) {
+ boolean isSuccess = (returnCode == 0);
if (eventCount >= SIZE_LIMIT) {
// TODO: implement better rate limiting that does not biases metrics.
- return;
+ return isSuccess;
}
if (eventCount == eventTypes.length) {
resize((int) (1.4 * eventCount));
@@ -66,6 +71,10 @@
returnCodes[eventCount] = returnCode;
latenciesMs[eventCount] = latencyMs;
eventCount++;
+ if (isSuccess) {
+ successCount++;
+ }
+ return isSuccess;
}
public void resize(int newLength) {
@@ -80,6 +89,8 @@
for (int t : BitUtils.unpackBits(transports)) {
builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
}
- return builder.append(eventCount).append(" events)").toString();
+ builder.append(String.format("%d events, ", eventCount));
+ builder.append(String.format("%d success)", successCount));
+ return builder.toString();
}
}
diff --git a/core/java/android/net/metrics/NetworkMetrics.java b/core/java/android/net/metrics/NetworkMetrics.java
new file mode 100644
index 0000000..2b662a0
--- /dev/null
+++ b/core/java/android/net/metrics/NetworkMetrics.java
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.net.metrics;
+
+import android.net.NetworkCapabilities;
+
+import com.android.internal.util.BitUtils;
+import com.android.internal.util.TokenBucket;
+
+import java.util.StringJoiner;
+
+/**
+ * A class accumulating network metrics received from Netd regarding dns queries and
+ * connect() calls on a given network.
+ *
+ * This class also accumulates running sums of dns and connect latency stats and
+ * error counts for bug report logging.
+ *
+ * @hide
+ */
+public class NetworkMetrics {
+
+ private static final int INITIAL_DNS_BATCH_SIZE = 100;
+ private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
+
+ // The network id of the Android Network.
+ public final int netId;
+ // The transport types bitmap of the Android Network, as defined in NetworkCapabilities.java.
+ public final long transports;
+ // Accumulated metrics for connect events.
+ public final ConnectStats connectMetrics;
+ // Accumulated metrics for dns events.
+ public final DnsEvent dnsMetrics;
+ // Running sums of latencies and error counts for connect and dns events.
+ public final Summary summary;
+ // Running sums of the most recent latencies and error counts for connect and dns events.
+ // Starts null until some events are accumulated.
+ // Allows to collect periodic snapshot of the running summaries for a given network.
+ public Summary pendingSummary;
+
+ public NetworkMetrics(int netId, long transports, TokenBucket tb) {
+ this.netId = netId;
+ this.transports = transports;
+ this.connectMetrics =
+ new ConnectStats(netId, transports, tb, CONNECT_LATENCY_MAXIMUM_RECORDS);
+ this.dnsMetrics = new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
+ this.summary = new Summary(netId, transports);
+ }
+
+ /**
+ * Get currently pending Summary statistics, if any, for this NetworkMetrics, merge them
+ * into the long running Summary statistics of this NetworkMetrics, and also clear them.
+ */
+ public Summary getPendingStats() {
+ Summary s = pendingSummary;
+ pendingSummary = null;
+ if (s != null) {
+ summary.merge(s);
+ }
+ return s;
+ }
+
+ /** Accumulate a dns query result reported by netd. */
+ public void addDnsResult(int eventType, int returnCode, int latencyMs) {
+ if (pendingSummary == null) {
+ pendingSummary = new Summary(netId, transports);
+ }
+ boolean isSuccess = dnsMetrics.addResult((byte) eventType, (byte) returnCode, latencyMs);
+ pendingSummary.dnsLatencies.count(latencyMs);
+ pendingSummary.dnsErrorRate.count(isSuccess ? 0 : 1);
+ }
+
+ /** Accumulate a connect query result reported by netd. */
+ public void addConnectResult(int error, int latencyMs, String ipAddr) {
+ if (pendingSummary == null) {
+ pendingSummary = new Summary(netId, transports);
+ }
+ boolean isSuccess = connectMetrics.addEvent(error, latencyMs, ipAddr);
+ pendingSummary.connectErrorRate.count(isSuccess ? 0 : 1);
+ if (ConnectStats.isNonBlocking(error)) {
+ pendingSummary.connectLatencies.count(latencyMs);
+ }
+ }
+
+ /** Represents running sums for dns and connect average error counts and average latencies. */
+ public static class Summary {
+
+ public final int netId;
+ public final long transports;
+ // DNS latencies measured in milliseconds.
+ public final Metrics dnsLatencies = new Metrics();
+ // DNS error rate measured in percentage points.
+ public final Metrics dnsErrorRate = new Metrics();
+ // Blocking connect latencies measured in milliseconds.
+ public final Metrics connectLatencies = new Metrics();
+ // Blocking and non blocking connect error rate measured in percentage points.
+ public final Metrics connectErrorRate = new Metrics();
+
+ public Summary(int netId, long transports) {
+ this.netId = netId;
+ this.transports = transports;
+ }
+
+ void merge(Summary that) {
+ dnsLatencies.merge(that.dnsLatencies);
+ dnsErrorRate.merge(that.dnsErrorRate);
+ connectLatencies.merge(that.connectLatencies);
+ connectErrorRate.merge(that.connectErrorRate);
+ }
+
+ @Override
+ public String toString() {
+ StringJoiner j = new StringJoiner(", ", "{", "}");
+ j.add("netId=" + netId);
+ for (int t : BitUtils.unpackBits(transports)) {
+ j.add(NetworkCapabilities.transportNameOf(t));
+ }
+ j.add(String.format("dns avg=%dms max=%dms err=%.1f%% tot=%d",
+ (int) dnsLatencies.average(), (int) dnsLatencies.max,
+ 100 * dnsErrorRate.average(), dnsErrorRate.count));
+ j.add(String.format("connect avg=%dms max=%dms err=%.1f%% tot=%d",
+ (int) connectLatencies.average(), (int) connectLatencies.max,
+ 100 * connectErrorRate.average(), connectErrorRate.count));
+ return j.toString();
+ }
+ }
+
+ /** Tracks a running sum and returns the average of a metric. */
+ static class Metrics {
+ public double sum;
+ public double max = Double.MIN_VALUE;
+ public int count;
+
+ void merge(Metrics that) {
+ this.count += that.count;
+ this.sum += that.sum;
+ this.max = Math.max(this.max, that.max);
+ }
+
+ void count(double value) {
+ count++;
+ sum += value;
+ max = Math.max(max, value);
+ }
+
+ double average() {
+ double a = sum / (double) count;
+ if (Double.isNaN(a)) {
+ a = 0;
+ }
+ return a;
+ }
+ }
+}
diff --git a/services/core/java/com/android/server/connectivity/NetdEventListenerService.java b/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
index 6206dfc..05c6e69 100644
--- a/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
+++ b/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
@@ -27,6 +27,7 @@
import android.net.metrics.DnsEvent;
import android.net.metrics.INetdEventListener;
import android.net.metrics.IpConnectivityLog;
+import android.net.metrics.NetworkMetrics;
import android.net.metrics.WakeupEvent;
import android.net.metrics.WakeupStats;
import android.os.RemoteException;
@@ -34,6 +35,7 @@
import android.util.Log;
import android.util.ArrayMap;
import android.util.SparseArray;
+
import com.android.internal.annotations.GuardedBy;
import com.android.internal.annotations.VisibleForTesting;
import com.android.internal.util.BitUtils;
@@ -41,10 +43,11 @@
import com.android.internal.util.RingBuffer;
import com.android.internal.util.TokenBucket;
import com.android.server.connectivity.metrics.nano.IpConnectivityLogClass.IpConnectivityEvent;
+
import java.io.PrintWriter;
+import java.util.ArrayList;
import java.util.List;
-import java.util.function.Function;
-import java.util.function.IntFunction;
+import java.util.StringJoiner;
/**
* Implementation of the INetdEventListener interface.
@@ -57,13 +60,13 @@
private static final boolean DBG = false;
private static final boolean VDBG = false;
- private static final int INITIAL_DNS_BATCH_SIZE = 100;
-
// Rate limit connect latency logging to 1 measurement per 15 seconds (5760 / day) with maximum
// bursts of 5000 measurements.
private static final int CONNECT_LATENCY_BURST_LIMIT = 5000;
private static final int CONNECT_LATENCY_FILL_RATE = 15 * (int) DateUtils.SECOND_IN_MILLIS;
- private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
+
+ private static final long METRICS_SNAPSHOT_SPAN_MS = 5 * DateUtils.MINUTE_IN_MILLIS;
+ private static final int METRICS_SNAPSHOT_BUFFER_SIZE = 48; // 4 hours
@VisibleForTesting
static final int WAKEUP_EVENT_BUFFER_LENGTH = 1024;
@@ -72,11 +75,15 @@
@VisibleForTesting
static final String WAKEUP_EVENT_IFACE_PREFIX = "iface:";
- // Sparse arrays of DNS and connect events, grouped by net id.
+ // Array of aggregated DNS and connect events sent by netd, grouped by net id.
@GuardedBy("this")
- private final SparseArray<DnsEvent> mDnsEvents = new SparseArray<>();
+ private final SparseArray<NetworkMetrics> mNetworkMetrics = new SparseArray<>();
+
@GuardedBy("this")
- private final SparseArray<ConnectStats> mConnectEvents = new SparseArray<>();
+ private final RingBuffer<NetworkMetricsSnapshot> mNetworkMetricsSnapshots =
+ new RingBuffer<>(NetworkMetricsSnapshot.class, METRICS_SNAPSHOT_BUFFER_SIZE);
+ @GuardedBy("this")
+ private long mLastSnapshot = 0;
// Array of aggregated wakeup event stats, grouped by interface name.
@GuardedBy("this")
@@ -84,7 +91,7 @@
// Ring buffer array for storing packet wake up events sent by Netd.
@GuardedBy("this")
private final RingBuffer<WakeupEvent> mWakeupEvents =
- new RingBuffer(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
+ new RingBuffer<>(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
private final ConnectivityManager mCm;
@@ -116,6 +123,41 @@
mCm = cm;
}
+ private static long projectSnapshotTime(long timeMs) {
+ return (timeMs / METRICS_SNAPSHOT_SPAN_MS) * METRICS_SNAPSHOT_SPAN_MS;
+ }
+
+ private NetworkMetrics getMetricsForNetwork(long timeMs, int netId) {
+ collectPendingMetricsSnapshot(timeMs);
+ NetworkMetrics metrics = mNetworkMetrics.get(netId);
+ if (metrics == null) {
+ // TODO: allow to change transport for a given netid.
+ metrics = new NetworkMetrics(netId, getTransports(netId), mConnectTb);
+ mNetworkMetrics.put(netId, metrics);
+ }
+ return metrics;
+ }
+
+ private NetworkMetricsSnapshot[] getNetworkMetricsSnapshots() {
+ collectPendingMetricsSnapshot(System.currentTimeMillis());
+ return mNetworkMetricsSnapshots.toArray();
+ }
+
+ private void collectPendingMetricsSnapshot(long timeMs) {
+ // Detects time differences larger than the snapshot collection period.
+ // This is robust against clock jumps and long inactivity periods.
+ if (Math.abs(timeMs - mLastSnapshot) <= METRICS_SNAPSHOT_SPAN_MS) {
+ return;
+ }
+ mLastSnapshot = projectSnapshotTime(timeMs);
+ NetworkMetricsSnapshot snapshot =
+ NetworkMetricsSnapshot.collect(mLastSnapshot, mNetworkMetrics);
+ if (snapshot.stats.isEmpty()) {
+ return;
+ }
+ mNetworkMetricsSnapshots.append(snapshot);
+ }
+
@Override
// Called concurrently by multiple binder threads.
// This method must not block or perform long-running operations.
@@ -124,15 +166,10 @@
throws RemoteException {
maybeVerboseLog("onDnsEvent(%d, %d, %d, %dms)", netId, eventType, returnCode, latencyMs);
- DnsEvent dnsEvent = mDnsEvents.get(netId);
- if (dnsEvent == null) {
- dnsEvent = makeDnsEvent(netId);
- mDnsEvents.put(netId, dnsEvent);
- }
- dnsEvent.addResult((byte) eventType, (byte) returnCode, latencyMs);
+ long timestamp = System.currentTimeMillis();
+ getMetricsForNetwork(timestamp, netId).addDnsResult(eventType, returnCode, latencyMs);
if (mNetdEventCallback != null) {
- long timestamp = System.currentTimeMillis();
mNetdEventCallback.onDnsEvent(hostname, ipAddresses, ipAddressesCount, timestamp, uid);
}
}
@@ -144,15 +181,11 @@
int port, int uid) throws RemoteException {
maybeVerboseLog("onConnectEvent(%d, %d, %dms)", netId, error, latencyMs);
- ConnectStats connectStats = mConnectEvents.get(netId);
- if (connectStats == null) {
- connectStats = makeConnectStats(netId);
- mConnectEvents.put(netId, connectStats);
- }
- connectStats.addEvent(error, latencyMs, ipAddr);
+ long timestamp = System.currentTimeMillis();
+ getMetricsForNetwork(timestamp, netId).addConnectResult(error, latencyMs, ipAddr);
if (mNetdEventCallback != null) {
- mNetdEventCallback.onConnectEvent(ipAddr, port, System.currentTimeMillis(), uid);
+ mNetdEventCallback.onConnectEvent(ipAddr, port, timestamp, uid);
}
}
@@ -189,11 +222,24 @@
}
public synchronized void flushStatistics(List<IpConnectivityEvent> events) {
- flushProtos(events, mConnectEvents, IpConnectivityEventBuilder::toProto);
- flushProtos(events, mDnsEvents, IpConnectivityEventBuilder::toProto);
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ ConnectStats stats = mNetworkMetrics.valueAt(i).connectMetrics;
+ if (stats.eventCount == 0) {
+ continue;
+ }
+ events.add(IpConnectivityEventBuilder.toProto(stats));
+ }
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ DnsEvent ev = mNetworkMetrics.valueAt(i).dnsMetrics;
+ if (ev.eventCount == 0) {
+ continue;
+ }
+ events.add(IpConnectivityEventBuilder.toProto(ev));
+ }
for (int i = 0; i < mWakeupStats.size(); i++) {
events.add(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
}
+ mNetworkMetrics.clear();
mWakeupStats.clear();
}
@@ -206,8 +252,15 @@
}
public synchronized void list(PrintWriter pw) {
- listEvents(pw, mConnectEvents, (x) -> x, "\n");
- listEvents(pw, mDnsEvents, (x) -> x, "\n");
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ pw.println(mNetworkMetrics.valueAt(i).connectMetrics);
+ }
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ pw.println(mNetworkMetrics.valueAt(i).dnsMetrics);
+ }
+ for (NetworkMetricsSnapshot s : getNetworkMetricsSnapshots()) {
+ pw.println(s);
+ }
for (int i = 0; i < mWakeupStats.size(); i++) {
pw.println(mWakeupStats.valueAt(i));
}
@@ -217,41 +270,17 @@
}
public synchronized void listAsProtos(PrintWriter pw) {
- listEvents(pw, mConnectEvents, IpConnectivityEventBuilder::toProto, "");
- listEvents(pw, mDnsEvents, IpConnectivityEventBuilder::toProto, "");
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).connectMetrics));
+ }
+ for (int i = 0; i < mNetworkMetrics.size(); i++) {
+ pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).dnsMetrics));
+ }
for (int i = 0; i < mWakeupStats.size(); i++) {
pw.print(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
}
}
- private static <T> void flushProtos(List<IpConnectivityEvent> out, SparseArray<T> in,
- Function<T, IpConnectivityEvent> mapper) {
- for (int i = 0; i < in.size(); i++) {
- out.add(mapper.apply(in.valueAt(i)));
- }
- in.clear();
- }
-
- private static <T> void listEvents(
- PrintWriter pw, SparseArray<T> events, Function<T, Object> mapper, String separator) {
- // Proto derived Classes have toString method that adds a \n at the end.
- // Let the caller control that by passing in the line separator explicitly.
- for (int i = 0; i < events.size(); i++) {
- pw.print(mapper.apply(events.valueAt(i)));
- pw.print(separator);
- }
- }
-
- private ConnectStats makeConnectStats(int netId) {
- long transports = getTransports(netId);
- return new ConnectStats(netId, transports, mConnectTb, CONNECT_LATENCY_MAXIMUM_RECORDS);
- }
-
- private DnsEvent makeDnsEvent(int netId) {
- long transports = getTransports(netId);
- return new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
- }
-
private long getTransports(int netId) {
// TODO: directly query ConnectivityService instead of going through Binder interface.
NetworkCapabilities nc = mCm.getNetworkCapabilities(new Network(netId));
@@ -268,4 +297,32 @@
private static void maybeVerboseLog(String s, Object... args) {
if (VDBG) Log.d(TAG, String.format(s, args));
}
+
+ /** Helper class for buffering summaries of NetworkMetrics at regular time intervals */
+ static class NetworkMetricsSnapshot {
+
+ public long timeMs;
+ public List<NetworkMetrics.Summary> stats = new ArrayList<>();
+
+ static NetworkMetricsSnapshot collect(long timeMs, SparseArray<NetworkMetrics> networkMetrics) {
+ NetworkMetricsSnapshot snapshot = new NetworkMetricsSnapshot();
+ snapshot.timeMs = timeMs;
+ for (int i = 0; i < networkMetrics.size(); i++) {
+ NetworkMetrics.Summary s = networkMetrics.valueAt(i).getPendingStats();
+ if (s != null) {
+ snapshot.stats.add(s);
+ }
+ }
+ return snapshot;
+ }
+
+ @Override
+ public String toString() {
+ StringJoiner j = new StringJoiner(", ");
+ for (NetworkMetrics.Summary s : stats) {
+ j.add(s.toString());
+ }
+ return String.format("%tT.%tL: %s", timeMs, timeMs, j.toString());
+ }
+ }
}