Merge "NetdEventListener: add rolling log of connect and dns stats"
am: 64ef2a4d28

Change-Id: I7f25f4ed42f0aecea7a1bb09b8c65a5d2e4a84cd
diff --git a/core/java/android/net/metrics/ConnectStats.java b/core/java/android/net/metrics/ConnectStats.java
index 30b2656..2495cab 100644
--- a/core/java/android/net/metrics/ConnectStats.java
+++ b/core/java/android/net/metrics/ConnectStats.java
@@ -20,6 +20,7 @@
 import android.system.OsConstants;
 import android.util.IntArray;
 import android.util.SparseIntArray;
+
 import com.android.internal.util.BitUtils;
 import com.android.internal.util.TokenBucket;
 
@@ -43,6 +44,8 @@
     public final TokenBucket mLatencyTb;
     /** Maximum number of latency values recorded. */
     public final int mMaxLatencyRecords;
+    /** Total count of events */
+    public int eventCount = 0;
     /** Total count of successful connects. */
     public int connectCount = 0;
     /** Total count of successful connects done in blocking mode. */
@@ -57,12 +60,15 @@
         mMaxLatencyRecords = maxLatencyRecords;
     }
 
-    public void addEvent(int errno, int latencyMs, String ipAddr) {
+    boolean addEvent(int errno, int latencyMs, String ipAddr) {
+        eventCount++;
         if (isSuccess(errno)) {
             countConnect(errno, ipAddr);
             countLatency(errno, latencyMs);
+            return true;
         } else {
             countError(errno);
+            return false;
         }
     }
 
@@ -101,7 +107,7 @@
         return (errno == 0) || isNonBlocking(errno);
     }
 
-    private static boolean isNonBlocking(int errno) {
+    static boolean isNonBlocking(int errno) {
         // On non-blocking TCP sockets, connect() immediately returns EINPROGRESS.
         // On non-blocking TCP sockets that are connecting, connect() immediately returns EALREADY.
         return (errno == EINPROGRESS) || (errno == EALREADY);
@@ -117,6 +123,7 @@
         for (int t : BitUtils.unpackBits(transports)) {
             builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
         }
+        builder.append(String.format("%d events, ", eventCount));
         builder.append(String.format("%d success, ", connectCount));
         builder.append(String.format("%d blocking, ", connectBlockingCount));
         builder.append(String.format("%d IPv6 dst", ipv6ConnectCount));
diff --git a/core/java/android/net/metrics/DnsEvent.java b/core/java/android/net/metrics/DnsEvent.java
index a4970e4..81b098b 100644
--- a/core/java/android/net/metrics/DnsEvent.java
+++ b/core/java/android/net/metrics/DnsEvent.java
@@ -17,11 +17,13 @@
 package android.net.metrics;
 
 import android.net.NetworkCapabilities;
-import java.util.Arrays;
+
 import com.android.internal.util.BitUtils;
 
+import java.util.Arrays;
+
 /**
- * A DNS event recorded by NetdEventListenerService.
+ * A batch of DNS events recorded by NetdEventListenerService for a specific network.
  * {@hide}
  */
 final public class DnsEvent {
@@ -38,6 +40,8 @@
     // the eventTypes, returnCodes, and latenciesMs arrays have the same length and the i-th event
     // is spread across the three array at position i.
     public int eventCount;
+    // The number of successful DNS queries recorded.
+    public int successCount;
     // The types of DNS queries as defined in INetdEventListener.
     public byte[] eventTypes;
     // Current getaddrinfo codes go from 1 to EAI_MAX = 15. gethostbyname returns errno, but there
@@ -54,10 +58,11 @@
         latenciesMs = new int[initialCapacity];
     }
 
-    public void addResult(byte eventType, byte returnCode, int latencyMs) {
+    boolean addResult(byte eventType, byte returnCode, int latencyMs) {
+        boolean isSuccess = (returnCode == 0);
         if (eventCount >= SIZE_LIMIT) {
             // TODO: implement better rate limiting that does not biases metrics.
-            return;
+            return isSuccess;
         }
         if (eventCount == eventTypes.length) {
             resize((int) (1.4 * eventCount));
@@ -66,6 +71,10 @@
         returnCodes[eventCount] = returnCode;
         latenciesMs[eventCount] = latencyMs;
         eventCount++;
+        if (isSuccess) {
+            successCount++;
+        }
+        return isSuccess;
     }
 
     public void resize(int newLength) {
@@ -80,6 +89,8 @@
         for (int t : BitUtils.unpackBits(transports)) {
             builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
         }
-        return builder.append(eventCount).append(" events)").toString();
+        builder.append(String.format("%d events, ", eventCount));
+        builder.append(String.format("%d success)", successCount));
+        return builder.toString();
     }
 }
diff --git a/core/java/android/net/metrics/NetworkMetrics.java b/core/java/android/net/metrics/NetworkMetrics.java
new file mode 100644
index 0000000..2b662a0
--- /dev/null
+++ b/core/java/android/net/metrics/NetworkMetrics.java
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.net.metrics;
+
+import android.net.NetworkCapabilities;
+
+import com.android.internal.util.BitUtils;
+import com.android.internal.util.TokenBucket;
+
+import java.util.StringJoiner;
+
+/**
+ * A class accumulating network metrics received from Netd regarding dns queries and
+ * connect() calls on a given network.
+ *
+ * This class also accumulates running sums of dns and connect latency stats and
+ * error counts for bug report logging.
+ *
+ * @hide
+ */
+public class NetworkMetrics {
+
+    private static final int INITIAL_DNS_BATCH_SIZE = 100;
+    private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
+
+    // The network id of the Android Network.
+    public final int netId;
+    // The transport types bitmap of the Android Network, as defined in NetworkCapabilities.java.
+    public final long transports;
+    // Accumulated metrics for connect events.
+    public final ConnectStats connectMetrics;
+    // Accumulated metrics for dns events.
+    public final DnsEvent dnsMetrics;
+    // Running sums of latencies and error counts for connect and dns events.
+    public final Summary summary;
+    // Running sums of the most recent latencies and error counts for connect and dns events.
+    // Starts null until some events are accumulated.
+    // Allows to collect periodic snapshot of the running summaries for a given network.
+    public Summary pendingSummary;
+
+    public NetworkMetrics(int netId, long transports, TokenBucket tb) {
+        this.netId = netId;
+        this.transports = transports;
+        this.connectMetrics =
+                new ConnectStats(netId, transports, tb, CONNECT_LATENCY_MAXIMUM_RECORDS);
+        this.dnsMetrics = new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
+        this.summary = new Summary(netId, transports);
+    }
+
+    /**
+     * Get currently pending Summary statistics, if any, for this NetworkMetrics, merge them
+     * into the long running Summary statistics of this NetworkMetrics, and also clear them.
+     */
+    public Summary getPendingStats() {
+        Summary s = pendingSummary;
+        pendingSummary = null;
+        if (s != null) {
+            summary.merge(s);
+        }
+        return s;
+    }
+
+    /** Accumulate a dns query result reported by netd. */
+    public void addDnsResult(int eventType, int returnCode, int latencyMs) {
+        if (pendingSummary == null) {
+            pendingSummary = new Summary(netId, transports);
+        }
+        boolean isSuccess = dnsMetrics.addResult((byte) eventType, (byte) returnCode, latencyMs);
+        pendingSummary.dnsLatencies.count(latencyMs);
+        pendingSummary.dnsErrorRate.count(isSuccess ? 0 : 1);
+    }
+
+    /** Accumulate a connect query result reported by netd. */
+    public void addConnectResult(int error, int latencyMs, String ipAddr) {
+        if (pendingSummary == null) {
+            pendingSummary = new Summary(netId, transports);
+        }
+        boolean isSuccess = connectMetrics.addEvent(error, latencyMs, ipAddr);
+        pendingSummary.connectErrorRate.count(isSuccess ? 0 : 1);
+        if (ConnectStats.isNonBlocking(error)) {
+            pendingSummary.connectLatencies.count(latencyMs);
+        }
+    }
+
+    /** Represents running sums for dns and connect average error counts and average latencies. */
+    public static class Summary {
+
+        public final int netId;
+        public final long transports;
+        // DNS latencies measured in milliseconds.
+        public final Metrics dnsLatencies = new Metrics();
+        // DNS error rate measured in percentage points.
+        public final Metrics dnsErrorRate = new Metrics();
+        // Blocking connect latencies measured in milliseconds.
+        public final Metrics connectLatencies = new Metrics();
+        // Blocking and non blocking connect error rate measured in percentage points.
+        public final Metrics connectErrorRate = new Metrics();
+
+        public Summary(int netId, long transports) {
+            this.netId = netId;
+            this.transports = transports;
+        }
+
+        void merge(Summary that) {
+            dnsLatencies.merge(that.dnsLatencies);
+            dnsErrorRate.merge(that.dnsErrorRate);
+            connectLatencies.merge(that.connectLatencies);
+            connectErrorRate.merge(that.connectErrorRate);
+        }
+
+        @Override
+        public String toString() {
+            StringJoiner j = new StringJoiner(", ", "{", "}");
+            j.add("netId=" + netId);
+            for (int t : BitUtils.unpackBits(transports)) {
+                j.add(NetworkCapabilities.transportNameOf(t));
+            }
+            j.add(String.format("dns avg=%dms max=%dms err=%.1f%% tot=%d",
+                    (int) dnsLatencies.average(), (int) dnsLatencies.max,
+                    100 * dnsErrorRate.average(), dnsErrorRate.count));
+            j.add(String.format("connect avg=%dms max=%dms err=%.1f%% tot=%d",
+                    (int) connectLatencies.average(), (int) connectLatencies.max,
+                    100 * connectErrorRate.average(), connectErrorRate.count));
+            return j.toString();
+        }
+    }
+
+    /** Tracks a running sum and returns the average of a metric. */
+    static class Metrics {
+        public double sum;
+        public double max = Double.MIN_VALUE;
+        public int count;
+
+        void merge(Metrics that) {
+            this.count += that.count;
+            this.sum += that.sum;
+            this.max = Math.max(this.max, that.max);
+        }
+
+        void count(double value) {
+            count++;
+            sum += value;
+            max = Math.max(max, value);
+        }
+
+        double average() {
+            double a = sum / (double) count;
+            if (Double.isNaN(a)) {
+                a = 0;
+            }
+            return a;
+        }
+    }
+}
diff --git a/services/core/java/com/android/server/connectivity/NetdEventListenerService.java b/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
index 6206dfc..05c6e69 100644
--- a/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
+++ b/services/core/java/com/android/server/connectivity/NetdEventListenerService.java
@@ -27,6 +27,7 @@
 import android.net.metrics.DnsEvent;
 import android.net.metrics.INetdEventListener;
 import android.net.metrics.IpConnectivityLog;
+import android.net.metrics.NetworkMetrics;
 import android.net.metrics.WakeupEvent;
 import android.net.metrics.WakeupStats;
 import android.os.RemoteException;
@@ -34,6 +35,7 @@
 import android.util.Log;
 import android.util.ArrayMap;
 import android.util.SparseArray;
+
 import com.android.internal.annotations.GuardedBy;
 import com.android.internal.annotations.VisibleForTesting;
 import com.android.internal.util.BitUtils;
@@ -41,10 +43,11 @@
 import com.android.internal.util.RingBuffer;
 import com.android.internal.util.TokenBucket;
 import com.android.server.connectivity.metrics.nano.IpConnectivityLogClass.IpConnectivityEvent;
+
 import java.io.PrintWriter;
+import java.util.ArrayList;
 import java.util.List;
-import java.util.function.Function;
-import java.util.function.IntFunction;
+import java.util.StringJoiner;
 
 /**
  * Implementation of the INetdEventListener interface.
@@ -57,13 +60,13 @@
     private static final boolean DBG = false;
     private static final boolean VDBG = false;
 
-    private static final int INITIAL_DNS_BATCH_SIZE = 100;
-
     // Rate limit connect latency logging to 1 measurement per 15 seconds (5760 / day) with maximum
     // bursts of 5000 measurements.
     private static final int CONNECT_LATENCY_BURST_LIMIT  = 5000;
     private static final int CONNECT_LATENCY_FILL_RATE    = 15 * (int) DateUtils.SECOND_IN_MILLIS;
-    private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
+
+    private static final long METRICS_SNAPSHOT_SPAN_MS = 5 * DateUtils.MINUTE_IN_MILLIS;
+    private static final int METRICS_SNAPSHOT_BUFFER_SIZE = 48; // 4 hours
 
     @VisibleForTesting
     static final int WAKEUP_EVENT_BUFFER_LENGTH = 1024;
@@ -72,11 +75,15 @@
     @VisibleForTesting
     static final String WAKEUP_EVENT_IFACE_PREFIX = "iface:";
 
-    // Sparse arrays of DNS and connect events, grouped by net id.
+    // Array of aggregated DNS and connect events sent by netd, grouped by net id.
     @GuardedBy("this")
-    private final SparseArray<DnsEvent> mDnsEvents = new SparseArray<>();
+    private final SparseArray<NetworkMetrics> mNetworkMetrics = new SparseArray<>();
+
     @GuardedBy("this")
-    private final SparseArray<ConnectStats> mConnectEvents = new SparseArray<>();
+    private final RingBuffer<NetworkMetricsSnapshot> mNetworkMetricsSnapshots =
+            new RingBuffer<>(NetworkMetricsSnapshot.class, METRICS_SNAPSHOT_BUFFER_SIZE);
+    @GuardedBy("this")
+    private long mLastSnapshot = 0;
 
     // Array of aggregated wakeup event stats, grouped by interface name.
     @GuardedBy("this")
@@ -84,7 +91,7 @@
     // Ring buffer array for storing packet wake up events sent by Netd.
     @GuardedBy("this")
     private final RingBuffer<WakeupEvent> mWakeupEvents =
-            new RingBuffer(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
+            new RingBuffer<>(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
 
     private final ConnectivityManager mCm;
 
@@ -116,6 +123,41 @@
         mCm = cm;
     }
 
+    private static long projectSnapshotTime(long timeMs) {
+        return (timeMs / METRICS_SNAPSHOT_SPAN_MS) * METRICS_SNAPSHOT_SPAN_MS;
+    }
+
+    private NetworkMetrics getMetricsForNetwork(long timeMs, int netId) {
+        collectPendingMetricsSnapshot(timeMs);
+        NetworkMetrics metrics = mNetworkMetrics.get(netId);
+        if (metrics == null) {
+            // TODO: allow to change transport for a given netid.
+            metrics = new NetworkMetrics(netId, getTransports(netId), mConnectTb);
+            mNetworkMetrics.put(netId, metrics);
+        }
+        return metrics;
+    }
+
+    private NetworkMetricsSnapshot[] getNetworkMetricsSnapshots() {
+        collectPendingMetricsSnapshot(System.currentTimeMillis());
+        return mNetworkMetricsSnapshots.toArray();
+    }
+
+    private void collectPendingMetricsSnapshot(long timeMs) {
+        // Detects time differences larger than the snapshot collection period.
+        // This is robust against clock jumps and long inactivity periods.
+        if (Math.abs(timeMs - mLastSnapshot) <= METRICS_SNAPSHOT_SPAN_MS) {
+            return;
+        }
+        mLastSnapshot = projectSnapshotTime(timeMs);
+        NetworkMetricsSnapshot snapshot =
+                NetworkMetricsSnapshot.collect(mLastSnapshot, mNetworkMetrics);
+        if (snapshot.stats.isEmpty()) {
+            return;
+        }
+        mNetworkMetricsSnapshots.append(snapshot);
+    }
+
     @Override
     // Called concurrently by multiple binder threads.
     // This method must not block or perform long-running operations.
@@ -124,15 +166,10 @@
             throws RemoteException {
         maybeVerboseLog("onDnsEvent(%d, %d, %d, %dms)", netId, eventType, returnCode, latencyMs);
 
-        DnsEvent dnsEvent = mDnsEvents.get(netId);
-        if (dnsEvent == null) {
-            dnsEvent = makeDnsEvent(netId);
-            mDnsEvents.put(netId, dnsEvent);
-        }
-        dnsEvent.addResult((byte) eventType, (byte) returnCode, latencyMs);
+        long timestamp = System.currentTimeMillis();
+        getMetricsForNetwork(timestamp, netId).addDnsResult(eventType, returnCode, latencyMs);
 
         if (mNetdEventCallback != null) {
-            long timestamp = System.currentTimeMillis();
             mNetdEventCallback.onDnsEvent(hostname, ipAddresses, ipAddressesCount, timestamp, uid);
         }
     }
@@ -144,15 +181,11 @@
             int port, int uid) throws RemoteException {
         maybeVerboseLog("onConnectEvent(%d, %d, %dms)", netId, error, latencyMs);
 
-        ConnectStats connectStats = mConnectEvents.get(netId);
-        if (connectStats == null) {
-            connectStats = makeConnectStats(netId);
-            mConnectEvents.put(netId, connectStats);
-        }
-        connectStats.addEvent(error, latencyMs, ipAddr);
+        long timestamp = System.currentTimeMillis();
+        getMetricsForNetwork(timestamp, netId).addConnectResult(error, latencyMs, ipAddr);
 
         if (mNetdEventCallback != null) {
-            mNetdEventCallback.onConnectEvent(ipAddr, port, System.currentTimeMillis(), uid);
+            mNetdEventCallback.onConnectEvent(ipAddr, port, timestamp, uid);
         }
     }
 
@@ -189,11 +222,24 @@
     }
 
     public synchronized void flushStatistics(List<IpConnectivityEvent> events) {
-        flushProtos(events, mConnectEvents, IpConnectivityEventBuilder::toProto);
-        flushProtos(events, mDnsEvents, IpConnectivityEventBuilder::toProto);
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            ConnectStats stats = mNetworkMetrics.valueAt(i).connectMetrics;
+            if (stats.eventCount == 0) {
+                continue;
+            }
+            events.add(IpConnectivityEventBuilder.toProto(stats));
+        }
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            DnsEvent ev = mNetworkMetrics.valueAt(i).dnsMetrics;
+            if (ev.eventCount == 0) {
+                continue;
+            }
+            events.add(IpConnectivityEventBuilder.toProto(ev));
+        }
         for (int i = 0; i < mWakeupStats.size(); i++) {
             events.add(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
         }
+        mNetworkMetrics.clear();
         mWakeupStats.clear();
     }
 
@@ -206,8 +252,15 @@
     }
 
     public synchronized void list(PrintWriter pw) {
-        listEvents(pw, mConnectEvents, (x) -> x, "\n");
-        listEvents(pw, mDnsEvents, (x) -> x, "\n");
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            pw.println(mNetworkMetrics.valueAt(i).connectMetrics);
+        }
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            pw.println(mNetworkMetrics.valueAt(i).dnsMetrics);
+        }
+        for (NetworkMetricsSnapshot s : getNetworkMetricsSnapshots()) {
+            pw.println(s);
+        }
         for (int i = 0; i < mWakeupStats.size(); i++) {
             pw.println(mWakeupStats.valueAt(i));
         }
@@ -217,41 +270,17 @@
     }
 
     public synchronized void listAsProtos(PrintWriter pw) {
-        listEvents(pw, mConnectEvents, IpConnectivityEventBuilder::toProto, "");
-        listEvents(pw, mDnsEvents, IpConnectivityEventBuilder::toProto, "");
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).connectMetrics));
+        }
+        for (int i = 0; i < mNetworkMetrics.size(); i++) {
+            pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).dnsMetrics));
+        }
         for (int i = 0; i < mWakeupStats.size(); i++) {
             pw.print(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
         }
     }
 
-    private static <T> void flushProtos(List<IpConnectivityEvent> out, SparseArray<T> in,
-            Function<T, IpConnectivityEvent> mapper) {
-        for (int i = 0; i < in.size(); i++) {
-            out.add(mapper.apply(in.valueAt(i)));
-        }
-        in.clear();
-    }
-
-    private static <T> void listEvents(
-            PrintWriter pw, SparseArray<T> events, Function<T, Object> mapper, String separator) {
-        // Proto derived Classes have toString method that adds a \n at the end.
-        // Let the caller control that by passing in the line separator explicitly.
-        for (int i = 0; i < events.size(); i++) {
-            pw.print(mapper.apply(events.valueAt(i)));
-            pw.print(separator);
-        }
-    }
-
-    private ConnectStats makeConnectStats(int netId) {
-        long transports = getTransports(netId);
-        return new ConnectStats(netId, transports, mConnectTb, CONNECT_LATENCY_MAXIMUM_RECORDS);
-    }
-
-    private DnsEvent makeDnsEvent(int netId) {
-        long transports = getTransports(netId);
-        return new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
-    }
-
     private long getTransports(int netId) {
         // TODO: directly query ConnectivityService instead of going through Binder interface.
         NetworkCapabilities nc = mCm.getNetworkCapabilities(new Network(netId));
@@ -268,4 +297,32 @@
     private static void maybeVerboseLog(String s, Object... args) {
         if (VDBG) Log.d(TAG, String.format(s, args));
     }
+
+    /** Helper class for buffering summaries of NetworkMetrics at regular time intervals */
+    static class NetworkMetricsSnapshot {
+
+        public long timeMs;
+        public List<NetworkMetrics.Summary> stats = new ArrayList<>();
+
+        static NetworkMetricsSnapshot collect(long timeMs, SparseArray<NetworkMetrics> networkMetrics) {
+            NetworkMetricsSnapshot snapshot = new NetworkMetricsSnapshot();
+            snapshot.timeMs = timeMs;
+            for (int i = 0; i < networkMetrics.size(); i++) {
+                NetworkMetrics.Summary s = networkMetrics.valueAt(i).getPendingStats();
+                if (s != null) {
+                    snapshot.stats.add(s);
+                }
+            }
+            return snapshot;
+        }
+
+        @Override
+        public String toString() {
+            StringJoiner j = new StringJoiner(", ");
+            for (NetworkMetrics.Summary s : stats) {
+                j.add(s.toString());
+            }
+            return String.format("%tT.%tL: %s", timeMs, timeMs, j.toString());
+        }
+    }
 }