Partial buckets on app upgrade and fix duration. Statsd will create partial buckets in all metric producers when an app is upgraded so that we can separate metrics between different versions of an app. By looking at the uid map changes, we can tell which app versions belong to a bucket; for metrics that are not affected by an app version, we can instead join the buckets together. To simplify the logic, the ends of the full buckets are always aligned to when the metric producers were created. These boundaries are computed on the fly by using the bucket number and the metric producers' start times. The anomaly trackers are only ever given full buckets; partial buckets are buffered within each metric producer. The duration metric's MAX_SPARSE is fixed to follow this scheme as well. In addition, after further discussion, we found anomaly detection on MAX_SPARSE to be unnecessary, so this functionality is removed. Test: Unit tests added and modified; passed on marlin-eng. Change-Id: I5ff7a9c7f05c406e9faf400c6a39162970ded102

commit: 27785a8a4a684c831c18f7189a6fa1b98c3573e6 [log] [tgz]
author: David Chen <dwchen@google.com> Fri Jan 19 17:06:45 2018 -0800
committer: David Chen <dwchen@google.com> Fri Feb 02 13:30:10 2018 -0800
tree: dd4bac285defe0f60e912970cf1d0bc606357118
parent: c326b50fa009be5e30797f37c23ec7fa2c3c29aa [diff] [blame]
diff --git a/cmds/statsd/src/metrics/MetricProducer.h b/cmds/statsd/src/metrics/MetricProducer.h
index 3b1498f..542dd8a 100644
--- a/cmds/statsd/src/metrics/MetricProducer.h
+++ b/cmds/statsd/src/metrics/MetricProducer.h

@@ -53,15 +53,32 @@
 
     virtual ~MetricProducer(){};
 
-    void notifyAppUpgrade(const string& apk, const int uid, const int64_t version) override{
+    /**
+     * Forces this metric to split into a partial bucket right now. If we're past a full bucket, we
+     * first call the standard flushing code to flush up to the latest full bucket. Then we flush
+     * again with the end timestamp forced to be now and, after flushing, update the start
+     * timestamp to be now.
+     */
+    void notifyAppUpgrade(const uint64_t& eventTimeNs, const string& apk, const int uid,
+                          const int64_t version) override {
+        std::lock_guard<std::mutex> lock(mMutex);
+
+        if (eventTimeNs > getCurrentBucketEndTimeNs()) {
+            // Flush full buckets on the normal path up to the latest bucket boundary.
+            flushIfNeededLocked(eventTimeNs);
+        }
+        // Now flush a partial bucket.
+        flushCurrentBucketLocked(eventTimeNs);
+        mCurrentBucketStartTimeNs = eventTimeNs;
+        // Don't update the current bucket number so that the anomaly tracker knows this bucket
+        // is a partial bucket and can merge it with the previous bucket.
+    };
+
+    void notifyAppRemoved(const uint64_t& eventTimeNs, const string& apk, const int uid) override{
             // TODO: Implement me.
     };
 
-    void notifyAppRemoved(const string& apk, const int uid) override{
-            // TODO: Implement me.
-    };
-
-    void onUidMapReceived() override{
+    void onUidMapReceived(const uint64_t& eventTimeNs) override{
             // TODO: Implement me.
     };
 
@@ -87,11 +104,12 @@
     };
 
     // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp.
+    // This method clears all the past buckets.
     void onDumpReport(const uint64_t dumpTimeNs, android::util::ProtoOutputStream* protoOutput) {
         std::lock_guard<std::mutex> lock(mMutex);
         return onDumpReportLocked(dumpTimeNs, protoOutput);
     }
-
+    // This method does not clear the past buckets.
     void onDumpReport(const uint64_t dumpTimeNs, StatsLogReport* report) {
         std::lock_guard<std::mutex> lock(mMutex);
         return onDumpReportLocked(dumpTimeNs, report);
@@ -136,15 +154,43 @@
     virtual size_t byteSizeLocked() const = 0;
     virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0;
 
+    /**
+     * Flushes the current bucket if the eventTime is after the current bucket's end time.
+     */
+    virtual void flushIfNeededLocked(const uint64_t& eventTime){};
+
+    /**
+     * For metrics that aggregate (i.e., every metric producer except for EventMetricProducer),
+     * we need to be able to flush the current buckets on demand (i.e., end the current bucket and
+     * start new bucket). If this function is called when eventTimeNs is greater than the current
+     * bucket's end timestamp, then we flush up to the end of the latest full bucket; otherwise,
+     * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket
+     * number are not changed by this function. This method should only be called by
+     * flushIfNeededLocked or the app upgrade handler; the caller MUST update the bucket timestamp
+     * and bucket number as needed.
+     */
+    virtual void flushCurrentBucketLocked(const uint64_t& eventTimeNs){};
+
+    // Convenience to compute the current bucket's end time, which is always aligned with the
+    // start time of the metric.
+    uint64_t getCurrentBucketEndTimeNs() {
+        return mStartTimeNs + (mCurrentBucketNum + 1) * mBucketSizeNs;
+    }
+
     const int64_t mMetricId;
 
     const ConfigKey mConfigKey;
 
-    // The start time for the current in memory metrics data.
+    // The time when this metric producer was first created. The end time for the current bucket
+    // can be computed from this based on mCurrentBucketNum.
     uint64_t mStartTimeNs;
 
+    // Start time may not be aligned with the start of statsd if there is an app upgrade in the
+    // middle of a bucket.
     uint64_t mCurrentBucketStartTimeNs;
 
+    // Used by anomaly detector to track which bucket we are in. This is not sent with the produced
+    // report.
     uint64_t mCurrentBucketNum;
 
     int64_t mBucketSizeNs;
commit	27785a8a4a684c831c18f7189a6fa1b98c3573e6	[log] [tgz]
author	David Chen <dwchen@google.com>	Fri Jan 19 17:06:45 2018 -0800
committer	David Chen <dwchen@google.com>	Fri Feb 02 13:30:10 2018 -0800
tree	dd4bac285defe0f60e912970cf1d0bc606357118
parent	c326b50fa009be5e30797f37c23ec7fa2c3c29aa [diff] [blame]