update_engine: UM: Async request expiration handled differently.

As discussed on the tracker issue, we're changing the way an async
policy request expiration timeout is being handled: instead of failing
the policy request entirely, this now only causes the UpdateManager to
dump the evaluation context, reset the expiration deadline and
reevaluate the policy (which is necessary in order for evaluation time
and corresponding timeouts to be recomputed).  This is aimed to ensure
that policies are allowed to block for arbitrarily long periods, while
still emitting useful information to the log (which will help diagnose
if this is due to an implementation error).

Since the expiration timeout no longer returns control to the caller, we
remove it from the AsyncPolicyRequest() API. Instead, we use a single
timeout value, which is set during the UpdateManager construction and
used for all policy calls. By default, the update engine sets it to 12
hours; for testing and debugging purposes, a smaller value is used.

This CL also forbids the default (fallback) policy from blocking,
forcing a failure instead; a situation like that makes no sense anyway,
and may lead to inconsistent return values leaking to the caller.

BUG=chromium:401687
TEST=Unit tests.

Change-Id: I0bf60875bb7f524c99ed72dac61720633ab2061b
Reviewed-on: https://chromium-review.googlesource.com/211647
Tested-by: Gilad Arnold <garnold@chromium.org>
Reviewed-by: Alex Vakulenko <avakulenko@chromium.org>
Reviewed-by: Alex Deymo <deymo@chromium.org>
Commit-Queue: Gilad Arnold <garnold@chromium.org>
diff --git a/real_system_state.cc b/real_system_state.cc
index af227da..44ff5fb 100644
--- a/real_system_state.cc
+++ b/real_system_state.cc
@@ -54,7 +54,8 @@
   }
   update_manager_.reset(
       new chromeos_update_manager::UpdateManager(
-          &clock_, base::TimeDelta::FromSeconds(5), um_state));
+          &clock_, base::TimeDelta::FromSeconds(5),
+          base::TimeDelta::FromHours(12), um_state));
 
   if (!payload_state_.Initialize(this)) {
     LOG(ERROR) << "Failed to initialize the payload state object.";
diff --git a/update_manager/evaluation_context.cc b/update_manager/evaluation_context.cc
index 2d5d705..04bc3ee 100644
--- a/update_manager/evaluation_context.cc
+++ b/update_manager/evaluation_context.cc
@@ -52,9 +52,10 @@
                                      TimeDelta expiration_timeout)
     : clock_(clock),
       evaluation_timeout_(evaluation_timeout),
-      expiration_monotonic_deadline_(MonotonicDeadline(expiration_timeout)),
+      expiration_timeout_(expiration_timeout),
       weak_ptr_factory_(this) {
   ResetEvaluation();
+  ResetExpiration();
 }
 
 EvaluationContext::~EvaluationContext() {
@@ -132,6 +133,11 @@
   }
 }
 
+void EvaluationContext::ResetExpiration() {
+  expiration_monotonic_deadline_ = MonotonicDeadline(expiration_timeout_);
+  is_expired_ = false;
+}
+
 bool EvaluationContext::RunOnValueChangeOrTimeout(Closure callback) {
   // Check that the method was not called more than once.
   if (callback_.get() != nullptr) {
diff --git a/update_manager/evaluation_context.h b/update_manager/evaluation_context.h
index 909201e..81574d4 100644
--- a/update_manager/evaluation_context.h
+++ b/update_manager/evaluation_context.h
@@ -86,6 +86,11 @@
   // be called right before any new evaluation starts.
   void ResetEvaluation();
 
+  // Clears the expiration status of the EvaluationContext and resets its
+  // expiration timeout based on |expiration_timeout_|. This should be called if
+  // expiration occurred, prior to re-evaluating the policy.
+  void ResetExpiration();
+
   // Schedules the passed |callback| closure to be called when a cached
   // variable changes its value, a polling interval passes, or the context
   // expiration occurs. If none of these events can happen, for example if
@@ -175,9 +180,11 @@
   // current evaluation should finish.
   base::Time evaluation_monotonic_deadline_;
 
-  // The monotonic clock deadline at which expiration occurs. This is set once
-  // during construction.
-  const base::Time expiration_monotonic_deadline_;
+  // The expiration timeout of the evaluation context.
+  const base::TimeDelta expiration_timeout_;
+
+  // The monotonic clock deadline at which expiration occurs.
+  base::Time expiration_monotonic_deadline_;
 
   base::WeakPtrFactory<EvaluationContext> weak_ptr_factory_;
 
diff --git a/update_manager/evaluation_context_unittest.cc b/update_manager/evaluation_context_unittest.cc
index 52755fc..9835648 100644
--- a/update_manager/evaluation_context_unittest.cc
+++ b/update_manager/evaluation_context_unittest.cc
@@ -238,7 +238,7 @@
 }
 
 // Test that callback is called when evaluation context expires, and that it
-// cannot be used again.
+// cannot be used again unless the expiration deadline is reset.
 TEST_F(UmEvaluationContextTest, RunOnValueChangeOrTimeoutExpires) {
   fake_async_var_.reset(new string("Async value"));
   eval_ctx_->GetValue(&fake_async_var_);
@@ -253,6 +253,10 @@
 
   // Ensure that we cannot reschedule an evaluation.
   EXPECT_FALSE(eval_ctx_->RunOnValueChangeOrTimeout(Bind(&DoNothing)));
+
+  // Ensure that we can reschedule an evaluation after resetting expiration.
+  eval_ctx_->ResetExpiration();
+  EXPECT_TRUE(eval_ctx_->RunOnValueChangeOrTimeout(Bind(&DoNothing)));
 }
 
 // Test that we clear the events when destroying the EvaluationContext.
diff --git a/update_manager/fake_update_manager.h b/update_manager/fake_update_manager.h
index b509454..34a3813 100644
--- a/update_manager/fake_update_manager.h
+++ b/update_manager/fake_update_manager.h
@@ -15,7 +15,8 @@
 class FakeUpdateManager : public UpdateManager {
  public:
   explicit FakeUpdateManager(chromeos_update_engine::ClockInterface* clock)
-      : UpdateManager(clock, base::TimeDelta::FromSeconds(5), new FakeState()) {
+      : UpdateManager(clock, base::TimeDelta::FromSeconds(5),
+                      base::TimeDelta::FromHours(1), new FakeState()) {
     // The FakeUpdateManager uses a DefaultPolicy.
     set_policy(new DefaultPolicy(clock));
   }
diff --git a/update_manager/update_manager-inl.h b/update_manager/update_manager-inl.h
index 982ec93..649dcff 100644
--- a/update_manager/update_manager-inl.h
+++ b/update_manager/update_manager-inl.h
@@ -21,43 +21,43 @@
                                         std::string*, R*,
                                         Args...) const,
     R* result, Args... args) {
-  const std::string policy_name = policy_->PolicyRequestName(policy_method);
-  const bool timed_out = ec->is_expired();
+  // If expiration timeout fired, dump the context and reset expiration.
+  // IMPORTANT: We must still proceed with evaluation of the policy in this
+  // case, so that the evaluation time (and corresponding reevaluation timeouts)
+  // are readjusted.
+  if (ec->is_expired()) {
+    LOG(WARNING) << "Request timed out, evaluation context: "
+                 << ec->DumpContext();
+    ec->ResetExpiration();
+  }
 
   // Reset the evaluation context.
   ec->ResetEvaluation();
 
-  LOG(INFO) << "Evaluating " << policy_name << " START";
+  const std::string policy_name = policy_->PolicyRequestName(policy_method);
+  LOG(INFO) << policy_name << ": START";
 
-  // First try calling the actual policy, if the request did not time out.
-  EvalStatus status = EvalStatus::kFailed;
-  if (timed_out) {
-    LOG(WARNING) << "Skipping reevaluation because the request timed out.";
-  } else {
-    std::string error;
-    status = (policy_.get()->*policy_method)(ec, state_.get(), &error, result,
-                                             args...);
-    LOG_IF(WARNING, status == EvalStatus::kFailed)
-        << "Evaluating policy failed: " << error
-        << "\nEvaluation context: " << ec->DumpContext();
-  }
-
+  // First try calling the actual policy.
+  std::string error;
+  EvalStatus status = (policy_.get()->*policy_method)(ec, state_.get(), &error,
+                                                      result, args...);
   // If evaluating the main policy failed, defer to the default policy.
   if (status == EvalStatus::kFailed) {
-    std::string error;
+    LOG(WARNING) << "Evaluating policy failed: " << error
+                 << "\nEvaluation context: " << ec->DumpContext();
+    error.clear();
     status = (default_policy_.*policy_method)(ec, state_.get(), &error, result,
                                               args...);
-    LOG_IF(WARNING, status == EvalStatus::kFailed)
-        << "Evaluating default policy failed: " << error;
-
-    if (timed_out && status == EvalStatus::kAskMeAgainLater) {
-      LOG(WARNING) << "Default policy would block but request timed out, "
-                   << "forcing failure.";
+    if (status == EvalStatus::kFailed) {
+      LOG(WARNING) << "Evaluating default policy failed: " << error;
+    } else if (status == EvalStatus::kAskMeAgainLater) {
+      LOG(ERROR)
+          << "Default policy would block; this is a bug, forcing failure.";
       status = EvalStatus::kFailed;
     }
   }
 
-  LOG(INFO) << "Evaluating " << policy_name << " END";
+  LOG(INFO) << policy_name << ": END";
 
   return status;
 }
@@ -122,13 +122,12 @@
 template<typename R, typename... ActualArgs, typename... ExpectedArgs>
 void UpdateManager::AsyncPolicyRequest(
     base::Callback<void(EvalStatus, const R& result)> callback,
-    base::TimeDelta request_timeout,
     EvalStatus (Policy::*policy_method)(EvaluationContext*, State*,
                                         std::string*, R*,
                                         ExpectedArgs...) const,
     ActualArgs... args) {
   scoped_refptr<EvaluationContext> ec =
-      new EvaluationContext(clock_, evaluation_timeout_, request_timeout);
+      new EvaluationContext(clock_, evaluation_timeout_, expiration_timeout_);
   // IMPORTANT: To ensure that ActualArgs can be converted to ExpectedArgs, we
   // explicitly instantiate UpdateManager::OnPolicyReadyToEvaluate with the
   // latter in lieu of the former.
diff --git a/update_manager/update_manager.cc b/update_manager/update_manager.cc
index fdedc29..fb3800c 100644
--- a/update_manager/update_manager.cc
+++ b/update_manager/update_manager.cc
@@ -10,9 +10,11 @@
 namespace chromeos_update_manager {
 
 UpdateManager::UpdateManager(chromeos_update_engine::ClockInterface* clock,
-                             base::TimeDelta evaluation_timeout, State* state)
+                             base::TimeDelta evaluation_timeout,
+                             base::TimeDelta expiration_timeout, State* state)
       : default_policy_(clock), state_(state), clock_(clock),
-        evaluation_timeout_(evaluation_timeout) {
+        evaluation_timeout_(evaluation_timeout),
+        expiration_timeout_(expiration_timeout) {
   // TODO(deymo): Make it possible to replace this policy with a different
   // implementation with a build-time flag.
   policy_.reset(new ChromeOSPolicy());
diff --git a/update_manager/update_manager.h b/update_manager/update_manager.h
index 0b04c07..2cd21d4 100644
--- a/update_manager/update_manager.h
+++ b/update_manager/update_manager.h
@@ -25,7 +25,8 @@
   // Creates the UpdateManager instance, assuming ownership on the provided
   // |state|.
   UpdateManager(chromeos_update_engine::ClockInterface* clock,
-                base::TimeDelta evaluation_timeout, State* state);
+                base::TimeDelta evaluation_timeout,
+                base::TimeDelta expiration_timeout, State* state);
 
   virtual ~UpdateManager() {}
 
@@ -53,17 +54,15 @@
 
   // Evaluates the given |policy_method| policy with the provided |args|
   // arguments and calls the |callback| callback with the result when done.
-  // Evaluation is not allowed to exceed |request_timeout|.
   //
   // If the policy implementation should block, returning a
   // EvalStatus::kAskMeAgainLater status the Update Manager will re-evaluate the
   // policy until another status is returned. If the policy implementation based
   // its return value solely on const variables, the callback will be called
-  // with the EvalStatus::kAskMeAgainLater status.
+  // with the EvalStatus::kAskMeAgainLater status (which indicates an error).
   template<typename R, typename... ActualArgs, typename... ExpectedArgs>
   void AsyncPolicyRequest(
       base::Callback<void(EvalStatus, const R& result)> callback,
-      base::TimeDelta request_timeout,
       EvalStatus (Policy::*policy_method)(EvaluationContext*, State*,
                                           std::string*, R*,
                                           ExpectedArgs...) const,
@@ -83,7 +82,7 @@
   FRIEND_TEST(UmUpdateManagerTest, PolicyRequestCallsDefaultOnError);
   FRIEND_TEST(UmUpdateManagerTest, PolicyRequestDoesntBlockDeathTest);
   FRIEND_TEST(UmUpdateManagerTest, AsyncPolicyRequestDelaysEvaluation);
-  FRIEND_TEST(UmUpdateManagerTest, AsyncPolicyRequestDoesNotTimeOut);
+  FRIEND_TEST(UmUpdateManagerTest, AsyncPolicyRequestTimeoutDoesNotFire);
   FRIEND_TEST(UmUpdateManagerTest, AsyncPolicyRequestTimesOut);
 
   // EvaluatePolicy() evaluates the passed |policy_method| method on the current
@@ -129,6 +128,9 @@
   // Timeout for a policy evaluation.
   const base::TimeDelta evaluation_timeout_;
 
+  // Timeout for expiration of the evaluation context, used for async requests.
+  const base::TimeDelta expiration_timeout_;
+
   DISALLOW_COPY_AND_ASSIGN(UpdateManager);
 };
 
diff --git a/update_manager/update_manager_unittest.cc b/update_manager/update_manager_unittest.cc
index 32fd5ac..cb9b61b 100644
--- a/update_manager/update_manager_unittest.cc
+++ b/update_manager/update_manager_unittest.cc
@@ -61,7 +61,7 @@
   virtual void SetUp() {
     fake_state_ = new FakeState();
     umut_.reset(new UpdateManager(&fake_clock_, TimeDelta::FromSeconds(5),
-                                  fake_state_));
+                                  TimeDelta::FromSeconds(1), fake_state_));
   }
 
   FakeState* fake_state_;  // Owned by the umut_.
@@ -104,12 +104,14 @@
   virtual std::string PolicyName() const override { return "LazyPolicy"; }
 };
 
-// A policy that sleeps and returns EvalStatus::kAskMeAgainlater. Will check
-// that time is greater than a given threshold (if non-zero). Increments a
-// counter every time it is being queried, if a pointer to it is provided.
+// A policy that sleeps for a predetermined amount of time, then checks for a
+// wallclock-based time threshold (if given) and returns
+// EvalStatus::kAskMeAgainLater if not passed; otherwise, returns
+// EvalStatus::kSucceeded. Increments a counter every time it is being queried,
+// if a pointer to it is provided.
 class DelayPolicy : public DefaultPolicy {
  public:
-  DelayPolicy(int sleep_secs, base::Time time_threshold, int* num_called_p)
+  DelayPolicy(int sleep_secs, Time time_threshold, int* num_called_p)
       : sleep_secs_(sleep_secs), time_threshold_(time_threshold),
         num_called_p_(num_called_p) {}
   virtual EvalStatus UpdateCheckAllowed(EvaluationContext* ec, State* state,
@@ -117,12 +119,17 @@
                                         UpdateCheckParams* result) const {
     if (num_called_p_)
       (*num_called_p_)++;
-    sleep(sleep_secs_);
-    // We check for a time threshold to ensure that the policy has some
-    // non-constant dependency. The threshold is far enough in the future to
-    // ensure that it does not fire immediately.
-    if (time_threshold_ < base::Time::Max())
-      ec->IsWallclockTimeGreaterThan(time_threshold_);
+
+    // Sleep for a predetermined amount of time.
+    if (sleep_secs_ > 0)
+      sleep(sleep_secs_);
+
+    // Check for a time threshold. This can be used to ensure that the policy
+    // has some non-constant dependency.
+    if (time_threshold_ < Time::Max() &&
+        ec->IsWallclockTimeGreaterThan(time_threshold_))
+      return EvalStatus::kSucceeded;
+
     return EvalStatus::kAskMeAgainLater;
   }
 
@@ -131,7 +138,7 @@
 
  private:
   int sleep_secs_;
-  base::Time time_threshold_;
+  Time time_threshold_;
   int* num_called_p_;
 };
 
@@ -201,15 +208,14 @@
   Callback<void(EvalStatus, const UpdateCheckParams&)> callback = Bind(
       AccumulateCallsCallback<UpdateCheckParams>, &calls);
 
-  umut_->AsyncPolicyRequest(callback, base::TimeDelta::FromSeconds(5),
-                            &Policy::UpdateCheckAllowed);
+  umut_->AsyncPolicyRequest(callback, &Policy::UpdateCheckAllowed);
   // The callback should wait until we run the main loop for it to be executed.
   EXPECT_EQ(0, calls.size());
   chromeos_update_engine::RunGMainLoopMaxIterations(100);
   EXPECT_EQ(1, calls.size());
 }
 
-TEST_F(UmUpdateManagerTest, AsyncPolicyRequestDoesNotTimeOut) {
+TEST_F(UmUpdateManagerTest, AsyncPolicyRequestTimeoutDoesNotFire) {
   // Set up an async policy call to return immediately, then wait a little and
   // ensure that the timeout event does not fire.
   int num_called = 0;
@@ -219,8 +225,7 @@
   Callback<void(EvalStatus, const UpdateCheckParams&)> callback =
       Bind(AccumulateCallsCallback<UpdateCheckParams>, &calls);
 
-  umut_->AsyncPolicyRequest(callback, base::TimeDelta::FromSeconds(1),
-                            &Policy::UpdateCheckAllowed);
+  umut_->AsyncPolicyRequest(callback, &Policy::UpdateCheckAllowed);
   // Run the main loop, ensure that policy was attempted once before deferring
   // to the default.
   chromeos_update_engine::RunGMainLoopMaxIterations(100);
@@ -236,33 +241,42 @@
 }
 
 TEST_F(UmUpdateManagerTest, AsyncPolicyRequestTimesOut) {
-  // Set up an async policy call to exceed its overall execution time, and make
-  // sure that it is aborted. Also ensure that the async call is not reattempted
-  // after the timeout fires by waiting pas the time threshold for reevaluation.
+  // Set up an async policy call to exceed its expiration timeout, make sure
+  // that the default policy was not used (no callback) and that evaluation is
+  // reattempted.
   int num_called = 0;
   umut_->set_policy(new DelayPolicy(
-          2, fake_clock_.GetWallclockTime() + base::TimeDelta::FromSeconds(3),
+          0, fake_clock_.GetWallclockTime() + TimeDelta::FromSeconds(3),
           &num_called));
 
   vector<pair<EvalStatus, UpdateCheckParams>> calls;
   Callback<void(EvalStatus, const UpdateCheckParams&)> callback =
       Bind(AccumulateCallsCallback<UpdateCheckParams>, &calls);
 
-  umut_->AsyncPolicyRequest(callback, base::TimeDelta::FromSeconds(1),
-                            &Policy::UpdateCheckAllowed);
+  umut_->AsyncPolicyRequest(callback, &Policy::UpdateCheckAllowed);
   // Run the main loop, ensure that policy was attempted once but the callback
   // was not invoked.
   chromeos_update_engine::RunGMainLoopMaxIterations(100);
   EXPECT_EQ(1, num_called);
   EXPECT_EQ(0, calls.size());
-  // Wait for the time threshold to be satisfied, run the main loop again,
-  // ensure that reevaluation was not attempted but the callback invoked.
+  // Wait for the expiration timeout to expire, run the main loop again,
+  // ensure that reevaluation occurred but callback was not invoked (i.e.
+  // default policy was not consulted).
   sleep(2);
+  fake_clock_.SetWallclockTime(fake_clock_.GetWallclockTime() +
+                               TimeDelta::FromSeconds(2));
   chromeos_update_engine::RunGMainLoopMaxIterations(10);
-  EXPECT_EQ(1, num_called);
+  EXPECT_EQ(2, num_called);
+  EXPECT_EQ(0, calls.size());
+  // Wait for reevaluation due to delay to happen, ensure that it occurs and
+  // that the callback is invoked.
+  sleep(2);
+  fake_clock_.SetWallclockTime(fake_clock_.GetWallclockTime() +
+                               TimeDelta::FromSeconds(2));
+  chromeos_update_engine::RunGMainLoopMaxIterations(10);
+  EXPECT_EQ(3, num_called);
   ASSERT_EQ(1, calls.size());
   EXPECT_EQ(EvalStatus::kSucceeded, calls[0].first);
-  EXPECT_EQ(true, calls[0].second.updates_enabled);
 }
 
 }  // namespace chromeos_update_manager