AU: When server dies, don't retry forever

BUG=4871
TEST=attached unittests

Review URL: http://codereview.chromium.org/3010009
diff --git a/http_fetcher_unittest.cc b/http_fetcher_unittest.cc
index 4cabd1c..a579ea3 100644
--- a/http_fetcher_unittest.cc
+++ b/http_fetcher_unittest.cc
@@ -106,7 +106,7 @@
     // request that the server exit itself
     LOG(INFO) << "running wget to exit";
     int rc = system((string("wget -t 1 --output-document=/dev/null ") +
-                    LocalServerUrlForPath("/quitquitquit")).c_str());
+                     LocalServerUrlForPath("/quitquitquit")).c_str());
     LOG(INFO) << "done running wget to exit";
     if (validate_quit_)
       EXPECT_EQ(0, rc);
@@ -174,7 +174,7 @@
 }  // namespace {}
 
 TYPED_TEST(HttpFetcherTest, SimpleTest) {
-  GMainLoop *loop = g_main_loop_new(g_main_context_default(), FALSE);
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
   {
     HttpFetcherTestDelegate delegate;
     delegate.loop_ = loop;
@@ -193,7 +193,7 @@
 }
 
 TYPED_TEST(HttpFetcherTest, SimpleBigTest) {
-  GMainLoop *loop = g_main_loop_new(g_main_context_default(), FALSE);
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
   {
     HttpFetcherTestDelegate delegate;
     delegate.loop_ = loop;
@@ -246,7 +246,7 @@
 }  // namespace {}
 
 TYPED_TEST(HttpFetcherTest, PauseTest) {
-  GMainLoop *loop = g_main_loop_new(g_main_context_default(), FALSE);
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
   {
     PausingHttpFetcherTestDelegate delegate;
     scoped_ptr<HttpFetcher> fetcher(this->NewLargeFetcher());
@@ -306,7 +306,7 @@
 }  // namespace {}
 
 TYPED_TEST(HttpFetcherTest, AbortTest) {
-  GMainLoop *loop = g_main_loop_new(g_main_context_default(), FALSE);
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
   {
     AbortingHttpFetcherTestDelegate delegate;
     scoped_ptr<HttpFetcher> fetcher(this->NewLargeFetcher());
@@ -349,7 +349,7 @@
 TYPED_TEST(HttpFetcherTest, FlakyTest) {
   if (this->IsMock())
     return;
-  GMainLoop *loop = g_main_loop_new(g_main_context_default(), FALSE);
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
   {
     FlakyHttpFetcherTestDelegate delegate;
     delegate.loop_ = loop;
@@ -377,4 +377,74 @@
   g_main_loop_unref(loop);
 }
 
+namespace {
+class FailureHttpFetcherTestDelegate : public HttpFetcherDelegate {
+ public:
+  FailureHttpFetcherTestDelegate() : loop_(NULL), server_(NULL) {}
+  virtual void ReceivedBytes(HttpFetcher* fetcher,
+                             const char* bytes, int length) {
+    if (server_) {
+      LOG(INFO) << "Stopping server";
+      delete server_;
+      LOG(INFO) << "server stopped";
+      server_ = NULL;
+    }
+  }
+  virtual void TransferComplete(HttpFetcher* fetcher, bool successful) {
+    EXPECT_FALSE(successful);
+    g_main_loop_quit(loop_);
+  }
+  GMainLoop* loop_;
+  PythonHttpServer* server_;
+};
+}  // namespace {}
+
+
+TYPED_TEST(HttpFetcherTest, FailureTest) {
+  if (this->IsMock())
+    return;
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
+  {
+    FailureHttpFetcherTestDelegate delegate;
+    delegate.loop_ = loop;
+    scoped_ptr<HttpFetcher> fetcher(this->NewSmallFetcher());
+    fetcher->set_delegate(&delegate);
+
+    StartTransferArgs start_xfer_args = {
+      fetcher.get(),
+      LocalServerUrlForPath(this->SmallUrl())
+    };
+
+    g_timeout_add(0, StartTransfer, &start_xfer_args);
+    g_main_loop_run(loop);
+
+    // Exiting and testing happens in the delegate
+  }
+  g_main_loop_unref(loop);
+}
+
+TYPED_TEST(HttpFetcherTest, ServerDiesTest) {
+  if (this->IsMock())
+    return;
+  GMainLoop* loop = g_main_loop_new(g_main_context_default(), FALSE);
+  {
+    FailureHttpFetcherTestDelegate delegate;
+    delegate.loop_ = loop;
+    delegate.server_ = new PythonHttpServer;
+    scoped_ptr<HttpFetcher> fetcher(this->NewSmallFetcher());
+    fetcher->set_delegate(&delegate);
+
+    StartTransferArgs start_xfer_args = {
+      fetcher.get(),
+      LocalServerUrlForPath("/flaky")
+    };
+
+    g_timeout_add(0, StartTransfer, &start_xfer_args);
+    g_main_loop_run(loop);
+
+    // Exiting and testing happens in the delegate
+  }
+  g_main_loop_unref(loop);
+}
+
 }  // namespace chromeos_update_engine
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index 7a032fe..e2f18af 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -14,6 +14,10 @@
 
 namespace chromeos_update_engine {
 
+namespace {
+const int kMaxRetriesCount = 20;
+}
+
 LibcurlHttpFetcher::~LibcurlHttpFetcher() {
   CleanUp();
 }
@@ -66,9 +70,9 @@
   transfer_size_ = -1;
   bytes_downloaded_ = 0;
   resume_offset_ = 0;
-  do {
-    ResumeTransfer(url);
-  } while (CurlPerformOnce());
+  retry_count_ = 0;
+  ResumeTransfer(url);
+  CurlPerformOnce();
 }
 
 void LibcurlHttpFetcher::TerminateTransfer() {
@@ -86,15 +90,37 @@
     retcode = curl_multi_perform(curl_multi_handle_, &running_handles);
   }
   if (0 == running_handles) {
+    long http_response_code = 0;
+    if (curl_easy_getinfo(curl_handle_,
+                          CURLINFO_RESPONSE_CODE,
+                          &http_response_code) == CURLE_OK) {
+      LOG(INFO) << "HTTP response code: " << http_response_code;
+    } else {
+      LOG(ERROR) << "Unable to get http response code.";
+    }
+    
     // we're done!
     CleanUp();
 
     if ((transfer_size_ >= 0) && (bytes_downloaded_ < transfer_size_)) {
       // Need to restart transfer
-      return true;
+      retry_count_++;
+      LOG(INFO) << "Restarting transfer b/c we finished, had downloaded "
+                << bytes_downloaded_ << " bytes, but transfer_size_ is "
+                << transfer_size_ << ". retry_count: " << retry_count_;
+      if (retry_count_ > kMaxRetriesCount) {
+        if (delegate_)
+          delegate_->TransferComplete(this, false);  // success
+      } else {
+        g_timeout_add(5 * 1000,
+                      &LibcurlHttpFetcher::StaticRetryTimeoutCallback,
+                      this);
+      }
+      return false;
     } else {
       if (delegate_) {
-        delegate_->TransferComplete(this, true);  // success
+        // success is when http_response_code is 200
+        delegate_->TransferComplete(this, http_response_code == 200);
       }
     }
   } else {
@@ -217,9 +243,7 @@
 
 bool LibcurlHttpFetcher::FDCallback(GIOChannel *source,
                                     GIOCondition condition) {
-  while (CurlPerformOnce()) {
-    ResumeTransfer(url_);
-  }
+  CurlPerformOnce();
   // We handle removing of this source elsewhere, so we always return true.
   // The docs say, "the function should return FALSE if the event source
   // should be removed."
@@ -227,6 +251,12 @@
   return true;
 }
 
+gboolean LibcurlHttpFetcher::RetryTimeoutCallback() {
+  ResumeTransfer(url_);
+  CurlPerformOnce();
+  return FALSE;  // Don't have glib auto call this callback again
+}
+
 gboolean LibcurlHttpFetcher::TimeoutCallback() {
   if (!transfer_in_progress_)
     return TRUE;
@@ -236,9 +266,7 @@
   // timeout callback then.
   // TODO(adlr): optimize by checking if we can keep this timeout callback.
   //timeout_source_ = NULL;
-  while (CurlPerformOnce()) {
-    ResumeTransfer(url_);
-  }
+  CurlPerformOnce();
   return TRUE;
 }
 
diff --git a/libcurl_http_fetcher.h b/libcurl_http_fetcher.h
index fd55d8e..a7799cb 100644
--- a/libcurl_http_fetcher.h
+++ b/libcurl_http_fetcher.h
@@ -23,7 +23,7 @@
   LibcurlHttpFetcher()
       : curl_multi_handle_(NULL), curl_handle_(NULL),
         timeout_source_(NULL), transfer_in_progress_(false),
-        idle_ms_(1000) {}
+        retry_count_(0), idle_ms_(1000) {}
 
   // Cleans up all internal state. Does not notify delegate
   ~LibcurlHttpFetcher();
@@ -73,6 +73,11 @@
   static gboolean StaticTimeoutCallback(gpointer data) {
     return reinterpret_cast<LibcurlHttpFetcher*>(data)->TimeoutCallback();
   }
+  
+  gboolean RetryTimeoutCallback();
+  static gboolean StaticRetryTimeoutCallback(void* arg) {
+    return static_cast<LibcurlHttpFetcher*>(arg)->RetryTimeoutCallback();
+  }
 
   // Calls into curl_multi_perform to let libcurl do its work. Returns after
   // curl_multi_perform is finished, which may actually be after more than
@@ -123,6 +128,9 @@
   // If we resumed an earlier transfer, data offset that we used for the
   // new connection.  0 otherwise.
   off_t resume_offset_;
+  
+  // Number of resumes performed.
+  int retry_count_;
 
   long idle_ms_;
   DISALLOW_COPY_AND_ASSIGN(LibcurlHttpFetcher);