shill: Unbind DHCPConfig from it's PID when dhcpcd is killed.

This allows the destruction of the DHCPConfig instance. The patch also
refactors and cleans up a bit resetting of client state in DHCPConfig.

BUG=chromium-os:37652
TEST=unit tests; tested on device by connecting to different APs, then
'stop shill' and inspecting logs for any stale DHCPConfig object
destructions; checked connectivity.

Change-Id: I991011191b553daa970b9225b85696f20add9dea
Reviewed-on: https://gerrit.chromium.org/gerrit/42532
Tested-by: Darin Petkov <petkov@chromium.org>
Reviewed-by: Paul Stewart <pstew@chromium.org>
Reviewed-by: mukesh agrawal <quiche@chromium.org>
Commit-Queue: Darin Petkov <petkov@chromium.org>
diff --git a/dhcp_config.cc b/dhcp_config.cc
index 710775e..b2b7da3 100644
--- a/dhcp_config.cc
+++ b/dhcp_config.cc
@@ -91,9 +91,6 @@
 
   // Don't leave behind dhcpcd running.
   Stop(__func__);
-
-  // Make sure we don't get any callbacks to the destroyed instance.
-  CleanupClientState();
 }
 
 bool DHCPConfig::RequestIP() {
@@ -223,46 +220,45 @@
 }
 
 void DHCPConfig::Stop(const char *reason) {
-  if (pid_) {
-    LOG(INFO) << "Terminating " << pid_ << " (" << reason << ")";
-    if (kill(pid_, SIGTERM) < 0) {
-      PLOG(ERROR);
-      return;
-    }
-    pid_t ret;
-    int num_iterations =
-        kDHCPCDExitWaitMilliseconds / kDHCPCDExitPollMilliseconds;
-    for (int count = 0; count < num_iterations; ++count) {
-      ret = waitpid(pid_, NULL, WNOHANG);
-      if (ret == pid_ || ret == -1)
-        break;
-      usleep(kDHCPCDExitPollMilliseconds * 1000);
-      if (count == num_iterations / 2) {
-        // Make one last attempt to kill dhcpcd.
-        LOG(WARNING) << "Terminating " << pid_ << " with SIGKILL "
-                     << "(" << reason << ")";
-        kill(pid_, SIGKILL);
-      }
-    }
-    if (ret != pid_)
-      PLOG(ERROR);
+  LOG_IF(INFO, pid_) << "Stopping " << pid_ << " (" << reason << ")";
+  KillClient();
+  // KillClient waits for the client to terminate so it's safe to cleanup the
+  // state.
+  CleanupClientState();
+}
+
+void DHCPConfig::KillClient() {
+  if (!pid_) {
+    return;
   }
-  StopDHCPTimeout();
+  if (kill(pid_, SIGTERM) < 0) {
+    PLOG(ERROR);
+    return;
+  }
+  pid_t ret;
+  int num_iterations =
+      kDHCPCDExitWaitMilliseconds / kDHCPCDExitPollMilliseconds;
+  for (int count = 0; count < num_iterations; ++count) {
+    ret = waitpid(pid_, NULL, WNOHANG);
+    if (ret == pid_ || ret == -1)
+      break;
+    usleep(kDHCPCDExitPollMilliseconds * 1000);
+    if (count == num_iterations / 2) {
+      // Make one last attempt to kill dhcpcd.
+      LOG(WARNING) << "Terminating " << pid_ << " with SIGKILL.";
+      kill(pid_, SIGKILL);
+    }
+  }
+  if (ret != pid_)
+    PLOG(ERROR);
 }
 
 bool DHCPConfig::Restart() {
-  // Check to ensure that this instance doesn't get destroyed in the middle of
-  // this call. If stopping a running client while there's only one reference to
-  // this instance, we will end up destroying it when the PID is unbound from
-  // the Provider. Since the Provider doesn't invoke Restart, this would mean
-  // that Restart was erroneously executed through a bare reference.
-  CHECK(!pid_ || !HasOneRef());
-  Stop(__func__);
-  if (pid_) {
-    provider_->UnbindPID(pid_);
-  }
-  CleanupClientState();
-  return Start();
+  // Take a reference of this instance to make sure we don't get destroyed in
+  // the middle of this call.
+  DHCPConfigRefPtr me = this;
+  me->Stop(__func__);
+  return me->Start();
 }
 
 string DHCPConfig::GetIPv4AddressString(unsigned int address) {
@@ -420,18 +416,17 @@
   DHCPConfig *config = reinterpret_cast<DHCPConfig *>(data);
   config->child_watch_tag_ = 0;
   CHECK_EQ(pid, config->pid_);
-  config->CleanupClientState();
-
   // |config| instance may be destroyed after this call.
-  config->provider_->UnbindPID(pid);
+  config->CleanupClientState();
 }
 
 void DHCPConfig::CleanupClientState() {
+  SLOG(DHCP, 2) << __func__ << ": " << device_name();
+  StopDHCPTimeout();
   if (child_watch_tag_) {
     glib_->SourceRemove(child_watch_tag_);
     child_watch_tag_ = 0;
   }
-  pid_ = 0;
   proxy_.reset();
   if (lease_file_suffix_ == device_name()) {
     // If the lease file suffix was left as default, clean it up at exit.
@@ -441,6 +436,12 @@
   }
   file_util::Delete(root_.Append(
       base::StringPrintf(kDHCPCDPathFormatPID, device_name().c_str())), false);
+  if (pid_) {
+    int pid = pid_;
+    pid_ = 0;
+    // |this| instance may be destroyed after this call.
+    provider_->UnbindPID(pid);
+  }
 }
 
 void DHCPConfig::StartDHCPTimeout() {
diff --git a/dhcp_config.h b/dhcp_config.h
index 2c3d907..4465e30 100644
--- a/dhcp_config.h
+++ b/dhcp_config.h
@@ -165,6 +165,8 @@
   // Informs upper layers of the failure.
   void ProcessDHCPTimeout();
 
+  void KillClient();
+
   // Store cached copies of singletons for speed/ease of testing.
   ProxyFactory *proxy_factory_;
 
diff --git a/dhcp_config_unittest.cc b/dhcp_config_unittest.cc
index 559eb1a..5bf2223 100644
--- a/dhcp_config_unittest.cc
+++ b/dhcp_config_unittest.cc
@@ -553,15 +553,17 @@
 }
 
 TEST_F(DHCPConfigTest, Stop) {
-  // Ensure no crashes.
   const int kPID = 1 << 17;  // Ensure unknown positive PID.
   ScopedMockLog log;
   EXPECT_CALL(log, Log(_, _, _)).Times(AnyNumber());
   EXPECT_CALL(log, Log(_, _, ContainsRegex(
-      base::StringPrintf("Terminating.+%s", __func__))));
+      base::StringPrintf("Stopping.+%s", __func__))));
   config_->pid_ = kPID;
+  DHCPProvider::GetInstance()->BindPID(kPID, config_);
   config_->Stop(__func__);
   EXPECT_TRUE(config_->lease_acquisition_timeout_callback_.IsCancelled());
+  EXPECT_FALSE(DHCPProvider::GetInstance()->GetConfig(kPID));
+  EXPECT_FALSE(config_->pid_);
 }
 
 TEST_F(DHCPConfigTest, StopDuringRequestIP) {