shill: Configures netlink broadcast messaging with raw sockets.

This CL provides an interface for software to request the netlink
message_type and available multicast groups corresponding to a netlink
family name.  The new software completely replaces the old event_type
stuff that utilized libnl to get this information (and, as such, it is
the last nail in the coffin of using libnl for netlink communication
with the kernel).  |NetlinkMessage|s now get their message_type from
Config80211.

Also includes code to handle multi-part responses to a netlink message.

BUG=chromium:218211
TEST=unittests and manual tests.  The manual tests consist of:
  - Start shill with log=-10/wifi.  On the target, in a shell window,
    do the following:
      o stop shill
      o shill --log-level=-10 --log-scopes=wifi
  - Wait five seconds (the code will do everything it needs to at
    startup).
  - Look in /var/log/net.log and verify the following:
      o a CTRL_CMD_GETFAMILY message is sent to the kernel (just search
        for the 'CTRL_CMD_GETFAMILY' string).
      o a CTRL_CMD_NEWFAMILY message is received from the kernel
      o after those messages, verify that one or more
        NL80211_CMD_TRIGGER_SCAN messages are received from the kernel.

Change-Id: I04571bdff9908ad8bd39a3a64a24e7e11074bf18
Reviewed-on: https://gerrit.chromium.org/gerrit/44770
Commit-Queue: Wade Guthrie <wdg@chromium.org>
Reviewed-by: Wade Guthrie <wdg@chromium.org>
Tested-by: Wade Guthrie <wdg@chromium.org>
Reviewed-by: mukesh agrawal <quiche@chromium.org>
diff --git a/config80211.cc b/config80211.cc
index aaac6e8..57e6e29 100644
--- a/config80211.cc
+++ b/config80211.cc
@@ -6,6 +6,8 @@
 
 #include <ctype.h>
 #include <netlink/msg.h>
+#include <sys/select.h>
+#include <sys/time.h>
 
 #include <map>
 #include <sstream>
@@ -14,12 +16,14 @@
 #include <base/memory/weak_ptr.h>
 #include <base/stl_util.h>
 
+#include "shill/attribute_list.h"
 #include "shill/error.h"
 #include "shill/io_handler.h"
 #include "shill/logging.h"
 #include "shill/netlink_socket.h"
 #include "shill/nl80211_message.h"
 #include "shill/scope_logger.h"
+#include "shill/shill_time.h"
 
 using base::Bind;
 using base::LazyInstance;
@@ -32,32 +36,30 @@
 LazyInstance<Config80211> g_config80211 = LAZY_INSTANCE_INITIALIZER;
 }  // namespace
 
-Config80211::EventTypeStrings *Config80211::event_types_ = NULL;
+const char Config80211::kEventTypeConfig[] = "config";
+const char Config80211::kEventTypeScan[] = "scan";
+const char Config80211::kEventTypeRegulatory[] = "regulatory";
+const char Config80211::kEventTypeMlme[] = "mlme";
+const long Config80211::kMaximumNewFamilyWaitSeconds = 1;
+const long Config80211::kMaximumNewFamilyWaitMicroSeconds = 0;
+
+Config80211::MessageType::MessageType() :
+  family_id(NetlinkMessage::kIllegalMessageType) {}
 
 Config80211::Config80211()
-    : wifi_state_(kWifiDown),
-      dispatcher_(NULL),
+    : dispatcher_(NULL),
       weak_ptr_factory_(this),
       dispatcher_callback_(Bind(&Config80211::OnRawNlMessageReceived,
                                 weak_ptr_factory_.GetWeakPtr())),
-      sock_(NULL) {
-}
-
-Config80211::~Config80211() {
-  // Since Config80211 is a singleton, it should be safe to delete a static
-  // member.
-  delete event_types_;
-  event_types_ = NULL;
-}
+      sock_(NULL) {}
 
 Config80211 *Config80211::GetInstance() {
   return g_config80211.Pointer();
 }
 
 void Config80211::Reset(bool full) {
-  wifi_state_ = kWifiDown;
-  subscribed_events_.clear();
   ClearBroadcastHandlers();
+  message_types_.clear();
   if (full) {
     dispatcher_ = NULL;
     delete sock_;
@@ -65,7 +67,77 @@
   }
 }
 
-bool Config80211::Init(EventDispatcher *dispatcher) {
+void Config80211::OnNewFamilyMessage(const NetlinkMessage &raw_message) {
+  uint16_t family_id;
+  string family_name;
+
+  if (raw_message.message_type() == ErrorAckMessage::kMessageType) {
+    const ErrorAckMessage *error_ack_message =
+        reinterpret_cast<const ErrorAckMessage *>(&raw_message);
+    if (error_ack_message->error()) {
+      LOG(ERROR) << __func__ << ": Message (seq: "
+                 << raw_message.sequence_number() << ") failed: "
+                 << error_ack_message->ToString();
+    } else {
+      SLOG(WiFi, 6) << __func__ << ": Message (seq: "
+                 << raw_message.sequence_number() << ") ACKed";
+    }
+    return;
+  }
+
+  if (raw_message.message_type() != ControlNetlinkMessage::kMessageType) {
+    LOG(ERROR) << "Received unexpected message type: "
+               << raw_message.message_type();
+    return;
+  }
+
+  const ControlNetlinkMessage *message =
+      reinterpret_cast<const ControlNetlinkMessage *>(&raw_message);
+
+  if (!message->const_attributes()->GetU16AttributeValue(CTRL_ATTR_FAMILY_ID,
+                                                         &family_id)) {
+    LOG(ERROR) << __func__ << ": Couldn't get family_id attribute";
+    return;
+  }
+
+  if (!message->const_attributes()->GetStringAttributeValue(
+      CTRL_ATTR_FAMILY_NAME, &family_name)) {
+    LOG(ERROR) << __func__ << ": Couldn't get family_name attribute";
+    return;
+  }
+
+  SLOG(WiFi, 3) << "Socket family '" << family_name << "' has id=" << family_id;
+
+  // Extract the available multicast groups from the message.
+  AttributeListConstRefPtr multicast_groups;
+  if (message->const_attributes()->ConstGetNestedAttributeList(
+      CTRL_ATTR_MCAST_GROUPS, &multicast_groups)) {
+    AttributeListConstRefPtr current_group;
+
+    for (int i = 1;
+         multicast_groups->ConstGetNestedAttributeList(i, &current_group);
+         ++i) {
+      string group_name;
+      uint32_t group_id;
+      if (!current_group->GetStringAttributeValue(CTRL_ATTR_MCAST_GRP_NAME,
+                                                  &group_name)) {
+        LOG(WARNING) << "Expected CTRL_ATTR_MCAST_GRP_NAME, found none";
+        continue;
+      }
+      if (!current_group->GetU32AttributeValue(CTRL_ATTR_MCAST_GRP_ID,
+                                               &group_id)) {
+        LOG(WARNING) << "Expected CTRL_ATTR_MCAST_GRP_ID, found none";
+        continue;
+      }
+      SLOG(WiFi, 3) << "  Adding group '" << group_name << "' = " << group_id;
+      message_types_[family_name].groups[group_name] = group_id;
+    }
+  }
+
+  message_types_[family_name].family_id = family_id;
+}
+
+bool Config80211::Init() {
   if (!sock_) {
     sock_ = new NetlinkSocket;
     if (!sock_) {
@@ -77,17 +149,98 @@
       return false;
     }
   }
+  return true;
+}
 
-  if (!event_types_) {
-    event_types_ = new EventTypeStrings;
-    (*event_types_)[Config80211::kEventTypeConfig] = "config";
-    (*event_types_)[Config80211::kEventTypeScan] = "scan";
-    (*event_types_)[Config80211::kEventTypeRegulatory] = "regulatory";
-    (*event_types_)[Config80211::kEventTypeMlme] = "mlme";
+void Config80211::Start(EventDispatcher *dispatcher) {
+  dispatcher_ = dispatcher;
+  CHECK(dispatcher_);
+  // Install ourselves in the shill mainloop so we receive messages on the
+  // netlink socket.
+  dispatcher_handler_.reset(dispatcher_->CreateInputHandler(
+      file_descriptor(),
+      dispatcher_callback_,
+      Bind(&Config80211::OnReadError, weak_ptr_factory_.GetWeakPtr())));
+}
+
+uint16_t Config80211::GetFamily(string name) {
+  MessageType &message_type = message_types_[name];
+  if (message_type.family_id != NetlinkMessage::kIllegalMessageType) {
+    return message_type.family_id;
+  }
+  if (!sock_) {
+    LOG(FATAL) << "Must call |Init| before this method.";
+    return false;
   }
 
-  dispatcher_ = dispatcher;
-  return true;
+  GetFamilyMessage msg;
+  if (!msg.attributes()->CreateStringAttribute(CTRL_ATTR_FAMILY_NAME,
+                                               "CTRL_ATTR_FAMILY_NAME")) {
+    LOG(ERROR) << "Couldn't create string attribute";
+    return false;
+  }
+  if (!msg.attributes()->SetStringAttributeValue(CTRL_ATTR_FAMILY_NAME, name)) {
+    LOG(ERROR) << "Couldn't set string attribute";
+    return false;
+  }
+  SendMessage(&msg, Bind(&Config80211::OnNewFamilyMessage,
+                         weak_ptr_factory_.GetWeakPtr()));
+
+  // Wait for a response.  The code absolutely needs family_ids for its
+  // message types so we do a synchronous wait.  It's OK to do this because
+  // a) libnl does a synchronous wait (so there's prior art), b) waiting
+  // asynchronously would add significant and unnecessary complexity to the
+  // code that deals with pending messages that could, potentially, be waiting
+  // for a message type, and c) it really doesn't take very long for the
+  // GETFAMILY / NEWFAMILY transaction to transpire (this transaction was timed
+  // over 20 times and found a maximum duration of 11.1 microseconds and an
+  // average of 4.0 microseconds).
+  struct timeval start_time, now, end_time;
+  struct timeval maximum_wait_duration = {kMaximumNewFamilyWaitSeconds,
+                                          kMaximumNewFamilyWaitMicroSeconds};
+  Time *time = Time::GetInstance();
+  time->GetTimeMonotonic(&start_time);
+  now = start_time;
+  timeradd(&start_time, &maximum_wait_duration, &end_time);
+
+  do {
+    // Wait with timeout for a message from the netlink socket.
+    fd_set read_fds;
+    FD_ZERO(&read_fds);
+    FD_SET(file_descriptor(), &read_fds);
+    struct timeval wait_duration;
+    timersub(&end_time, &now, &wait_duration);
+    int result = select(file_descriptor() + 1, &read_fds, NULL, NULL,
+                        &wait_duration);
+    if (result < 0) {
+      PLOG(ERROR) << "Select failed";
+      return NetlinkMessage::kIllegalMessageType;
+    }
+    if (result == 0) {
+      LOG(WARNING) << "Timed out waiting for family_id for family '"
+                   << name << "'.";
+      return NetlinkMessage::kIllegalMessageType;
+    }
+
+    // Read and process any messages.
+    ByteString received;
+    sock_->RecvMessage(&received);
+    InputData input_data(received.GetData(), received.GetLength());
+    OnRawNlMessageReceived(&input_data);
+    if (message_type.family_id != NetlinkMessage::kIllegalMessageType) {
+      time->GetTimeMonotonic(&now);
+      timersub(&now, &start_time, &wait_duration);
+      SLOG(WiFi, 5) << "Found id " << message_type.family_id
+                    << " for name '" << name << "' in "
+                    << wait_duration.tv_sec << " sec, "
+                    << wait_duration.tv_usec << " usec.";
+      return message_type.family_id;
+    }
+    time->GetTimeMonotonic(&now);
+  } while (timercmp(&now, &end_time, <));
+
+  LOG(ERROR) << "Timed out waiting for family_id for family '" << name << "'.";
+  return NetlinkMessage::kIllegalMessageType;
 }
 
 bool Config80211::AddBroadcastHandler(const NetlinkMessageHandler &handler) {
@@ -138,22 +291,19 @@
 
 bool Config80211::SendMessage(NetlinkMessage *message,
                               const NetlinkMessageHandler &handler) {
-  // TODO(wdg): Replace the following with a discovered value in the absolute
-  // next CL!
-  static const uint16_t kNl80211FamilyId = 19;
   if (!message) {
     LOG(ERROR) << "Message is NULL.";
     return false;
   }
 
-  ByteString message_string = message->Encode(this->GetSequenceNumber(),
-                                              kNl80211FamilyId);
+  ByteString message_string = message->Encode(this->GetSequenceNumber());
 
   if (handler.is_null()) {
     SLOG(WiFi, 3) << "Handler for message was null.";
   } else if (ContainsKey(message_handlers_, message->sequence_number())) {
     LOG(ERROR) << "A handler already existed for sequence: "
-               << message->sequence_number() << ". Ignoring new handler.";
+               << message->sequence_number();
+    return false;
   } else {
     message_handlers_[message->sequence_number()] = handler;
   }
@@ -180,82 +330,29 @@
   return true;
 }
 
-// static
-bool Config80211::GetEventTypeString(EventType type, string *value) {
-  if (!value) {
-    LOG(ERROR) << "NULL |value|";
-    return false;
-  }
-  if (!event_types_) {
-    LOG(ERROR) << "NULL |event_types_|";
-    return false;
-  }
-
-  EventTypeStrings::iterator match = (*event_types_).find(type);
-  if (match == (*event_types_).end()) {
-    LOG(ERROR) << "Event type " << type << " not found";
-    return false;
-  }
-  *value = match->second;
-  return true;
-}
-
-void Config80211::SetWifiState(WifiState new_state) {
-  if (wifi_state_ == new_state) {
-    return;
-  }
-
-  if (!sock_) {
-    LOG(ERROR) << "Config80211::Init needs to be called before this";
-    return;
-  }
-
-  if (new_state == kWifiUp) {
-    // Install ourselves in the shill mainloop so we receive messages on the
-    // netlink socket.
-    if (dispatcher_) {
-      dispatcher_handler_.reset(dispatcher_->CreateInputHandler(
-          file_descriptor(),
-          dispatcher_callback_,
-          Bind(&Config80211::OnReadError, weak_ptr_factory_.GetWeakPtr())));
-    }
-
-    // If we're newly-up, subscribe to all the event types that have been
-    // requested.
-    SubscribedEvents::const_iterator i;
-    for (i = subscribed_events_.begin(); i != subscribed_events_.end(); ++i) {
-      ActuallySubscribeToEvents(*i);
-    }
-  }
-  wifi_state_ = new_state;
-}
-
 uint32_t Config80211::GetSequenceNumber() {
   return sock_ ?
       sock_->GetSequenceNumber() : NetlinkMessage::kBroadcastSequenceNumber;
 }
 
-bool Config80211::SubscribeToEvents(EventType type) {
-  bool it_worked = true;
-  if (!ContainsKey(subscribed_events_, type)) {
-    if (wifi_state_ == kWifiUp) {
-      it_worked = ActuallySubscribeToEvents(type);
-    }
-    // |subscribed_events_| is a list of events to which we want to subscribe
-    // when wifi comes up (including when it comes _back_ up after it goes
-    // down sometime in the future).
-    subscribed_events_.insert(type);
-  }
-  return it_worked;
-}
-
-bool Config80211::ActuallySubscribeToEvents(EventType type) {
-  string group_name;
-
-  if (!GetEventTypeString(type, &group_name)) {
+bool Config80211::SubscribeToEvents(const string &family_id,
+                                    const string &group_name) {
+  if (!ContainsKey(message_types_, family_id)) {
+    LOG(ERROR) << "Family '" << family_id << "' doesn't exist";
     return false;
   }
-  return true;
+
+  if (!ContainsKey(message_types_[family_id].groups, group_name)) {
+    LOG(ERROR) << "Group '" << group_name << "' doesn't exist in family '"
+               << family_id << "'";
+    return false;
+  }
+
+  uint32_t group_id = message_types_[family_id].groups[group_name];
+  if (!sock_) {
+    LOG(FATAL) << "Need to call |Init| first.";
+  }
+  return sock_->SubscribeToEvents(group_id);
 }
 
 void Config80211::OnRawNlMessageReceived(InputData *data) {
@@ -303,8 +400,6 @@
                              msg->nlmsg_len);
 
   // Call (then erase) any message-specific handler.
-  // TODO(wdg): Support multi-part messages; don't delete handler until last
-  // part appears.
   if (ContainsKey(message_handlers_, sequence_number)) {
     SLOG(WiFi, 3) << "found message-specific handler";
     if (message_handlers_[sequence_number].is_null()) {
@@ -313,7 +408,23 @@
     } else {
       message_handlers_[sequence_number].Run(*message);
     }
-    message_handlers_.erase(sequence_number);
+
+    if (message->message_type() == ErrorAckMessage::kMessageType) {
+      const ErrorAckMessage *error_ack_message =
+          reinterpret_cast<const ErrorAckMessage *>(message.get());
+      if (error_ack_message->error()) {
+        SLOG(WiFi, 3) << "Removing callback";
+        message_handlers_.erase(sequence_number);
+      } else {
+        SLOG(WiFi, 3) << "ACK message -- not removing callback";
+      }
+    } else if ((message->flags() & NLM_F_MULTI) &&
+        (message->message_type() != NLMSG_DONE)) {
+      SLOG(WiFi, 3) << "Multi-part message -- not removing callback";
+    } else {
+      SLOG(WiFi, 3) << "Removing callback";
+      message_handlers_.erase(sequence_number);
+    }
   } else {
     list<NetlinkMessageHandler>::const_iterator i =
         broadcast_handlers_.begin();