shill: Add http_proxy class

The http_proxy class adds a device/connection-based proxy that guarantees
to the caller that its HTTP request will go out over a particular device's
connection.  DNS requests are sent through a socket bound to this device
and go to the DNS servers configured on this connection.  HTTP requests
will also be bound to this interface.  This facility will be used by
a number of peripheral bits including portal detection, activation and
cashew.

BUG=chromium-os:21664
TEST=New unit test.  New (disabled) functional test, against which I
can run "curl -x" and Chrome with manual proxy settings.

Change-Id: I0d59bf0ae27d3538ef359f786742f5c2f1d6fef9
Reviewed-on: https://gerrit.chromium.org/gerrit/10165
Reviewed-by: Thieu Le <thieule@chromium.org>
Tested-by: Paul Stewart <pstew@chromium.org>
Commit-Ready: Paul Stewart <pstew@chromium.org>
diff --git a/http_proxy.cc b/http_proxy.cc
new file mode 100644
index 0000000..60bc281
--- /dev/null
+++ b/http_proxy.cc
@@ -0,0 +1,652 @@
+// Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "shill/http_proxy.h"
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/if.h>  // Needs definitions from netinet/in.h
+#include <stdio.h>
+#include <time.h>
+
+#include <string>
+#include <vector>
+
+#include <base/logging.h>
+#include <base/string_number_conversions.h>
+#include <base/string_split.h>
+#include <base/string_util.h>
+#include <base/stringprintf.h>
+
+#include "shill/async_connection.h"
+#include "shill/dns_client.h"
+#include "shill/event_dispatcher.h"
+#include "shill/ip_address.h"
+#include "shill/sockets.h"
+
+using base::StringPrintf;
+using std::string;
+using std::vector;
+
+namespace shill {
+
+// Idle timeout (seconds) applied by StartIdleTimeout() while the client's
+// request headers are still being read.
+const int HTTPProxy::kClientHeaderTimeoutSeconds = 1;
+// Idle timeout (seconds) applied while connecting to the remote server.
+const int HTTPProxy::kConnectTimeoutSeconds = 10;
+// DNS timeout; converted to milliseconds and handed to DNSClient in Start().
+const int HTTPProxy::kDNSTimeoutSeconds = 5;
+// Server port assumed until the request names a different one.
+const int HTTPProxy::kDefaultServerPort = 80;
+// Idle timeout (seconds) used in every state that has no dedicated value
+// (the DNS-lookup state sets no idle timeout at all).
+const int HTTPProxy::kInputTimeoutSeconds = 30;
+// Backlog argument passed to Listen() on the proxy socket.
+const size_t HTTPProxy::kMaxClientQueue = 10;
+// The request is rejected once this many header lines have accumulated.
+const size_t HTTPProxy::kMaxHeaderCount = 128;
+// The request is rejected once a single header line reaches this length.
+const size_t HTTPProxy::kMaxHeaderSize = 2048;
+// Upper bound (seconds) on a whole client transaction; posted as a delayed
+// StopClient() task when a client is accepted.
+const int HTTPProxy::kTransactionTimeoutSeconds = 600;
+
+// Characters that end the hostname[:port] part of a request URL.
+const char HTTPProxy::kHTTPURLDelimiters[] = " /#?";
+// Scheme prefix searched for in the request line.
+const char HTTPProxy::kHTTPURLPrefix[] = "http://";
+// Marker searched for in the request line to locate the HTTP version.
+const char HTTPProxy::kHTTPVersionPrefix[] = " HTTP/1";
+// Body text of the error response sent for internal proxy failures.
+const char HTTPProxy::kInternalErrorMsg[] = "Proxy Failed: Internal Error";
+
+
+// Constructs a proxy in the idle state.  |interface_name| and |dns_servers|
+// are only recorded here; Start() later hands them to the DNSClient and
+// AsyncConnection it creates.  All event callbacks are bound to this
+// object's member functions up front.  No socket is opened and no handler
+// is registered until Start() is called, so every descriptor and port
+// begins at -1 and every helper pointer begins at NULL.
+HTTPProxy::HTTPProxy(const std::string &interface_name,
+                     const std::vector<std::string> &dns_servers)
+    : state_(kStateIdle),
+      interface_name_(interface_name),
+      dns_servers_(dns_servers),
+      accept_callback_(NewCallback(this, &HTTPProxy::AcceptClient)),
+      connect_completion_callback_(
+          NewCallback(this, &HTTPProxy::OnConnectCompletion)),
+      dns_client_callback_(NewCallback(this, &HTTPProxy::GetDNSResult)),
+      read_client_callback_(NewCallback(this, &HTTPProxy::ReadFromClient)),
+      read_server_callback_(NewCallback(this, &HTTPProxy::ReadFromServer)),
+      write_client_callback_(NewCallback(this, &HTTPProxy::WriteToClient)),
+      write_server_callback_(NewCallback(this, &HTTPProxy::WriteToServer)),
+      task_factory_(this),
+      dispatcher_(NULL),
+      dns_client_(NULL),
+      proxy_port_(-1),
+      proxy_socket_(-1),
+      server_async_connection_(NULL),
+      sockets_(NULL),
+      client_socket_(-1),
+      server_port_(kDefaultServerPort),
+      server_socket_(-1),
+      idle_timeout_(NULL) { }
+
+// Shuts the proxy down on destruction.  Stop() returns immediately when the
+// proxy is not running (|sockets_| is NULL), so this is safe even if Start()
+// was never called or did not succeed.
+HTTPProxy::~HTTPProxy() {
+  Stop();
+}
+
+// Opens a non-blocking, listening TCP socket bound to the IPv4 loopback
+// address and creates the helper objects (accept handler, DNS client and
+// asynchronous connection) needed to service a client.  The port that was
+// actually bound is read back with GetSockName() and stored in
+// |proxy_port_|.
+//
+// Returns true if the proxy is running when the call returns (including
+// the case where it was already running), and false if the proxy socket
+// could not be created or set up; in the failure case no state is kept.
+bool HTTPProxy::Start(EventDispatcher *dispatcher,
+                      Sockets *sockets) {
+  VLOG(3) << "In " << __func__;
+
+  if (sockets_) {
+    // We are already running.
+    return true;
+  }
+
+  proxy_socket_ = sockets->Socket(PF_INET, SOCK_STREAM, 0);
+  if (proxy_socket_ < 0) {
+    PLOG(ERROR) << "Failed to open proxy socket";
+    return false;
+  }
+
+  // sin_port stays zero from the memset below; the port that ends up bound
+  // is retrieved through GetSockName().
+  struct sockaddr_in addr;
+  socklen_t addrlen = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  if (sockets->Bind(proxy_socket_,
+                    reinterpret_cast<struct sockaddr *>(&addr),
+                    sizeof(addr)) < 0 ||
+      sockets->GetSockName(proxy_socket_,
+                           reinterpret_cast<struct sockaddr *>(&addr),
+                           &addrlen) < 0 ||
+      sockets->SetNonBlocking(proxy_socket_) < 0 ||
+      sockets->Listen(proxy_socket_, kMaxClientQueue) < 0) {
+    // Log before closing the socket: PLOG reports errno, and Close() could
+    // overwrite the value left behind by the call that actually failed.
+    PLOG(ERROR) << "HTTPProxy socket setup failed";
+    sockets->Close(proxy_socket_);
+    proxy_socket_ = -1;
+    return false;
+  }
+
+  accept_handler_.reset(
+      dispatcher->CreateReadyHandler(proxy_socket_, IOHandler::kModeInput,
+                                     accept_callback_.get()));
+  dispatcher_ = dispatcher;
+  dns_client_.reset(new DNSClient(IPAddress::kFamilyIPv4,
+                                  interface_name_,
+                                  dns_servers_,
+                                  kDNSTimeoutSeconds * 1000,
+                                  dispatcher,
+                                  dns_client_callback_.get()));
+  proxy_port_ = ntohs(addr.sin_port);
+  server_async_connection_.reset(
+      new AsyncConnection(interface_name_, dispatcher, sockets,
+                          connect_completion_callback_.get()));
+  // |sockets_| doubles as the "running" flag checked at the top of Start()
+  // and Stop(), so it is only set once setup has fully succeeded.
+  sockets_ = sockets;
+  state_ = kStateWaitConnection;
+  return true;
+}
+
+// Shuts the proxy down: ends any client transaction in progress, releases
+// the helper objects created by Start(), closes the listening socket and
+// returns to the idle state.  Does nothing if the proxy is not running.
+void HTTPProxy::Stop() {
+  VLOG(3) << "In " << __func__;
+
+  if (sockets_ == NULL) {
+    // Never started, or already stopped.
+    return;
+  }
+
+  // Finish with the current client first; this still needs the helpers.
+  StopClient();
+
+  // Release the helpers, then the listening socket they were attached to.
+  accept_handler_.reset();
+  dns_client_.reset();
+  server_async_connection_.reset();
+  sockets_->Close(proxy_socket_);
+
+  // Return the plain fields to their "not running" values.
+  proxy_socket_ = -1;
+  proxy_port_ = -1;
+  dispatcher_ = NULL;
+  sockets_ = NULL;
+  state_ = kStateIdle;
+}
+
+// Ready-handler callback fired when the listening socket |fd| has a client
+// waiting.  Accepts the connection, stops listening for further clients
+// while this one is being served, and begins reading the request headers.
+void HTTPProxy::AcceptClient(int fd) {
+  VLOG(3) << "In " << __func__;
+
+  const int accepted_socket = sockets_->Accept(fd, NULL, NULL);
+  if (accepted_socket < 0) {
+    PLOG(ERROR) << "Client accept failed";
+    return;
+  }
+
+  // No further clients are accepted until StopClient() restarts the handler.
+  accept_handler_->Stop();
+
+  client_socket_ = accepted_socket;
+  sockets_->SetNonBlocking(client_socket_);
+  read_client_handler_.reset(
+      dispatcher_->CreateInputHandler(client_socket_,
+                                      read_client_callback_.get()));
+
+  // Bound the duration of the whole transaction, independently of the
+  // per-state idle timeout armed below.
+  const int transaction_timeout_ms = kTransactionTimeoutSeconds * 1000;
+  dispatcher_->PostDelayedTask(
+      task_factory_.NewRunnableMethod(&HTTPProxy::StopClient),
+      transaction_timeout_ms);
+
+  state_ = kStateReadClientHeader;
+  StartIdleTimeout();
+}
+
+// Begin an asynchronous connection to |address|:|port|.  Returns true if
+// the attempt was started; otherwise sends a 500 error to the client and
+// returns false.
+bool HTTPProxy::ConnectServer(const IPAddress &address, int port) {
+  state_ = kStateConnectServer;
+  if (server_async_connection_->Start(address, port)) {
+    StartIdleTimeout();
+    return true;
+  }
+  SendClientError(500, "Could not create socket to connect to server");
+  return false;
+}
+
+// DNSClient completion callback.  On success, connects to the resolved
+// address on |server_port_|; on failure, reports a 502 error to the client
+// that includes the DNS client's error text.
+void HTTPProxy::GetDNSResult(bool result) {
+  if (result) {
+    ConnectServer(dns_client_->address(), server_port_);
+    return;
+  }
+  SendClientError(502, string("Could not resolve hostname: ") +
+                  dns_client_->error());
+}
+
+// Completion callback for the asynchronous Connect() to the remote server.
+// When |success| is true, |fd| is the connected server socket and data
+// tunneling begins; otherwise a 500 error carrying the connection's error
+// text is returned to the client.
+void HTTPProxy::OnConnectCompletion(bool success, int fd) {
+  if (success) {
+    server_socket_ = fd;
+    state_ = kStateTunnelData;
+    StartTransmit();
+    return;
+  }
+  SendClientError(500, string("Socket connection delayed failure: ") +
+                  server_async_connection_->error());
+}
+
+// Read through the header lines from the client, modifying or adding
+// lines as necessary, and assemble the request that will be forwarded to
+// the server into |client_data_|:
+//   - "Connection:" and "Proxy-Connection:" are forced to "close", and a
+//     "Connection: close" line is added if the client did not send one.
+//   - This proxy is appended to an existing "Via:" line, or a new "Via:"
+//     line is added.
+// Then perform final determination of the hostname/port we should connect
+// to -- the "Host:" header if present, otherwise the hostname taken from
+// the request URL -- and either start a DNS request or connect directly to
+// a numeric address.
+//
+// Returns true if a lookup or connection was started.  On any failure an
+// error response is sent to the client and false is returned.
+bool HTTPProxy::ParseClientRequest() {
+  VLOG(3) << "In " << __func__;
+
+  string host;
+  bool found_via = false;
+  bool found_connection = false;
+  for (vector<string>::iterator it = client_headers_.begin();
+       it != client_headers_.end(); ++it) {
+    if (StartsWithASCII(*it, "Host:", false)) {
+      // 5 == strlen("Host:"); surrounding whitespace is trimmed below.
+      host = it->substr(5);
+    } else if (StartsWithASCII(*it, "Via:", false)) {
+      found_via = true;
+      (*it).append(StringPrintf(", %s shill-proxy", client_version_.c_str()));
+    } else if (StartsWithASCII(*it, "Connection:", false)) {
+      found_connection = true;
+      (*it).assign("Connection: close");
+    } else if (StartsWithASCII(*it, "Proxy-Connection:", false)) {
+      (*it).assign("Proxy-Connection: close");
+    }
+  }
+
+  if (!found_connection) {
+    client_headers_.push_back("Connection: close");
+  }
+  if (!found_via) {
+    client_headers_.push_back(
+        StringPrintf("Via: %s shill-proxy", client_version_.c_str()));
+  }
+
+  // Assemble the request as it will be sent to the server.
+  client_data_.Clear();
+  for (vector<string>::iterator it = client_headers_.begin();
+       it != client_headers_.end(); ++it) {
+    client_data_.Append(ByteString(*it + "\r\n", false));
+  }
+  client_data_.Append(ByteString(string("\r\n"), false));
+
+  TrimWhitespaceASCII(host, TRIM_ALL, &host);
+  if (host.empty()) {
+    // Revert to using the hostname in the URL if no "Host:" header exists.
+    host = server_hostname_;
+  }
+
+  if (host.empty()) {
+    SendClientError(400, "I don't know what host you want me to connect to");
+    return false;
+  }
+
+  // Split an optional ":port" suffix off the hostname.
+  server_port_ = kDefaultServerPort;
+  vector<string> host_parts;
+  base::SplitString(host, ':', &host_parts);
+
+  if (host_parts.size() > 2) {
+    SendClientError(400, "Too many colons in hostname");
+    return false;
+  } else if (host_parts.size() == 2) {
+    // Largest value that fits in a 16-bit TCP port number.
+    const int kMaxPortNumber = 65535;
+    server_hostname_ = host_parts[0];
+    // Reject ports that parse as integers but cannot be valid TCP ports,
+    // rather than handing them to the connection code.
+    if (!base::StringToInt(host_parts[1], &server_port_) ||
+        server_port_ <= 0 || server_port_ > kMaxPortNumber) {
+      SendClientError(400, "Could not parse port number");
+      return false;
+    }
+  } else {
+    server_hostname_ = host;
+  }
+
+  IPAddress addr(IPAddress::kFamilyIPv4);
+  if (addr.SetAddressFromString(server_hostname_)) {
+    // The host is a numeric IPv4 address: no DNS lookup is required.
+    if (!ConnectServer(addr, server_port_)) {
+      return false;
+    }
+  } else {
+    VLOG(3) << "Looking up host: " << server_hostname_;
+    if (!dns_client_->Start(server_hostname_)) {
+      SendClientError(502, "Could not resolve hostname");
+      return false;
+    }
+    state_ = kStateLookupServer;
+  }
+  return true;
+}
+
+// Accept the line just completed (the last element of |client_headers_|)
+// into the client headers.  An empty line marks the end of the headers and
+// triggers parsing of the whole request; the first line is additionally
+// parsed for the HTTP version and the target hostname.  Returns false if a
+// parse error occurs (an error response has then been sent to the client).
+bool HTTPProxy::ProcessLastHeaderLine() {
+  string *header = &client_headers_.back();
+  TrimString(*header, "\r", header);
+
+  if (header->empty()) {
+    // Empty line terminates client headers.  |header| points at the element
+    // removed here, and ParseClientRequest() may append to (and so
+    // reallocate) |client_headers_|, so |header| must not be used past this
+    // point.  Return right away: the per-line checks below only apply to
+    // lines that are still being collected, and must not send an error
+    // after the request has already been dispatched.
+    client_headers_.pop_back();
+    return ParseClientRequest();
+  }
+
+  // Is this the first header line?
+  if (client_headers_.size() == 1) {
+    if (!ReadClientHTTPVersion(header) || !ReadClientHostname(header)) {
+      return false;
+    }
+  }
+
+  // Bound the number of header lines a client can make us buffer.
+  if (client_headers_.size() >= kMaxHeaderCount) {
+    SendClientError(500, kInternalErrorMsg);
+    return false;
+  }
+
+  return true;
+}
+
+// Split input from client into header lines, and consume parsed lines
+// from InputData.  The passed in |data| is modified to indicate the
+// characters consumed: on a true return, |data->buf| / |data->len| describe
+// whatever input was left unprocessed.  Characters are accumulated into the
+// last element of |client_headers_|, so a header line may be completed
+// across several calls.  Returns false if a line could not be accepted (an
+// error response has then been sent to the client).
+bool HTTPProxy::ReadClientHeaders(InputData *data) {
+  unsigned char *ptr = data->buf;
+  unsigned char *end = ptr + data->len;
+
+  // Make sure there is always a "current line" to append characters to.
+  if (client_headers_.empty()) {
+    client_headers_.push_back(string());
+  }
+
+  // Processing a line can move us out of kStateReadClientHeader (once the
+  // terminating empty line is seen); stop consuming input at that point.
+  for (; ptr < end && state_ == kStateReadClientHeader; ++ptr) {
+    if (*ptr == '\n') {
+      if (!ProcessLastHeaderLine()) {
+        return false;
+      }
+
+      // Start a new line.  New characters we receive will be appended there.
+      client_headers_.push_back(string());
+      continue;
+    }
+
+    string *header = &client_headers_.back();
+    // Is the first character of the header line a space or tab character?
+    if (header->empty() && (*ptr == ' ' || *ptr == '\t') &&
+        client_headers_.size() > 1) {
+      // Line Continuation: Add this character to the previous header line.
+      // This way, all of the data (including newlines and line continuation
+      // characters) related to a specific header will be contained within
+      // a single element of |client_headers_|, and manipulation of headers
+      // such as appending will be simpler.  This is accomplished by removing
+      // the empty line we started, and instead appending the whitespace
+      // and following characters to the previous line.
+      client_headers_.pop_back();
+      header = &client_headers_.back();
+      header->append("\r\n");
+    }
+
+    // Bound the size of any single header line.
+    if (header->length() >= kMaxHeaderSize) {
+      SendClientError(500, kInternalErrorMsg);
+      return false;
+    }
+    header->push_back(*ptr);
+  }
+
+  // Return the remaining data to the caller -- this could be POST data
+  // or other non-header data sent with the client request.
+  data->buf = ptr;
+  data->len = end - ptr;
+
+  return true;
+}
+
+// Looks for an "http://" URL in the first line of the client's request.
+// When one is present, the hostname (and port) is stored in
+// |server_hostname_| and "http://<hostname>" is removed from |header|,
+// leaving a URL that starts with "/".  Returns true whether or not a URL
+// was found; returns false (after sending an error to the client) only
+// when the end of the hostname cannot be located.
+bool HTTPProxy::ReadClientHostname(string *header) {
+  const string prefix(kHTTPURLPrefix);
+  const size_t url_start = header->find(prefix);
+  if (url_start == string::npos) {
+    // No absolute URL on this line; there is nothing to extract.
+    return true;
+  }
+
+  const size_t name_start = url_start + prefix.length();
+  const size_t name_end =
+      header->find_first_of(kHTTPURLDelimiters, name_start);
+  if (name_end == string::npos) {
+    LOG(ERROR) << "Could not find end of hostname in request.  Line was: "
+               << *header;
+    SendClientError(500, kInternalErrorMsg);
+    return false;
+  }
+
+  server_hostname_ = header->substr(name_start, name_end - name_start);
+
+  // Modify the URL passed upstream to remove "http://<hostname>", and make
+  // sure what remains begins with a slash.
+  header->erase(url_start, name_end - url_start);
+  if ((*header)[url_start] != '/') {
+    header->insert(url_start, "/");
+  }
+  return true;
+}
+
+// Pulls the HTTP version out of the first line of the client's request and
+// stores it in |client_version_|.  Returns true if a version was found;
+// otherwise sends a 501 error to the client and returns false.
+bool HTTPProxy::ReadClientHTTPVersion(string *header) {
+  const string prefix(kHTTPVersionPrefix);
+  const size_t prefix_pos = header->find(prefix);
+  if (prefix_pos == string::npos) {
+    SendClientError(501, "Server only accepts HTTP/1.x requests");
+    return false;
+  }
+
+  // The prefix ends with the major version digit; back up one character so
+  // that digit is kept as part of the stored version.
+  const size_t version_pos = prefix_pos + prefix.length() - 1;
+  client_version_ = header->substr(version_pos);
+  return true;
+}
+
+// Input-handler callback fired when data arrives from the client.  While
+// the request headers are incomplete the data is fed to the header parser;
+// anything beyond the headers (e.g. POST data) is buffered for the server.
+void HTTPProxy::ReadFromClient(InputData *data) {
+  VLOG(3) << "In " << __func__ << " length " << data->len;
+
+  if (data->len == 0) {
+    // EOF from client.
+    StopClient();
+    return;
+  }
+
+  if (state_ == kStateReadClientHeader) {
+    const bool headers_ok = ReadClientHeaders(data);
+    if (!headers_ok) {
+      return;
+    }
+    if (state_ == kStateReadClientHeader) {
+      // Still consuming client headers; restart the input timer.
+      StartIdleTimeout();
+      return;
+    }
+  }
+
+  // ReadClientHeaders() may have consumed some or all of the input, so the
+  // remaining length has to be examined again.
+  if (data->len == 0) {
+    return;
+  }
+
+  // The client sent some information after its headers.  Buffer the client
+  // input and temporarily disable input events from the client.
+  client_data_.Append(ByteString(data->buf, data->len));
+  read_client_handler_->Stop();
+  StartTransmit();
+}
+
+// Input-handler callback fired when data has been read from the server.
+// A zero-length read means the server closed the connection: the
+// transaction ends at once if nothing is left to send to the client, and
+// otherwise ends after the buffered response has been flushed.
+void HTTPProxy::ReadFromServer(InputData *data) {
+  VLOG(3) << "In " << __func__ << " length " << data->len;
+
+  const bool server_closed = (data->len == 0);
+  if (server_closed && server_data_.IsEmpty()) {
+    StopClient();
+    return;
+  }
+
+  if (server_closed) {
+    state_ = kStateFlushResponse;
+  } else {
+    // Pause server input until this data has been written to the client.
+    read_server_handler_->Stop();
+  }
+
+  server_data_.Append(ByteString(data->buf, data->len));
+
+  StartTransmit();
+}
+
+// Return an HTTP error message back to the client.
+void HTTPProxy::SendClientError(int code, const string &error) {
+  VLOG(3) << "In " << __func__;
+  LOG(ERROR) << "Sending error " << error;
+
+  string error_msg = StringPrintf("HTTP/1.1 %d ERROR\r\n"
+                                  "Content-Type: text/plain\r\n\r\n"
+                                  "%s", code, error.c_str());
+  server_data_ = ByteString(error_msg, false);
+  state_ = kStateFlushResponse;
+  StartTransmit();
+}
+
+// (Re)arm the timeout for "the next event".  It complements the overall
+// transaction timeout by requiring some activity at reasonable intervals;
+// the interval depends on the current state.  Any timeout armed earlier is
+// cancelled first.
+void HTTPProxy::StartIdleTimeout() {
+  int timeout_seconds = 0;
+  if (state_ == kStateReadClientHeader) {
+    timeout_seconds = kClientHeaderTimeoutSeconds;
+  } else if (state_ == kStateConnectServer) {
+    timeout_seconds = kConnectTimeoutSeconds;
+  } else if (state_ == kStateLookupServer) {
+    // DNSClient has its own internal timeout, so we need not set one here.
+    timeout_seconds = 0;
+  } else {
+    timeout_seconds = kInputTimeoutSeconds;
+  }
+
+  if (idle_timeout_) {
+    idle_timeout_->Cancel();
+    idle_timeout_ = NULL;
+  }
+
+  if (timeout_seconds == 0) {
+    // No idle timeout applies in this state.
+    return;
+  }
+  idle_timeout_ = task_factory_.NewRunnableMethod(&HTTPProxy::StopClient);
+  dispatcher_->PostDelayedTask(idle_timeout_, timeout_seconds * 1000);
+}
+
+// Re-enable input from whichever endpoints we are ready to hear from.  An
+// endpoint is only listened to once everything previously received for the
+// opposite endpoint has been written out.  If a response was being flushed
+// and is now fully sent, the transaction is ended instead.
+void HTTPProxy::StartReceive() {
+  if (state_ == kStateTunnelData && client_data_.IsEmpty()) {
+    read_client_handler_->Start();
+  }
+
+  if (server_data_.IsEmpty()) {
+    if (state_ == kStateFlushResponse) {
+      // The final response has been delivered in full.
+      StopClient();
+      return;
+    }
+    if (state_ == kStateTunnelData) {
+      if (!read_server_handler_.get()) {
+        // First time through: create the server input handler.
+        read_server_handler_.reset(
+            dispatcher_->CreateInputHandler(server_socket_,
+                                            read_server_callback_.get()));
+      } else {
+        read_server_handler_->Start();
+      }
+    }
+  }
+
+  StartIdleTimeout();
+}
+
+// Enable the output-ready handler of every endpoint that has data waiting
+// for it, creating the handler on first use, then restart the idle timer.
+void HTTPProxy::StartTransmit() {
+  // Client data is only forwarded once the tunnel to the server is up.
+  if (state_ == kStateTunnelData && !client_data_.IsEmpty()) {
+    if (!write_server_handler_.get()) {
+      write_server_handler_.reset(
+          dispatcher_->CreateReadyHandler(server_socket_,
+                                          IOHandler::kModeOutput,
+                                          write_server_callback_.get()));
+    } else {
+      write_server_handler_->Start();
+    }
+  }
+
+  // Data for the client is sent both while tunneling and while flushing a
+  // final response.
+  const bool may_send_to_client =
+      state_ == kStateFlushResponse || state_ == kStateTunnelData;
+  if (may_send_to_client && !server_data_.IsEmpty()) {
+    if (!write_client_handler_.get()) {
+      write_client_handler_.reset(
+          dispatcher_->CreateReadyHandler(client_socket_,
+                                          IOHandler::kModeOutput,
+                                          write_client_callback_.get()));
+    } else {
+      write_client_handler_->Start();
+    }
+  }
+
+  StartIdleTimeout();
+}
+
+// Finish with the current client: tear down both endpoints, drop all
+// per-request state and pending work, and resume accepting new clients.
+// Called on various error conditions, and used as the callback for every
+// timeout.
+void HTTPProxy::StopClient() {
+  VLOG(3) << "In " << __func__;
+
+  // Client endpoint: handlers first, then the socket they watch.
+  write_client_handler_.reset();
+  read_client_handler_.reset();
+  if (client_socket_ != -1) {
+    sockets_->Close(client_socket_);
+    client_socket_ = -1;
+  }
+
+  // Server endpoint, in the same order.
+  write_server_handler_.reset();
+  read_server_handler_.reset();
+  if (server_socket_ != -1) {
+    sockets_->Close(server_socket_);
+    server_socket_ = -1;
+  }
+
+  // Forget everything learned from, or buffered for, this request.
+  client_headers_.clear();
+  client_version_.clear();
+  server_hostname_.clear();
+  server_port_ = kDefaultServerPort;
+  client_data_.Clear();
+  server_data_.Clear();
+
+  // Abandon any lookup, connection attempt or timeout still outstanding.
+  dns_client_->Stop();
+  server_async_connection_->Stop();
+  task_factory_.RevokeAll();
+  idle_timeout_ = NULL;
+
+  // Ready for the next client.
+  accept_handler_->Start();
+  state_ = kStateWaitConnection;
+}
+
+// Output ReadyHandler callback which fires when the client socket |fd| is
+// ready for data to be sent to it.  Sends as much of the buffered server
+// data as the socket accepts and keeps the remainder for the next call.
+// A failed send ends the transaction.
+void HTTPProxy::WriteToClient(int fd) {
+  CHECK_EQ(client_socket_, fd);
+  int ret = sockets_->Send(fd, server_data_.GetConstData(),
+                           server_data_.GetLength(), 0);
+  VLOG(3) << "In " << __func__ << " wrote " << ret << " of " <<
+      server_data_.GetLength();
+  if (ret < 0) {
+    // The write that failed was to the client socket; say so in the log.
+    LOG(ERROR) << "Write to client failed";
+    StopClient();
+    return;
+  }
+
+  // Drop the bytes that were sent; keep whatever is still unsent.
+  server_data_ = ByteString(server_data_.GetConstData() + ret,
+                            server_data_.GetLength() - ret);
+
+  if (server_data_.IsEmpty()) {
+    // Nothing left to write; stop asking for output-ready events.
+    write_client_handler_->Stop();
+  }
+
+  StartReceive();
+}
+
+// Output ReadyHandler callback which fires when the server socket |fd| is
+// ready for data to be sent to it.  Sends as much of the buffered client
+// data as the socket accepts and keeps the remainder for the next call.
+// A failed send ends the transaction.
+void HTTPProxy::WriteToServer(int fd) {
+  CHECK_EQ(server_socket_, fd);
+  int ret = sockets_->Send(fd, client_data_.GetConstData(),
+                           client_data_.GetLength(), 0);
+  VLOG(3) << "In " << __func__ << " wrote " << ret << " of " <<
+      client_data_.GetLength();
+
+  if (ret < 0) {
+    // The write that failed was to the server socket; say so in the log.
+    LOG(ERROR) << "Write to server failed";
+    StopClient();
+    return;
+  }
+
+  // Drop the bytes that were sent; keep whatever is still unsent.
+  client_data_ = ByteString(client_data_.GetConstData() + ret,
+                            client_data_.GetLength() - ret);
+
+  if (client_data_.IsEmpty()) {
+    // Nothing left to write; stop asking for output-ready events.
+    write_server_handler_->Stop();
+  }
+
+  StartReceive();
+}
+
+}  // namespace shill