Rework QPS client/server

Now set up as a driver and N anonymous workers that may become clients or servers.
Will convert to async soon.
diff --git a/test/cpp/qps/client.cc b/test/cpp/qps/client.cc
index 7c9763a..827c1ec 100644
--- a/test/cpp/qps/client.cc
+++ b/test/cpp/qps/client.cc
@@ -53,53 +53,112 @@
 #include <grpc++/server_builder.h>
 #include "test/core/util/grpc_profiler.h"
 #include "test/cpp/util/create_test_channel.h"
+#include "test/cpp/qps/client.h"
 #include "test/cpp/qps/qpstest.pb.h"
+#include "test/cpp/qps/histogram.h"
 #include "test/cpp/qps/timer.h"
 
-DEFINE_int32(driver_port, 0, "Client driver port.");
+namespace grpc {
+namespace testing {
 
-using grpc::ChannelInterface;
-using grpc::CreateTestChannel;
-using grpc::ServerBuilder;
-using grpc::ServerContext;
-using grpc::Status;
-using grpc::testing::ClientArgs;
-using grpc::testing::ClientConfig;
-using grpc::testing::ClientResult;
-using grpc::testing::QpsClient;
-using grpc::testing::SimpleRequest;
-using grpc::testing::SimpleResponse;
-using grpc::testing::StatsRequest;
-using grpc::testing::TestService;
+class SynchronousClient GRPC_FINAL : public Client {
+ public:
+  SynchronousClient(const ClientConfig& config) : timer_(new Timer) {
+    for (int i = 0; i < config.client_channels(); i++) {
+      channels_.push_back(ClientChannelInfo(config.server_targets(i % config.server_targets_size()), config));
+      auto* stub = channels_.back().get_stub();
+      for (int j = 0; j < config.outstanding_rpcs_per_channel(); j++) {
+        threads_.emplace_back(new Thread(stub, config));
+      }
+    }
+  }
 
-// In some distros, gflags is in the namespace google, and in some others,
-// in gflags. This hack is enabling us to find both.
-namespace google { }
-namespace gflags { }
-using namespace google;
-using namespace gflags;
+  ClientStats Mark() {
+    Histogram latencies;
+    std::vector<Histogram> to_merge(threads_.size());
+    for (size_t i = 0; i < threads_.size(); i++) {
+      threads_[i]->BeginSwap(&to_merge[i]);
+    }
+    std::unique_ptr<Timer> timer(new Timer);
+    timer_.swap(timer);
+    for (size_t i = 0; i < threads_.size(); i++) {
+      threads_[i]->EndSwap();
+      latencies.Merge(&to_merge[i]);
+    }
 
-static double now() {
-  gpr_timespec tv = gpr_now();
-  return 1e9 * tv.tv_sec + tv.tv_nsec;
-}
+    auto timer_result = timer->Mark();
 
-static bool got_sigint = false;
+    ClientStats stats;
+    auto* l = stats.mutable_latencies();
+    l->set_l_50(latencies.Percentile(50));
+    l->set_l_90(latencies.Percentile(90));
+    l->set_l_99(latencies.Percentile(99));
+    l->set_l_999(latencies.Percentile(99.9));
+    stats.set_num_rpcs(latencies.Count());
+    stats.set_time_elapsed(timer_result.wall);
+    stats.set_time_system(timer_result.system);
+    stats.set_time_user(timer_result.user);
+    return stats;
+  }
 
-static void sigint_handler(int x) { got_sigint = 1; }
+ private:
+  class Thread {
+   public:
+    Thread(TestService::Stub* stub, const ClientConfig& config) : stub_(stub), config_(config), done_(false), new_(nullptr), impl_([this]() {
+      SimpleRequest request;
+      SimpleResponse response;
+      request.set_response_type(
+          grpc::testing::PayloadType::COMPRESSABLE);
+      request.set_response_size(config_.payload_size());
+      for (;;) {
+        {
+          std::lock_guard<std::mutex> g(mu_);
+          if (done_) return;
+          if (new_) {
+            new_->Swap(&histogram_);
+            new_ = nullptr;
+            cv_.notify_one();
+          }
+        }
+        double start = Timer::Now();
+        grpc::ClientContext context;
+        grpc::Status s =
+            stub_->UnaryCall(&context, request, &response);
+        histogram_.Add((Timer::Now() - start) * 1e9);
+      }
+    }) {}
 
-ClientResult RunTest(const ClientArgs& args) {
-  const auto& config = args.config();
+    ~Thread() {
+      {
+        std::lock_guard<std::mutex> g(mu_);
+        done_ = true;
+      }
+      impl_.join();
+    }
 
-  gpr_log(GPR_INFO,
-          "QPS test with parameters\n"
-          "enable_ssl = %d\n"
-          "client_channels = %d\n"
-          "client_threads = %d\n"
-          "num_rpcs = %d\n"
-          "payload_size = %d\n",
-          config.enable_ssl(), config.client_channels(), config.client_threads(), config.num_rpcs(),
-          config.payload_size());
+    void BeginSwap(Histogram* n) {
+      std::lock_guard<std::mutex> g(mu_);
+      new_ = n;
+    }
+
+    void EndSwap() {
+      std::unique_lock<std::mutex> g(mu_);
+      cv_.wait(g, [this]() { return new_ == nullptr; });
+    }
+
+   private:
+    Thread(const Thread&);
+    Thread& operator=(const Thread&);
+
+    TestService::Stub* stub_;
+    ClientConfig config_;
+    std::mutex mu_;
+    std::condition_variable cv_;
+    bool done_;
+    Histogram *new_;
+    Histogram histogram_;
+    std::thread impl_;
+  };
 
   class ClientChannelInfo {
    public:
@@ -113,133 +172,14 @@
     std::shared_ptr<ChannelInterface> channel_;
     std::unique_ptr<TestService::Stub> stub_;
   };
-
-  std::vector<ClientChannelInfo> channels;
-  for (int i = 0; i < config.client_channels(); i++) {
-    channels.push_back(ClientChannelInfo(args.server_targets(i % args.server_targets_size()), config));
-  }
-
-  std::vector<std::thread> threads;  // Will add threads when ready to execute
-  std::vector< ::gpr_histogram *> thread_stats(config.client_threads());
-
-  grpc::ClientContext context_stats_begin;
-
-  grpc_profiler_start("qps_client.prof");
-
-  Timer timer;
-
-  for (int i = 0; i < config.client_threads(); i++) {
-    gpr_histogram *hist = gpr_histogram_create(0.01, 60e9);
-    GPR_ASSERT(hist != NULL);
-    thread_stats[i] = hist;
-
-    threads.push_back(
-        std::thread([hist, config, &channels](int channel_num) {
-                      SimpleRequest request;
-                      SimpleResponse response;
-                      request.set_response_type(
-                          grpc::testing::PayloadType::COMPRESSABLE);
-                      request.set_response_size(config.payload_size());
-
-                      for (int j = 0; j < config.num_rpcs(); j++) {
-                        TestService::Stub *stub =
-                            channels[channel_num].get_stub();
-                        double start = now();
-                        grpc::ClientContext context;
-                        grpc::Status s =
-                            stub->UnaryCall(&context, request, &response);
-                        gpr_histogram_add(hist, now() - start);
-
-                        GPR_ASSERT((s.code() == grpc::StatusCode::OK) &&
-                                   (response.payload().type() ==
-                                    grpc::testing::PayloadType::COMPRESSABLE) &&
-                                   (response.payload().body().length() ==
-                                    static_cast<size_t>(config.payload_size())));
-
-                        // Now do runtime round-robin assignment of the next
-                        // channel number
-                        channel_num += config.client_threads();
-                        channel_num %= config.client_channels();
-                      }
-                    },
-                    i % config.client_channels()));
-  }
-
-  for (auto &t : threads) {
-    t.join();
-  }
-
-  auto timer_result = timer.Mark();
-
-  grpc_profiler_stop();
-
-  gpr_histogram *hist = gpr_histogram_create(0.01, 60e9);
-  GPR_ASSERT(hist != NULL);
-
-  for (int i = 0; i < config.client_threads(); i++) {
-    gpr_histogram *h = thread_stats[i];
-    gpr_log(GPR_INFO, "latency at thread %d (50/90/95/99/99.9): %f/%f/%f/%f/%f",
-            i, gpr_histogram_percentile(h, 50), gpr_histogram_percentile(h, 90),
-            gpr_histogram_percentile(h, 95), gpr_histogram_percentile(h, 99),
-            gpr_histogram_percentile(h, 99.9));
-    gpr_histogram_merge(hist, h);
-    gpr_histogram_destroy(h);
-  }
-
-  ClientResult result;
-  auto* latencies = result.mutable_latencies();
-  latencies->set_l_50(gpr_histogram_percentile(hist, 50));
-  latencies->set_l_90(gpr_histogram_percentile(hist, 90));
-  latencies->set_l_99(gpr_histogram_percentile(hist, 99));
-  latencies->set_l_999(gpr_histogram_percentile(hist, 99.9));
-  result.set_num_rpcs(config.client_threads() * config.num_rpcs());
-  result.set_time_elapsed(timer_result.wall);
-  result.set_time_system(timer_result.system);
-  result.set_time_user(timer_result.user);
-
-  gpr_histogram_destroy(hist);
-
-  return result;
-}
-
-class ClientImpl final : public QpsClient::Service {
- public:
-  Status RunTest(ServerContext* ctx, const ClientArgs* args, ClientResult* result) override {
-    *result = ::RunTest(*args);
-    return Status::OK;
-  }
-
- private:
-  std::mutex client_mu_;
+  std::vector<ClientChannelInfo> channels_;
+  std::vector<std::unique_ptr<Thread>> threads_;
+  std::unique_ptr<Timer> timer_;
 };
 
-static void RunServer() {
-  char* server_address = NULL;
-  gpr_join_host_port(&server_address, "::", FLAGS_driver_port);
-
-  ClientImpl service;
-
-  ServerBuilder builder;
-  builder.AddPort(server_address);
-  builder.RegisterService(&service);
-
-  gpr_free(server_address);
-
-  auto server = builder.BuildAndStart();
-
-  while (!got_sigint) {
-    std::this_thread::sleep_for(std::chrono::seconds(5));
-  }
+std::unique_ptr<Client> CreateSynchronousClient(const ClientConfig& config) {
+  return std::unique_ptr<Client>(new SynchronousClient(config));
 }
 
-int main(int argc, char **argv) {
-  signal(SIGINT, sigint_handler);
-
-  grpc_init();
-  ParseCommandLineFlags(&argc, &argv, true);
-
-  RunServer();
-
-  grpc_shutdown();
-  return 0;
-}
+}  // namespace testing
+}  // namespace grpc