Allow RunScenarios to spawn in-process workers

This allows us to get back to single binary tests where appropriate, which will help in-depth profiling efforts.
I've built this atop my smoke_test changes as they inspired me to get this done.
diff --git a/test/cpp/qps/qps_driver.cc b/test/cpp/qps/qps_driver.cc
index f42d538..e1f1649 100644
--- a/test/cpp/qps/qps_driver.cc
+++ b/test/cpp/qps/qps_driver.cc
@@ -42,6 +42,7 @@
 
 DEFINE_int32(warmup_seconds, 5, "Warmup time (in seconds)");
 DEFINE_int32(benchmark_seconds, 30, "Benchmark time (in seconds)");
+DEFINE_int32(local_workers, 0, "Number of local workers to start");
 
 // Common config
 DEFINE_bool(enable_ssl, false, "Use SSL");
@@ -103,7 +104,8 @@
 
   auto result = RunScenario(client_config, FLAGS_num_clients,
                             server_config, FLAGS_num_servers,
-                            FLAGS_warmup_seconds, FLAGS_benchmark_seconds);
+                            FLAGS_warmup_seconds, FLAGS_benchmark_seconds,
+                            FLAGS_local_workers);
 
   gpr_log(GPR_INFO, "QPS: %.1f",
           result.latencies.Count() /