Merge pull request #16081 from sreecha/rq-threads

Restrict the number of threads in C++ sync server
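
The user-facing knob is ResourceQuota::SetMaxThreads() plus the existing
ServerBuilder::SetResourceQuota() hook; when no quota is supplied, the sync
server defaults to 500 threads. A minimal usage sketch (the address, the
limit of 64, and the commented-out service registration are placeholders):

    #include <grpcpp/grpcpp.h>
    #include <grpcpp/resource_quota.h>

    grpc::ResourceQuota quota("sync-server-rq");
    quota.SetMaxThreads(64);  // cap server threads via the API added in this PR

    grpc::ServerBuilder builder;
    builder.AddListeningPort("0.0.0.0:50051",
                             grpc::InsecureServerCredentials());
    builder.SetResourceQuota(quota);
    // builder.RegisterService(&service);
    std::unique_ptr<grpc::Server> server = builder.BuildAndStart();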
diff --git a/grpc.def b/grpc.def
index 5b98792..312e916 100644
--- a/grpc.def
+++ b/grpc.def
@@ -68,6 +68,7 @@
     grpc_resource_quota_ref
     grpc_resource_quota_unref
     grpc_resource_quota_resize
+    grpc_resource_quota_set_max_threads
     grpc_resource_quota_arg_vtable
     grpc_channelz_get_top_channels
     grpc_channelz_get_channel
diff --git a/include/grpc/grpc.h b/include/grpc/grpc.h
index f0eb2c0..eb02514 100644
--- a/include/grpc/grpc.h
+++ b/include/grpc/grpc.h
@@ -450,6 +450,10 @@
 GRPCAPI void grpc_resource_quota_resize(grpc_resource_quota* resource_quota,
                                         size_t new_size);
 
+/** Update the maximum number of threads allowed */
+GRPCAPI void grpc_resource_quota_set_max_threads(
+    grpc_resource_quota* resource_quota, int new_max_threads);
+
 /** Fetch a vtable for a grpc_channel_arg that points to a grpc_resource_quota
  */
 GRPCAPI const grpc_arg_pointer_vtable* grpc_resource_quota_arg_vtable(void);
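
At the C-core level, the new setter composes with the existing quota
lifecycle calls declared alongside it. A rough sketch (quota name and limits
are arbitrary; error handling and the channel-arg attachment are elided):

    grpc_resource_quota* rq = grpc_resource_quota_create("my-rq");
    grpc_resource_quota_resize(rq, 128 * 1024 * 1024);  /* memory bound */
    grpc_resource_quota_set_max_threads(rq, 256);       /* new thread bound */
    /* ... attach rq to a server or channel via channel args ... */
    grpc_resource_quota_unref(rq);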
diff --git a/include/grpcpp/resource_quota.h b/include/grpcpp/resource_quota.h
index 554437a..50bd1cb 100644
--- a/include/grpcpp/resource_quota.h
+++ b/include/grpcpp/resource_quota.h
@@ -26,10 +26,10 @@
 
 namespace grpc {
 
-/// ResourceQuota represents a bound on memory usage by the gRPC library.
-/// A ResourceQuota can be attached to a server (via \a ServerBuilder),
+/// ResourceQuota represents a bound on memory and thread usage by the gRPC
+/// library. A ResourceQuota can be attached to a server (via \a ServerBuilder),
 /// or a client channel (via \a ChannelArguments).
-/// gRPC will attempt to keep memory used by all attached entities
+/// gRPC will attempt to keep memory and threads used by all attached entities
 /// below the ResourceQuota bound.
 class ResourceQuota final : private GrpcLibraryCodegen {
  public:
@@ -44,6 +44,16 @@
   /// No time bound is given for this to occur however.
   ResourceQuota& Resize(size_t new_size);
 
+  /// Set the max number of threads that can be allocated from this
+  /// ResourceQuota object.
+  ///
+  /// If the new_max_threads value is smaller than the current value, no new
+  /// threads are allocated until the number of active threads falls below
+  /// new_max_threads. There is no time bound on when this may happen, i.e.
+  /// none of the current threads is forcefully destroyed; all threads run
+  /// their normal course.
+  ResourceQuota& SetMaxThreads(int new_max_threads);
+
   grpc_resource_quota* c_resource_quota() const { return impl_; }
 
  private:
diff --git a/include/grpcpp/server.h b/include/grpcpp/server.h
index 81c3907..189cf8a 100644
--- a/include/grpcpp/server.h
+++ b/include/grpcpp/server.h
@@ -144,7 +144,8 @@
   Server(int max_message_size, ChannelArguments* args,
          std::shared_ptr<std::vector<std::unique_ptr<ServerCompletionQueue>>>
              sync_server_cqs,
-         int min_pollers, int max_pollers, int sync_cq_timeout_msec);
+         grpc_resource_quota* server_rq, int min_pollers, int max_pollers,
+         int sync_cq_timeout_msec);
 
   /// Start the server.
   ///
diff --git a/src/core/lib/iomgr/resource_quota.cc b/src/core/lib/iomgr/resource_quota.cc
index 539bc12..b6fc757 100644
--- a/src/core/lib/iomgr/resource_quota.cc
+++ b/src/core/lib/iomgr/resource_quota.cc
@@ -96,6 +96,9 @@
      list, false otherwise */
   bool added_to_free_pool;
 
+  /* The number of threads currently allocated to this resource user */
+  gpr_atm num_threads_allocated;
+
   /* Reclaimers: index 0 is the benign reclaimer, 1 is the destructive reclaimer
    */
   grpc_closure* reclaimers[2];
@@ -135,12 +138,33 @@
 
   gpr_atm last_size;
 
+  /* Mutex to protect max_threads and num_threads_allocated */
+  /* Note: We could have used gpr_atm for max_threads and num_threads_allocated
+   * and avoided having this mutex; but in that case, each invocation of
+   * grpc_resource_user_allocate_threads() would have to do at least two
+   * atomic loads (for max_threads and num_threads_allocated) followed by a
+   * CAS (on num_threads_allocated).
+   * Moreover, we expect grpc_resource_user_allocate_threads() to often be
+   * called concurrently, which increases the chances of the CAS failing and
+   * being retried. This additional complexity is not worth the tiny perf
+   * gain we may (or may not) get by using atomics */
+  gpr_mu thread_count_mu;
+
+  /* Max number of threads allowed */
+  int max_threads;
+
+  /* Number of threads currently allocated via this resource_quota object */
+  int num_threads_allocated;
+
   /* Has rq_step been scheduled to occur? */
   bool step_scheduled;
+
   /* Are we currently reclaiming memory */
   bool reclaiming;
+
   /* Closure around rq_step */
   grpc_closure rq_step_closure;
+
   /* Closure around rq_reclamation_done */
   grpc_closure rq_reclamation_done_closure;
 
@@ -524,6 +548,11 @@
 static void ru_destroy(void* ru, grpc_error* error) {
   grpc_resource_user* resource_user = static_cast<grpc_resource_user*>(ru);
   GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0);
+  // Free all the remaining thread quota
+  grpc_resource_user_free_threads(resource_user,
+                                  static_cast<int>(gpr_atm_no_barrier_load(
+                                      &resource_user->num_threads_allocated)));
+
   for (int i = 0; i < GRPC_RULIST_COUNT; i++) {
     rulist_remove(resource_user, static_cast<grpc_rulist>(i));
   }
@@ -594,6 +623,9 @@
   resource_quota->free_pool = INT64_MAX;
   resource_quota->size = INT64_MAX;
   gpr_atm_no_barrier_store(&resource_quota->last_size, GPR_ATM_MAX);
+  gpr_mu_init(&resource_quota->thread_count_mu);
+  resource_quota->max_threads = INT_MAX;
+  resource_quota->num_threads_allocated = 0;
   resource_quota->step_scheduled = false;
   resource_quota->reclaiming = false;
   gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, 0);
@@ -616,6 +648,8 @@
 
 void grpc_resource_quota_unref_internal(grpc_resource_quota* resource_quota) {
   if (gpr_unref(&resource_quota->refs)) {
+    // No outstanding thread quota
+    GPR_ASSERT(resource_quota->num_threads_allocated == 0);
     GRPC_COMBINER_UNREF(resource_quota->combiner, "resource_quota");
     gpr_free(resource_quota->name);
     gpr_free(resource_quota);
@@ -647,6 +681,15 @@
 }
 
 /* Public API */
+void grpc_resource_quota_set_max_threads(grpc_resource_quota* resource_quota,
+                                         int new_max_threads) {
+  GPR_ASSERT(new_max_threads >= 0);
+  gpr_mu_lock(&resource_quota->thread_count_mu);
+  resource_quota->max_threads = new_max_threads;
+  gpr_mu_unlock(&resource_quota->thread_count_mu);
+}
+
+/* Public API */
 void grpc_resource_quota_resize(grpc_resource_quota* resource_quota,
                                 size_t size) {
   grpc_core::ExecCtx exec_ctx;
@@ -731,6 +774,7 @@
   grpc_closure_list_init(&resource_user->on_allocated);
   resource_user->allocating = false;
   resource_user->added_to_free_pool = false;
+  gpr_atm_no_barrier_store(&resource_user->num_threads_allocated, 0);
   resource_user->reclaimers[0] = nullptr;
   resource_user->reclaimers[1] = nullptr;
   resource_user->new_reclaimers[0] = nullptr;
@@ -785,6 +829,40 @@
   }
 }
 
+bool grpc_resource_user_allocate_threads(grpc_resource_user* resource_user,
+                                         int thread_count) {
+  GPR_ASSERT(thread_count >= 0);
+  bool is_success = false;
+  gpr_mu_lock(&resource_user->resource_quota->thread_count_mu);
+  grpc_resource_quota* rq = resource_user->resource_quota;
+  if (rq->num_threads_allocated + thread_count <= rq->max_threads) {
+    rq->num_threads_allocated += thread_count;
+    gpr_atm_no_barrier_fetch_add(&resource_user->num_threads_allocated,
+                                 thread_count);
+    is_success = true;
+  }
+  gpr_mu_unlock(&resource_user->resource_quota->thread_count_mu);
+  return is_success;
+}
+
+void grpc_resource_user_free_threads(grpc_resource_user* resource_user,
+                                     int thread_count) {
+  GPR_ASSERT(thread_count >= 0);
+  gpr_mu_lock(&resource_user->resource_quota->thread_count_mu);
+  grpc_resource_quota* rq = resource_user->resource_quota;
+  rq->num_threads_allocated -= thread_count;
+  int old_count = static_cast<int>(gpr_atm_no_barrier_fetch_add(
+      &resource_user->num_threads_allocated, -thread_count));
+  if (old_count < thread_count || rq->num_threads_allocated < 0) {
+    gpr_log(GPR_ERROR,
+            "Releasing more threads (%d) than currently allocated (rq threads: "
+            "%d, ru threads: %d)",
+            thread_count, rq->num_threads_allocated + thread_count, old_count);
+    abort();
+  }
+  gpr_mu_unlock(&resource_user->resource_quota->thread_count_mu);
+}
+
 void grpc_resource_user_alloc(grpc_resource_user* resource_user, size_t size,
                               grpc_closure* optional_on_done) {
   gpr_mu_lock(&resource_user->mu);
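
The thread_count_mu comment above argues against a lock-free design. For
contrast, here is a rough sketch of the rejected atomics-based allocate (the
*_atm fields are hypothetical and do not exist in this change):

    /* Hypothetical lock-free variant: two loads plus a CAS retry loop */
    static bool allocate_threads_lockfree(grpc_resource_quota* rq, int n) {
      for (;;) {
        gpr_atm max = gpr_atm_no_barrier_load(&rq->max_threads_atm);
        gpr_atm cur = gpr_atm_no_barrier_load(&rq->num_threads_atm);
        if (cur + n > max) return false;
        if (gpr_atm_no_barrier_cas(&rq->num_threads_atm, cur, cur + n)) {
          return true;
        }
        /* CAS lost a race with a concurrent caller; retry. This loop is
           exactly the contention cost the mutex-based version avoids. */
      }
    }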
diff --git a/src/core/lib/iomgr/resource_quota.h b/src/core/lib/iomgr/resource_quota.h
index 937daf8..1d5e95e 100644
--- a/src/core/lib/iomgr/resource_quota.h
+++ b/src/core/lib/iomgr/resource_quota.h
@@ -93,6 +93,22 @@
 void grpc_resource_user_unref(grpc_resource_user* resource_user);
 void grpc_resource_user_shutdown(grpc_resource_user* resource_user);
 
+/* Attempts to get quota from the resource_user to create 'thd_count'
+ * threads. Returns true if successful (i.e. the caller is now free to create
+ * 'thd_count' threads) or false if the quota is not available */
+bool grpc_resource_user_allocate_threads(grpc_resource_user* resource_user,
+                                         int thd_count);
+/* Releases 'thd_count' worth of quota back to the resource user. The quota
+ * should have been previously obtained successfully by calling
+ * grpc_resource_user_allocate_threads().
+ *
+ * Note: There need not be an exact one-to-one correspondence between
+ * grpc_resource_user_allocate_threads() and grpc_resource_user_free_threads()
+ * calls. The only requirement is that every thread allocated is eventually
+ * released */
+void grpc_resource_user_free_threads(grpc_resource_user* resource_user,
+                                     int thd_count);
+
 /* Allocate from the resource user (and its quota).
    If optional_on_done is NULL, then allocate immediately. This may push the
    quota over-limit, at which point reclamation will kick in.
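
To make the "no one-to-one correspondence" note concrete, a small sketch of a
legal call sequence (assuming ru is a live grpc_resource_user with quota for
at least three threads):

    if (grpc_resource_user_allocate_threads(ru, 3)) {
      /* ... spawn three threads ... */
      /* they may retire at different times; free in any grouping, as long
         as all three are eventually returned */
      grpc_resource_user_free_threads(ru, 1);
      grpc_resource_user_free_threads(ru, 2);
    }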
diff --git a/src/cpp/common/resource_quota_cc.cc b/src/cpp/common/resource_quota_cc.cc
index daeb0ba..276e5f7 100644
--- a/src/cpp/common/resource_quota_cc.cc
+++ b/src/cpp/common/resource_quota_cc.cc
@@ -33,4 +33,8 @@
   return *this;
 }
 
+ResourceQuota& ResourceQuota::SetMaxThreads(int new_max_threads) {
+  grpc_resource_quota_set_max_threads(impl_, new_max_threads);
+  return *this;
+}
 }  // namespace grpc
diff --git a/src/cpp/server/server_builder.cc b/src/cpp/server/server_builder.cc
index e0b9b7a..0ab3cd0 100644
--- a/src/cpp/server/server_builder.cc
+++ b/src/cpp/server/server_builder.cc
@@ -261,7 +261,7 @@
   }
 
   std::unique_ptr<Server> server(new Server(
-      max_receive_message_size_, &args, sync_server_cqs,
+      max_receive_message_size_, &args, sync_server_cqs, resource_quota_,
       sync_server_settings_.min_pollers, sync_server_settings_.max_pollers,
       sync_server_settings_.cq_timeout_msec));
 
diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc
index 0d77510..472c503 100644
--- a/src/cpp/server/server_cc.cc
+++ b/src/cpp/server/server_cc.cc
@@ -47,6 +47,12 @@
 namespace grpc {
 namespace {
 
+// The default maximum number of threads that can be created in the sync
+// server. The value of 500 was chosen empirically. To increase the max
+// number of threads in a sync server, pass a custom ResourceQuota object
+// (with the desired max thread count set) to the server builder
+#define DEFAULT_MAX_SYNC_SERVER_THREADS 500
+
 class DefaultGlobalCallbacks final : public Server::GlobalCallbacks {
  public:
   ~DefaultGlobalCallbacks() override {}
@@ -266,9 +272,9 @@
  public:
   SyncRequestThreadManager(Server* server, CompletionQueue* server_cq,
                            std::shared_ptr<GlobalCallbacks> global_callbacks,
-                           int min_pollers, int max_pollers,
-                           int cq_timeout_msec)
-      : ThreadManager(min_pollers, max_pollers),
+                           grpc_resource_quota* rq, int min_pollers,
+                           int max_pollers, int cq_timeout_msec)
+      : ThreadManager("SyncServer", rq, min_pollers, max_pollers),
         server_(server),
         server_cq_(server_cq),
         cq_timeout_msec_(cq_timeout_msec),
@@ -376,7 +382,8 @@
     int max_receive_message_size, ChannelArguments* args,
     std::shared_ptr<std::vector<std::unique_ptr<ServerCompletionQueue>>>
         sync_server_cqs,
-    int min_pollers, int max_pollers, int sync_cq_timeout_msec)
+    grpc_resource_quota* server_rq, int min_pollers, int max_pollers,
+    int sync_cq_timeout_msec)
     : max_receive_message_size_(max_receive_message_size),
       sync_server_cqs_(std::move(sync_server_cqs)),
       started_(false),
@@ -392,10 +399,22 @@
   global_callbacks_->UpdateArguments(args);
 
   if (sync_server_cqs_ != nullptr) {
+    bool default_rq_created = false;
+    if (server_rq == nullptr) {
+      server_rq = grpc_resource_quota_create("SyncServer-default-rq");
+      grpc_resource_quota_set_max_threads(server_rq,
+                                          DEFAULT_MAX_SYNC_SERVER_THREADS);
+      default_rq_created = true;
+    }
+
     for (const auto& it : *sync_server_cqs_) {
       sync_req_mgrs_.emplace_back(new SyncRequestThreadManager(
-          this, it.get(), global_callbacks_, min_pollers, max_pollers,
-          sync_cq_timeout_msec));
+          this, it.get(), global_callbacks_, server_rq, min_pollers,
+          max_pollers, sync_cq_timeout_msec));
+    }
+
+    if (default_rq_created) {
+      grpc_resource_quota_unref(server_rq);
     }
   }
 
diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc
index 02ac56a..fa9eec5 100644
--- a/src/cpp/thread_manager/thread_manager.cc
+++ b/src/cpp/thread_manager/thread_manager.cc
@@ -22,8 +22,8 @@
 #include <mutex>
 
 #include <grpc/support/log.h>
-
 #include "src/core/lib/gprpp/thd.h"
+#include "src/core/lib/iomgr/exec_ctx.h"
 
 namespace grpc {
 
@@ -48,12 +48,17 @@
   thd_.Join();
 }
 
-ThreadManager::ThreadManager(int min_pollers, int max_pollers)
+ThreadManager::ThreadManager(const char* name,
+                             grpc_resource_quota* resource_quota,
+                             int min_pollers, int max_pollers)
     : shutdown_(false),
       num_pollers_(0),
       min_pollers_(min_pollers),
       max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
-      num_threads_(0) {}
+      num_threads_(0),
+      max_active_threads_sofar_(0) {
+  resource_user_ = grpc_resource_user_create(resource_quota, name);
+}
 
 ThreadManager::~ThreadManager() {
   {
@@ -61,6 +66,8 @@
     GPR_ASSERT(num_threads_ == 0);
   }
 
+  grpc_core::ExecCtx exec_ctx;  // grpc_resource_user_unref needs an exec_ctx
+  grpc_resource_user_unref(resource_user_);
   CleanupCompletedThreads();
 }
 
@@ -81,17 +88,27 @@
   return shutdown_;
 }
 
+int ThreadManager::GetMaxActiveThreadsSoFar() {
+  std::lock_guard<std::mutex> list_lock(list_mu_);
+  return max_active_threads_sofar_;
+}
+
 void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
   {
     std::lock_guard<std::mutex> list_lock(list_mu_);
     completed_threads_.push_back(thd);
   }
 
-  std::lock_guard<std::mutex> lock(mu_);
-  num_threads_--;
-  if (num_threads_ == 0) {
-    shutdown_cv_.notify_one();
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+    num_threads_--;
+    if (num_threads_ == 0) {
+      shutdown_cv_.notify_one();
+    }
   }
+
+  // Give a thread back to the resource quota
+  grpc_resource_user_free_threads(resource_user_, 1);
 }
 
 void ThreadManager::CleanupCompletedThreads() {
@@ -106,14 +123,22 @@
 }
 
 void ThreadManager::Initialize() {
+  if (!grpc_resource_user_allocate_threads(resource_user_, min_pollers_)) {
+    gpr_log(GPR_ERROR,
+            "No thread quota available to even create the minimum required "
+            "polling threads (i.e %d). Unable to start the thread manager",
+            min_pollers_);
+    abort();
+  }
+
   {
     std::unique_lock<std::mutex> lock(mu_);
     num_pollers_ = min_pollers_;
     num_threads_ = min_pollers_;
+    max_active_threads_sofar_ = min_pollers_;
   }
 
   for (int i = 0; i < min_pollers_; i++) {
-    // Create a new thread (which ends up calling the MainWorkLoop() function
     new WorkerThread(this);
   }
 }
@@ -139,11 +164,15 @@
         done = true;
         break;
       case WORK_FOUND:
-        // If we got work and there are now insufficient pollers, start a new
-        // one
-        if (!shutdown_ && num_pollers_ < min_pollers_) {
+        // If we got work and there are now insufficient pollers and there is
+        // quota available to create a new thread, start a new poller thread
+        if (!shutdown_ && num_pollers_ < min_pollers_ &&
+            grpc_resource_user_allocate_threads(resource_user_, 1)) {
           num_pollers_++;
           num_threads_++;
+          if (num_threads_ > max_active_threads_sofar_) {
+            max_active_threads_sofar_ = num_threads_;
+          }
           // Drop lock before spawning thread to avoid contention
           lock.unlock();
           new WorkerThread(this);
@@ -196,6 +225,8 @@
     }
   };
 
+  // This thread is exiting. Do some cleanup work, i.e. delete already
+  // completed worker threads
   CleanupCompletedThreads();
 
   // If we are here, either ThreadManager is shutting down or it already has
diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h
index 5a40f2d..01043ed 100644
--- a/src/cpp/thread_manager/thread_manager.h
+++ b/src/cpp/thread_manager/thread_manager.h
@@ -27,12 +27,14 @@
 #include <grpcpp/support/config.h>
 
 #include "src/core/lib/gprpp/thd.h"
+#include "src/core/lib/iomgr/resource_quota.h"
 
 namespace grpc {
 
 class ThreadManager {
  public:
-  explicit ThreadManager(int min_pollers, int max_pollers);
+  explicit ThreadManager(const char* name, grpc_resource_quota* resource_quota,
+                         int min_pollers, int max_pollers);
   virtual ~ThreadManager();
 
   // Initializes and Starts the Rpc Manager threads
@@ -84,6 +86,11 @@
   // all the threads have drained all the outstanding work
   virtual void Wait();
 
+  // Max number of concurrent threads that were ever active in this thread
+  // manager so far. This is useful for debugging purposes (and in unit tests)
+  // to check if resource_quota is properly being enforced.
+  int GetMaxActiveThreadsSoFar();
+
  private:
   // Helper wrapper class around grpc_core::Thread. Takes a ThreadManager object
  // and starts a new grpc_core::Thread to call the Run() function.
@@ -91,6 +98,24 @@
   // The Run() function calls ThreadManager::MainWorkLoop() function and once
   // that completes, it marks the WorkerThread completed by calling
   // ThreadManager::MarkAsCompleted()
+  //
+  // WHY IS THIS NEEDED?:
+  // When a thread terminates, some other thread *must* call Join() on that
+  // thread so that the resources are released. Having a WorkerThread wrapper
+  // will make this easier. Once Run() completes, each thread calls the
+  // following two functions:
+  //    ThreadManager::CleanupCompletedThreads()
+  //    ThreadManager::MarkAsCompleted()
+  //
+  //  - MarkAsCompleted() puts the WorkerThread object in the ThreadManager's
+  //    completed_threads_ list
+  //  - CleanupCompletedThreads() calls "Join()" on the threads that are already
+  //    in the completed_threads_ list  (since a thread cannot call Join() on
+  //    itself, it calls CleanupCompletedThreads() *before* calling
+  //    MarkAsCompleted())
+  //
+  // TODO(sreek): Consider creating the threads 'detached' so that Join() need
+  // not be called (and the need for this WorkerThread class is eliminated)
   class WorkerThread {
    public:
     WorkerThread(ThreadManager* thd_mgr);
@@ -111,13 +136,21 @@
   void MarkAsCompleted(WorkerThread* thd);
   void CleanupCompletedThreads();
 
-  // Protects shutdown_, num_pollers_ and num_threads_
-  // TODO: sreek - Change num_pollers and num_threads_ to atomics
+  // Protects shutdown_, num_pollers_, num_threads_ and
+  // max_active_threads_sofar_
   std::mutex mu_;
 
   bool shutdown_;
   std::condition_variable shutdown_cv_;
 
+  // The resource user object to use when requesting quota to create threads
+  //
+  // Note: The user of this ThreadManager object must create a
+  // grpc_resource_quota object (that contains the actual max thread quota);
+  // the ThreadManager constructor creates a grpc_resource_user from it,
+  // through which quota is requested whenever new threads need to be created
+  grpc_resource_user* resource_user_;
+
   // Number of threads doing polling
   int num_pollers_;
 
@@ -125,10 +158,15 @@
   int min_pollers_;
   int max_pollers_;
 
-  // The total number of threads (includes threads includes the threads that are
-  // currently polling i.e num_pollers_)
+  // The total number of threads currently active (this includes the threads
+  // that are currently polling, i.e. num_pollers_)
   int num_threads_;
 
+  // See GetMaxActiveThreadsSoFar()'s description.
+  // More specifically, this variable tracks the max value that num_threads_
+  // has ever reached so far
+  int max_active_threads_sofar_;
+
   std::mutex list_mu_;
   std::list<WorkerThread*> completed_threads_;
 };
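
The "WHY IS THIS NEEDED?:" comment above describes the join handoff in prose;
a condensed sketch of the sequence each worker follows on exit (simplified
pseudocode, not the literal implementation):

    // Run path of a WorkerThread (simplified):
    //   mgr->MainWorkLoop();             // poll/work until told to quit
    //   mgr->CleanupCompletedThreads();  // Join() peers that exited earlier
    //   mgr->MarkAsCompleted(this);      // enqueue self; a later thread (or
    //                                    // the destructor) Joins this thread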
diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.c b/src/ruby/ext/grpc/rb_grpc_imports.generated.c
index 2443532..78090af 100644
--- a/src/ruby/ext/grpc/rb_grpc_imports.generated.c
+++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.c
@@ -91,6 +91,7 @@
 grpc_resource_quota_ref_type grpc_resource_quota_ref_import;
 grpc_resource_quota_unref_type grpc_resource_quota_unref_import;
 grpc_resource_quota_resize_type grpc_resource_quota_resize_import;
+grpc_resource_quota_set_max_threads_type grpc_resource_quota_set_max_threads_import;
 grpc_resource_quota_arg_vtable_type grpc_resource_quota_arg_vtable_import;
 grpc_channelz_get_top_channels_type grpc_channelz_get_top_channels_import;
 grpc_channelz_get_channel_type grpc_channelz_get_channel_import;
@@ -341,6 +342,7 @@
   grpc_resource_quota_ref_import = (grpc_resource_quota_ref_type) GetProcAddress(library, "grpc_resource_quota_ref");
   grpc_resource_quota_unref_import = (grpc_resource_quota_unref_type) GetProcAddress(library, "grpc_resource_quota_unref");
   grpc_resource_quota_resize_import = (grpc_resource_quota_resize_type) GetProcAddress(library, "grpc_resource_quota_resize");
+  grpc_resource_quota_set_max_threads_import = (grpc_resource_quota_set_max_threads_type) GetProcAddress(library, "grpc_resource_quota_set_max_threads");
   grpc_resource_quota_arg_vtable_import = (grpc_resource_quota_arg_vtable_type) GetProcAddress(library, "grpc_resource_quota_arg_vtable");
   grpc_channelz_get_top_channels_import = (grpc_channelz_get_top_channels_type) GetProcAddress(library, "grpc_channelz_get_top_channels");
   grpc_channelz_get_channel_import = (grpc_channelz_get_channel_type) GetProcAddress(library, "grpc_channelz_get_channel");
diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.h b/src/ruby/ext/grpc/rb_grpc_imports.generated.h
index b08a1f9..1807efa 100644
--- a/src/ruby/ext/grpc/rb_grpc_imports.generated.h
+++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.h
@@ -248,6 +248,9 @@
 typedef void(*grpc_resource_quota_resize_type)(grpc_resource_quota* resource_quota, size_t new_size);
 extern grpc_resource_quota_resize_type grpc_resource_quota_resize_import;
 #define grpc_resource_quota_resize grpc_resource_quota_resize_import
+typedef void(*grpc_resource_quota_set_max_threads_type)(grpc_resource_quota* resource_quota, int new_max_threads);
+extern grpc_resource_quota_set_max_threads_type grpc_resource_quota_set_max_threads_import;
+#define grpc_resource_quota_set_max_threads grpc_resource_quota_set_max_threads_import
 typedef const grpc_arg_pointer_vtable*(*grpc_resource_quota_arg_vtable_type)(void);
 extern grpc_resource_quota_arg_vtable_type grpc_resource_quota_arg_vtable_import;
 #define grpc_resource_quota_arg_vtable grpc_resource_quota_arg_vtable_import
diff --git a/test/core/iomgr/resource_quota_test.cc b/test/core/iomgr/resource_quota_test.cc
index 059ff7b..f3b35fe 100644
--- a/test/core/iomgr/resource_quota_test.cc
+++ b/test/core/iomgr/resource_quota_test.cc
@@ -798,6 +798,98 @@
   }
 }
 
+// Simple test to check resource quota thread limits
+static void test_thread_limit() {
+  grpc_core::ExecCtx exec_ctx;
+
+  grpc_resource_quota* rq = grpc_resource_quota_create("test_thread_limit");
+  grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1");
+  grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2");
+
+  // Max threads = 100
+  grpc_resource_quota_set_max_threads(rq, 100);
+
+  // Request quota for 100 threads in total (50 each for ru1 and ru2)
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 10));
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 10));
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 40));
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 40));
+
+  // Threads exhausted. Next request must fail
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru2, 20));
+
+  // Free 20 threads from two different users
+  grpc_resource_user_free_threads(ru1, 10);
+  grpc_resource_user_free_threads(ru2, 10);
+
+  // The next request for 20 threads must succeed
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 20));
+
+  // No more thread quota again
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 20));
+
+  // Free 10 more
+  grpc_resource_user_free_threads(ru1, 10);
+
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 5));
+  GPR_ASSERT(
+      !grpc_resource_user_allocate_threads(ru2, 10));  // Only 5 available
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 5));
+
+  // Teardown (ru1 and ru2 release all the quota back to rq)
+  grpc_resource_user_unref(ru1);
+  grpc_resource_user_unref(ru2);
+  grpc_resource_quota_unref(rq);
+}
+
+// Change max quota in either direction dynamically
+static void test_thread_maxquota_change() {
+  grpc_core::ExecCtx exec_ctx;
+
+  grpc_resource_quota* rq =
+      grpc_resource_quota_create("test_thread_maxquota_change");
+  grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1");
+  grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2");
+
+  // Max threads = 100
+  grpc_resource_quota_set_max_threads(rq, 100);
+
+  // Request quota for 100 threads (50 for ru1, 50 for ru2)
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 50));
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 50));
+
+  // Threads exhausted. Next request must fail
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru2, 20));
+
+  // Increase maxquota and retry
+  // Max threads = 150;
+  grpc_resource_quota_set_max_threads(rq, 150);
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 20));  // ru2=70, ru1=50
+
+  // Decrease maxquota (Note: Quota already given to ru1 and ru2 is unaffected)
+  // Max threads = 10;
+  grpc_resource_quota_set_max_threads(rq, 10);
+
+  // New requests will fail until quota is available
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10));
+
+  // Make quota available
+  grpc_resource_user_free_threads(ru1, 50);                   // ru1 now has 0
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10));  // not enough
+
+  grpc_resource_user_free_threads(ru2, 70);  // ru2 now has 0
+
+  // Now we can get quota up-to 10, the current max
+  GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 10));
+  // No more thread quota again
+  GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10));
+
+  // Teardown (ru1 and ru2 release all the quota back to rq)
+  grpc_resource_user_unref(ru1);
+  grpc_resource_user_unref(ru2);
+  grpc_resource_quota_unref(rq);
+}
+
 int main(int argc, char** argv) {
   grpc_test_init(argc, argv);
   grpc_init();
@@ -827,6 +919,11 @@
   test_negative_rq_free_pool();
   gpr_mu_destroy(&g_mu);
   gpr_cv_destroy(&g_cv);
+
+  // Resource quota thread related
+  test_thread_limit();
+  test_thread_maxquota_change();
+
   grpc_shutdown();
   return 0;
 }
diff --git a/test/core/surface/public_headers_must_be_c89.c b/test/core/surface/public_headers_must_be_c89.c
index 9f4ad2b..497f719 100644
--- a/test/core/surface/public_headers_must_be_c89.c
+++ b/test/core/surface/public_headers_must_be_c89.c
@@ -130,6 +130,7 @@
   printf("%lx", (unsigned long) grpc_resource_quota_ref);
   printf("%lx", (unsigned long) grpc_resource_quota_unref);
   printf("%lx", (unsigned long) grpc_resource_quota_resize);
+  printf("%lx", (unsigned long) grpc_resource_quota_set_max_threads);
   printf("%lx", (unsigned long) grpc_resource_quota_arg_vtable);
   printf("%lx", (unsigned long) grpc_channelz_get_top_channels);
   printf("%lx", (unsigned long) grpc_channelz_get_channel);
diff --git a/test/cpp/thread_manager/thread_manager_test.cc b/test/cpp/thread_manager/thread_manager_test.cc
index 7a95a9f..838f5f7 100644
--- a/test/cpp/thread_manager/thread_manager_test.cc
+++ b/test/cpp/thread_manager/thread_manager_test.cc
@@ -30,30 +30,44 @@
 #include "test/cpp/util/test_config.h"
 
 namespace grpc {
+
+struct ThreadManagerTestSettings {
+  // The min number of pollers that SHOULD be active in ThreadManager
+  int min_pollers;
+  // The max number of pollers that could be active in ThreadManager
+  int max_pollers;
+  // The sleep duration in PollForWork() function to simulate "polling"
+  int poll_duration_ms;
+  // The sleep duration in DoWork() function to simulate "work"
+  int work_duration_ms;
+  // Max number of times PollForWork() is called before shutting down
+  int max_poll_calls;
+};
+
 class ThreadManagerTest final : public grpc::ThreadManager {
  public:
-  ThreadManagerTest()
-      : ThreadManager(kMinPollers, kMaxPollers),
+  ThreadManagerTest(const char* name, grpc_resource_quota* rq,
+                    const ThreadManagerTestSettings& settings)
+      : ThreadManager(name, rq, settings.min_pollers, settings.max_pollers),
+        settings_(settings),
         num_do_work_(0),
         num_poll_for_work_(0),
         num_work_found_(0) {}
 
   grpc::ThreadManager::WorkStatus PollForWork(void** tag, bool* ok) override;
   void DoWork(void* tag, bool ok) override;
-  void PerformTest();
+
+  // Get number of times PollForWork() returned WORK_FOUND
+  int GetNumWorkFound();
+  // Get number of times DoWork() was called
+  int GetNumDoWork();
 
  private:
   void SleepForMs(int sleep_time_ms);
 
-  static const int kMinPollers = 2;
-  static const int kMaxPollers = 10;
+  ThreadManagerTestSettings settings_;
 
-  static const int kPollingTimeoutMsec = 10;
-  static const int kDoWorkDurationMsec = 1;
-
-  // PollForWork will return SHUTDOWN after these many number of invocations
-  static const int kMaxNumPollForWork = 50;
-
+  // Counters
   gpr_atm num_do_work_;        // Number of calls to DoWork
   gpr_atm num_poll_for_work_;  // Number of calls to PollForWork
   gpr_atm num_work_found_;     // Number of times WORK_FOUND was returned
@@ -69,54 +83,117 @@
 grpc::ThreadManager::WorkStatus ThreadManagerTest::PollForWork(void** tag,
                                                                bool* ok) {
   int call_num = gpr_atm_no_barrier_fetch_add(&num_poll_for_work_, 1);
-
-  if (call_num >= kMaxNumPollForWork) {
+  if (call_num >= settings_.max_poll_calls) {
     Shutdown();
     return SHUTDOWN;
   }
 
-  // Simulate "polling for work" by sleeping for sometime
-  SleepForMs(kPollingTimeoutMsec);
-
+  SleepForMs(settings_.poll_duration_ms);  // Simulate "polling" duration
   *tag = nullptr;
   *ok = true;
 
-  // Return timeout roughly 1 out of every 3 calls
+  // Return timeout roughly 1 out of every 3 calls just to make the test a bit
+  // more interesting
   if (call_num % 3 == 0) {
     return TIMEOUT;
-  } else {
-    gpr_atm_no_barrier_fetch_add(&num_work_found_, 1);
-    return WORK_FOUND;
   }
+
+  gpr_atm_no_barrier_fetch_add(&num_work_found_, 1);
+  return WORK_FOUND;
 }
 
 void ThreadManagerTest::DoWork(void* tag, bool ok) {
   gpr_atm_no_barrier_fetch_add(&num_do_work_, 1);
-  SleepForMs(kDoWorkDurationMsec);  // Simulate doing work by sleeping
+  SleepForMs(settings_.work_duration_ms);  // Simulate work by sleeping
 }
 
-void ThreadManagerTest::PerformTest() {
-  // Initialize() starts the ThreadManager
-  Initialize();
+int ThreadManagerTest::GetNumWorkFound() {
+  return static_cast<int>(gpr_atm_no_barrier_load(&num_work_found_));
+}
 
-  // Wait for all the threads to gracefully terminate
-  Wait();
-
-  // The number of times DoWork() was called is equal to the number of times
-  // WORK_FOUND was returned
-  gpr_log(GPR_DEBUG, "DoWork() called %" PRIdPTR " times",
-          gpr_atm_no_barrier_load(&num_do_work_));
-  GPR_ASSERT(gpr_atm_no_barrier_load(&num_do_work_) ==
-             gpr_atm_no_barrier_load(&num_work_found_));
+int ThreadManagerTest::GetNumDoWork() {
+  return static_cast<int>(gpr_atm_no_barrier_load(&num_do_work_));
 }
 }  // namespace grpc
 
+// Test that the number of times DoWork() is called is equal to the number of
+// times PollForWork() returned WORK_FOUND
+static void TestPollAndWork() {
+  grpc_resource_quota* rq = grpc_resource_quota_create("Test-poll-and-work");
+  grpc::ThreadManagerTestSettings settings = {
+      2 /* min_pollers */, 10 /* max_pollers */, 10 /* poll_duration_ms */,
+      1 /* work_duration_ms */, 50 /* max_poll_calls */};
+
+  grpc::ThreadManagerTest test_thread_mgr("TestThreadManager", rq, settings);
+  grpc_resource_quota_unref(rq);
+
+  test_thread_mgr.Initialize();  // Start the thread manager
+  test_thread_mgr.Wait();        // Wait for all threads to finish
+
+  // Verify that the number of times DoWork() was called is equal to the
+  // number of times WORK_FOUND was returned
+  gpr_log(GPR_DEBUG, "DoWork() called %d times",
+          test_thread_mgr.GetNumDoWork());
+  GPR_ASSERT(test_thread_mgr.GetNumDoWork() ==
+             test_thread_mgr.GetNumWorkFound());
+}
+
+static void TestThreadQuota() {
+  const int kMaxNumThreads = 3;
+  grpc_resource_quota* rq = grpc_resource_quota_create("Test-thread-quota");
+  grpc_resource_quota_set_max_threads(rq, kMaxNumThreads);
+
+  // Set work_duration_ms to be much greater than poll_duration_ms. This way,
+  // the thread manager will be forced to create more 'polling' threads to
+  // honor the min_pollers guarantee
+  grpc::ThreadManagerTestSettings settings = {
+      1 /* min_pollers */, 1 /* max_pollers */, 1 /* poll_duration_ms */,
+      10 /* work_duration_ms */, 50 /* max_poll_calls */};
+
+  // Create two thread managers (but with the same resource quota). This
+  // means that the max number of active threads across BOTH thread managers
+  // cannot be greater than kMaxNumThreads
+  grpc::ThreadManagerTest test_thread_mgr_1("TestThreadManager-1", rq,
+                                            settings);
+  grpc::ThreadManagerTest test_thread_mgr_2("TestThreadManager-2", rq,
+                                            settings);
+  // It is ok to unref the resource quota before starting the thread managers.
+  grpc_resource_quota_unref(rq);
+
+  // Start both thread managers
+  test_thread_mgr_1.Initialize();
+  test_thread_mgr_2.Initialize();
+
+  // Wait for both to finish
+  test_thread_mgr_1.Wait();
+  test_thread_mgr_2.Wait();
+
+  // Now verify that the number of active threads in either thread manager
+  // never exceeded kMaxNumThreads
+  //
+  // NOTE: Actually, the total number of active threads across *both* thread
+  // managers at any point in time never exceeds kMaxNumThreads, but there is
+  // no easy way to verify that (i.e. we can't just check (max1 + max2 <= k),
+  // since the two maxima need not have occurred at the same time). It's okay
+  // not to test this here; the resource quota c-core tests provide enough
+  // coverage for a resource quota shared by multiple resource users
+  int max1 = test_thread_mgr_1.GetMaxActiveThreadsSoFar();
+  int max2 = test_thread_mgr_2.GetMaxActiveThreadsSoFar();
+  gpr_log(
+      GPR_DEBUG,
+      "MaxActiveThreads in TestThreadManager_1: %d, TestThreadManager_2: %d",
+      max1, max2);
+  GPR_ASSERT(max1 <= kMaxNumThreads && max2 <= kMaxNumThreads);
+}
+
 int main(int argc, char** argv) {
   std::srand(std::time(nullptr));
-
   grpc::testing::InitTest(&argc, &argv, true);
-  grpc::ThreadManagerTest test_rpc_manager;
-  test_rpc_manager.PerformTest();
+  grpc_init();
 
+  TestPollAndWork();
+  TestThreadQuota();
+
+  grpc_shutdown();
   return 0;
 }