Merge pull request #10135 from ctiller/call_arena

Use an arena for call allocations
diff --git a/BUILD b/BUILD
index 5a2e7a7..d5a4873 100644
--- a/BUILD
+++ b/BUILD
@@ -308,6 +308,7 @@
     srcs = [
         "src/core/lib/profiling/basic_timers.c",
         "src/core/lib/profiling/stap_timers.c",
+        "src/core/lib/support/arena.c",
         "src/core/lib/support/alloc.c",
         "src/core/lib/support/avl.c",
         "src/core/lib/support/backoff.c",
@@ -352,6 +353,7 @@
         "src/core/lib/support/wrap_memcpy.c",
     ],
     hdrs = [
+        "src/core/lib/support/arena.h",
         "src/core/lib/profiling/timers.h",
         "src/core/lib/support/backoff.h",
         "src/core/lib/support/block_annotate.h",
diff --git a/src/core/ext/census/grpc_filter.c b/src/core/ext/census/grpc_filter.c
index b80d831..fc29dbd 100644
--- a/src/core/ext/census/grpc_filter.c
+++ b/src/core/ext/census/grpc_filter.c
@@ -138,7 +138,7 @@
 static void client_destroy_call_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_call_element *elem,
                                      const grpc_call_final_info *final_info,
-                                     void *ignored) {
+                                     grpc_closure *ignored) {
   call_data *d = elem->call_data;
   GPR_ASSERT(d != NULL);
   /* TODO(hongyu): record rpc client stats and census_rpc_end_op here */
@@ -160,7 +160,7 @@
 static void server_destroy_call_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_call_element *elem,
                                      const grpc_call_final_info *final_info,
-                                     void *ignored) {
+                                     grpc_closure *ignored) {
   call_data *d = elem->call_data;
   GPR_ASSERT(d != NULL);
   /* TODO(hongyu): record rpc server stats and census_tracing_end_op here */
diff --git a/src/core/ext/client_channel/client_channel.c b/src/core/ext/client_channel/client_channel.c
index f2475cf..960d00e 100644
--- a/src/core/ext/client_channel/client_channel.c
+++ b/src/core/ext/client_channel/client_channel.c
@@ -661,6 +661,7 @@
   /** either 0 for no call, 1 for cancelled, or a pointer to a
       grpc_subchannel_call */
   gpr_atm subchannel_call;
+  gpr_arena *arena;
 
   subchannel_creation_phase creation_phase;
   grpc_connected_subchannel *connected_subchannel;
@@ -796,9 +797,14 @@
   } else {
     /* Create call on subchannel. */
     grpc_subchannel_call *subchannel_call = NULL;
+    const grpc_connected_subchannel_call_args call_args = {
+        .pollent = calld->pollent,
+        .path = calld->path,
+        .start_time = calld->call_start_time,
+        .deadline = calld->deadline,
+        .arena = calld->arena};
     grpc_error *new_error = grpc_connected_subchannel_create_call(
-        exec_ctx, calld->connected_subchannel, calld->pollent, calld->path,
-        calld->call_start_time, calld->deadline, &subchannel_call);
+        exec_ctx, calld->connected_subchannel, &call_args, &subchannel_call);
     if (new_error != GRPC_ERROR_NONE) {
       new_error = grpc_error_add_child(new_error, error);
       subchannel_call = CANCELLED_CALL;
@@ -1025,9 +1031,14 @@
   if (calld->creation_phase == GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING &&
       calld->connected_subchannel != NULL) {
     grpc_subchannel_call *subchannel_call = NULL;
+    const grpc_connected_subchannel_call_args call_args = {
+        .pollent = calld->pollent,
+        .path = calld->path,
+        .start_time = calld->call_start_time,
+        .deadline = calld->deadline,
+        .arena = calld->arena};
     grpc_error *error = grpc_connected_subchannel_create_call(
-        exec_ctx, calld->connected_subchannel, calld->pollent, calld->path,
-        calld->call_start_time, calld->deadline, &subchannel_call);
+        exec_ctx, calld->connected_subchannel, &call_args, &subchannel_call);
     if (error != GRPC_ERROR_NONE) {
       subchannel_call = CANCELLED_CALL;
       fail_locked(exec_ctx, calld, GRPC_ERROR_REF(error));
@@ -1114,6 +1125,7 @@
   calld->call_start_time = args->start_time;
   calld->deadline = gpr_convert_clock_type(args->deadline, GPR_CLOCK_MONOTONIC);
   calld->owning_call = args->call_stack;
+  calld->arena = args->arena;
   grpc_deadline_state_start(exec_ctx, elem, calld->deadline);
   return GRPC_ERROR_NONE;
 }
@@ -1122,7 +1134,7 @@
 static void cc_destroy_call_elem(grpc_exec_ctx *exec_ctx,
                                  grpc_call_element *elem,
                                  const grpc_call_final_info *final_info,
-                                 void *and_free_memory) {
+                                 grpc_closure *then_schedule_closure) {
   call_data *calld = elem->call_data;
   grpc_deadline_state_destroy(exec_ctx, elem);
   grpc_slice_unref_internal(exec_ctx, calld->path);
@@ -1132,6 +1144,8 @@
   GRPC_ERROR_UNREF(calld->cancel_error);
   grpc_subchannel_call *call = GET_CALL(calld);
   if (call != NULL && call != CANCELLED_CALL) {
+    grpc_subchannel_call_set_cleanup_closure(call, then_schedule_closure);
+    then_schedule_closure = NULL;
     GRPC_SUBCHANNEL_CALL_UNREF(exec_ctx, call, "client_channel_destroy_call");
   }
   GPR_ASSERT(calld->creation_phase == GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING);
@@ -1141,7 +1155,7 @@
                                     "picked");
   }
   gpr_free(calld->waiting_ops);
-  gpr_free(and_free_memory);
+  grpc_closure_sched(exec_ctx, then_schedule_closure, GRPC_ERROR_NONE);
 }
 
 static void cc_set_pollset_or_pollset_set(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/ext/client_channel/subchannel.c b/src/core/ext/client_channel/subchannel.c
index 5df0a90..cef08a0 100644
--- a/src/core/ext/client_channel/subchannel.c
+++ b/src/core/ext/client_channel/subchannel.c
@@ -148,6 +148,7 @@
 
 struct grpc_subchannel_call {
   grpc_connected_subchannel *connection;
+  grpc_closure *schedule_closure_after_destroy;
 };
 
 #define SUBCHANNEL_CALL_TO_CALL_STACK(call) ((grpc_call_stack *)((call) + 1))
@@ -719,13 +720,22 @@
 static void subchannel_call_destroy(grpc_exec_ctx *exec_ctx, void *call,
                                     grpc_error *error) {
   grpc_subchannel_call *c = call;
+  GPR_ASSERT(c->schedule_closure_after_destroy != NULL);
   GPR_TIMER_BEGIN("grpc_subchannel_call_unref.destroy", 0);
   grpc_connected_subchannel *connection = c->connection;
-  grpc_call_stack_destroy(exec_ctx, SUBCHANNEL_CALL_TO_CALL_STACK(c), NULL, c);
+  grpc_call_stack_destroy(exec_ctx, SUBCHANNEL_CALL_TO_CALL_STACK(c), NULL,
+                          c->schedule_closure_after_destroy);
   GRPC_CONNECTED_SUBCHANNEL_UNREF(exec_ctx, connection, "subchannel_call");
   GPR_TIMER_END("grpc_subchannel_call_unref.destroy", 0);
 }
 
+void grpc_subchannel_call_set_cleanup_closure(grpc_subchannel_call *call,
+                                              grpc_closure *closure) {
+  GPR_ASSERT(call->schedule_closure_after_destroy == NULL);
+  GPR_ASSERT(closure != NULL);
+  call->schedule_closure_after_destroy = closure;
+}
+
 void grpc_subchannel_call_ref(
     grpc_subchannel_call *c GRPC_SUBCHANNEL_REF_EXTRA_ARGS) {
   GRPC_CALL_STACK_REF(SUBCHANNEL_CALL_TO_CALL_STACK(c), REF_REASON);
@@ -761,15 +771,22 @@
 
 grpc_error *grpc_connected_subchannel_create_call(
     grpc_exec_ctx *exec_ctx, grpc_connected_subchannel *con,
-    grpc_polling_entity *pollent, grpc_slice path, gpr_timespec start_time,
-    gpr_timespec deadline, grpc_subchannel_call **call) {
+    const grpc_connected_subchannel_call_args *args,
+    grpc_subchannel_call **call) {
   grpc_channel_stack *chanstk = CHANNEL_STACK_FROM_CONNECTION(con);
-  *call = gpr_zalloc(sizeof(grpc_subchannel_call) + chanstk->call_stack_size);
+  *call = gpr_arena_alloc(
+      args->arena, sizeof(grpc_subchannel_call) + chanstk->call_stack_size);
   grpc_call_stack *callstk = SUBCHANNEL_CALL_TO_CALL_STACK(*call);
   (*call)->connection = con;  // Ref is added below.
-  grpc_error *error =
-      grpc_call_stack_init(exec_ctx, chanstk, 1, subchannel_call_destroy, *call,
-                           NULL, NULL, path, start_time, deadline, callstk);
+  const grpc_call_element_args call_args = {.call_stack = callstk,
+                                            .server_transport_data = NULL,
+                                            .context = NULL,
+                                            .path = args->path,
+                                            .start_time = args->start_time,
+                                            .deadline = args->deadline,
+                                            .arena = args->arena};
+  grpc_error *error = grpc_call_stack_init(
+      exec_ctx, chanstk, 1, subchannel_call_destroy, *call, &call_args);
   if (error != GRPC_ERROR_NONE) {
     const char *error_string = grpc_error_string(error);
     gpr_log(GPR_ERROR, "error: %s", error_string);
@@ -778,7 +795,7 @@
     return error;
   }
   GRPC_CONNECTED_SUBCHANNEL_REF(con, "subchannel_call");
-  grpc_call_stack_set_pollset_or_pollset_set(exec_ctx, callstk, pollent);
+  grpc_call_stack_set_pollset_or_pollset_set(exec_ctx, callstk, args->pollent);
   return GRPC_ERROR_NONE;
 }
 
diff --git a/src/core/ext/client_channel/subchannel.h b/src/core/ext/client_channel/subchannel.h
index 6a70a76..3e64a25 100644
--- a/src/core/ext/client_channel/subchannel.h
+++ b/src/core/ext/client_channel/subchannel.h
@@ -37,6 +37,7 @@
 #include "src/core/ext/client_channel/connector.h"
 #include "src/core/lib/channel/channel_stack.h"
 #include "src/core/lib/iomgr/polling_entity.h"
+#include "src/core/lib/support/arena.h"
 #include "src/core/lib/transport/connectivity_state.h"
 #include "src/core/lib/transport/metadata.h"
 
@@ -112,10 +113,18 @@
                                     GRPC_SUBCHANNEL_REF_EXTRA_ARGS);
 
 /** construct a subchannel call */
+typedef struct {
+  grpc_polling_entity *pollent;
+  grpc_slice path;
+  gpr_timespec start_time;
+  gpr_timespec deadline;
+  gpr_arena *arena;
+} grpc_connected_subchannel_call_args;
+
 grpc_error *grpc_connected_subchannel_create_call(
     grpc_exec_ctx *exec_ctx, grpc_connected_subchannel *connected_subchannel,
-    grpc_polling_entity *pollent, grpc_slice path, gpr_timespec start_time,
-    gpr_timespec deadline, grpc_subchannel_call **subchannel_call);
+    const grpc_connected_subchannel_call_args *args,
+    grpc_subchannel_call **subchannel_call);
 
 /** process a transport level op */
 void grpc_connected_subchannel_process_transport_op(
@@ -154,6 +163,11 @@
 char *grpc_subchannel_call_get_peer(grpc_exec_ctx *exec_ctx,
                                     grpc_subchannel_call *subchannel_call);
 
+/** Must be called once per call. Sets the 'then_schedule_closure' argument for
+    call stack destruction. */
+void grpc_subchannel_call_set_cleanup_closure(
+    grpc_subchannel_call *subchannel_call, grpc_closure *closure);
+
 grpc_call_stack *grpc_subchannel_call_get_call_stack(
     grpc_subchannel_call *subchannel_call);
 
diff --git a/src/core/ext/load_reporting/load_reporting_filter.c b/src/core/ext/load_reporting/load_reporting_filter.c
index c275063..4ed8326 100644
--- a/src/core/ext/load_reporting/load_reporting_filter.c
+++ b/src/core/ext/load_reporting/load_reporting_filter.c
@@ -123,7 +123,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   call_data *calld = elem->call_data;
 
   /* TODO(dgq): do something with the data
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.c b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
index a368453..082078c 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.c
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
@@ -575,7 +575,7 @@
 
 static int init_stream(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
                        grpc_stream *gs, grpc_stream_refcount *refcount,
-                       const void *server_data) {
+                       const void *server_data, gpr_arena *arena) {
   GPR_TIMER_BEGIN("init_stream", 0);
   grpc_chttp2_transport *t = (grpc_chttp2_transport *)gt;
   grpc_chttp2_stream *s = (grpc_chttp2_stream *)gs;
@@ -588,8 +588,8 @@
   gpr_ref_init(&s->active_streams, 1);
   GRPC_CHTTP2_STREAM_REF(s, "chttp2");
 
-  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[0]);
-  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[1]);
+  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[0], arena);
+  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[1], arena);
   grpc_chttp2_data_parser_init(&s->data_parser);
   grpc_slice_buffer_init(&s->flow_controlled_buffer);
   s->deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC);
@@ -665,16 +665,17 @@
 
   GPR_TIMER_END("destroy_stream", 0);
 
-  gpr_free(s->destroy_stream_arg);
+  grpc_closure_sched(exec_ctx, s->destroy_stream_arg, GRPC_ERROR_NONE);
 }
 
 static void destroy_stream(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
-                           grpc_stream *gs, void *and_free_memory) {
+                           grpc_stream *gs,
+                           grpc_closure *then_schedule_closure) {
   GPR_TIMER_BEGIN("destroy_stream", 0);
   grpc_chttp2_transport *t = (grpc_chttp2_transport *)gt;
   grpc_chttp2_stream *s = (grpc_chttp2_stream *)gs;
 
-  s->destroy_stream_arg = and_free_memory;
+  s->destroy_stream_arg = then_schedule_closure;
   grpc_closure_sched(
       exec_ctx, grpc_closure_init(&s->destroy_stream, destroy_stream_locked, s,
                                   grpc_combiner_scheduler(t->combiner, false)),
@@ -1629,15 +1630,19 @@
       s->recv_trailing_metadata_finished != NULL) {
     char status_string[GPR_LTOA_MIN_BUFSIZE];
     gpr_ltoa(status, status_string);
-    grpc_chttp2_incoming_metadata_buffer_replace_or_add(
-        exec_ctx, &s->metadata_buffer[1],
-        grpc_mdelem_from_slices(exec_ctx, GRPC_MDSTR_GRPC_STATUS,
-                                grpc_slice_from_copied_string(status_string)));
+    GRPC_LOG_IF_ERROR("add_status",
+                      grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+                          exec_ctx, &s->metadata_buffer[1],
+                          grpc_mdelem_from_slices(
+                              exec_ctx, GRPC_MDSTR_GRPC_STATUS,
+                              grpc_slice_from_copied_string(status_string))));
     if (msg != NULL) {
-      grpc_chttp2_incoming_metadata_buffer_replace_or_add(
-          exec_ctx, &s->metadata_buffer[1],
-          grpc_mdelem_from_slices(exec_ctx, GRPC_MDSTR_GRPC_MESSAGE,
-                                  grpc_slice_from_copied_string(msg)));
+      GRPC_LOG_IF_ERROR(
+          "add_status_message",
+          grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+              exec_ctx, &s->metadata_buffer[1],
+              grpc_mdelem_from_slices(exec_ctx, GRPC_MDSTR_GRPC_MESSAGE,
+                                      grpc_slice_from_copied_string(msg))));
     }
     s->published_metadata[1] = GRPC_METADATA_SYNTHESIZED_FROM_FAKE;
     grpc_chttp2_maybe_complete_recv_trailing_metadata(exec_ctx, t, s);
diff --git a/src/core/ext/transport/chttp2/transport/incoming_metadata.c b/src/core/ext/transport/chttp2/transport/incoming_metadata.c
index c91b019..da0a34d 100644
--- a/src/core/ext/transport/chttp2/transport/incoming_metadata.c
+++ b/src/core/ext/transport/chttp2/transport/incoming_metadata.c
@@ -41,69 +41,48 @@
 #include <grpc/support/log.h>
 
 void grpc_chttp2_incoming_metadata_buffer_init(
-    grpc_chttp2_incoming_metadata_buffer *buffer) {
-  buffer->deadline = gpr_inf_future(GPR_CLOCK_REALTIME);
+    grpc_chttp2_incoming_metadata_buffer *buffer, gpr_arena *arena) {
+  buffer->arena = arena;
+  grpc_metadata_batch_init(&buffer->batch);
+  buffer->batch.deadline = gpr_inf_future(GPR_CLOCK_REALTIME);
 }
 
 void grpc_chttp2_incoming_metadata_buffer_destroy(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer) {
-  size_t i;
-  if (!buffer->published) {
-    for (i = 0; i < buffer->count; i++) {
-      GRPC_MDELEM_UNREF(exec_ctx, buffer->elems[i].md);
-    }
-  }
-  gpr_free(buffer->elems);
+  grpc_metadata_batch_destroy(exec_ctx, &buffer->batch);
 }
 
-void grpc_chttp2_incoming_metadata_buffer_add(
-    grpc_chttp2_incoming_metadata_buffer *buffer, grpc_mdelem elem) {
-  GPR_ASSERT(!buffer->published);
-  if (buffer->capacity == buffer->count) {
-    buffer->capacity = GPR_MAX(8, 2 * buffer->capacity);
-    buffer->elems =
-        gpr_realloc(buffer->elems, sizeof(*buffer->elems) * buffer->capacity);
-  }
-  buffer->elems[buffer->count++].md = elem;
-  buffer->size += GRPC_MDELEM_LENGTH(elem);
-}
-
-void grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+grpc_error *grpc_chttp2_incoming_metadata_buffer_add(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
     grpc_mdelem elem) {
-  for (size_t i = 0; i < buffer->count; i++) {
-    if (grpc_slice_eq(GRPC_MDKEY(buffer->elems[i].md), GRPC_MDKEY(elem))) {
-      GRPC_MDELEM_UNREF(exec_ctx, buffer->elems[i].md);
-      buffer->elems[i].md = elem;
-      return;
+  buffer->size += GRPC_MDELEM_LENGTH(elem);
+  return grpc_metadata_batch_add_tail(
+      exec_ctx, &buffer->batch,
+      gpr_arena_alloc(buffer->arena, sizeof(grpc_linked_mdelem)), elem);
+}
+
+grpc_error *grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+    grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
+    grpc_mdelem elem) {
+  for (grpc_linked_mdelem *l = buffer->batch.list.head; l != NULL;
+       l = l->next) {
+    if (grpc_slice_eq(GRPC_MDKEY(l->md), GRPC_MDKEY(elem))) {
+      GRPC_MDELEM_UNREF(exec_ctx, l->md);
+      l->md = elem;
+      return GRPC_ERROR_NONE;
     }
   }
-  grpc_chttp2_incoming_metadata_buffer_add(buffer, elem);
+  return grpc_chttp2_incoming_metadata_buffer_add(exec_ctx, buffer, elem);
 }
 
 void grpc_chttp2_incoming_metadata_buffer_set_deadline(
     grpc_chttp2_incoming_metadata_buffer *buffer, gpr_timespec deadline) {
-  GPR_ASSERT(!buffer->published);
-  buffer->deadline = deadline;
+  buffer->batch.deadline = deadline;
 }
 
 void grpc_chttp2_incoming_metadata_buffer_publish(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
     grpc_metadata_batch *batch) {
-  GPR_ASSERT(!buffer->published);
-  buffer->published = 1;
-  if (buffer->count > 0) {
-    size_t i;
-    for (i = 0; i < buffer->count; i++) {
-      /* TODO(ctiller): do something better here */
-      if (!GRPC_LOG_IF_ERROR("grpc_chttp2_incoming_metadata_buffer_publish",
-                             grpc_metadata_batch_link_tail(
-                                 exec_ctx, batch, &buffer->elems[i]))) {
-        GRPC_MDELEM_UNREF(exec_ctx, buffer->elems[i].md);
-      }
-    }
-  } else {
-    batch->list.head = batch->list.tail = NULL;
-  }
-  batch->deadline = buffer->deadline;
+  *batch = buffer->batch;
+  grpc_metadata_batch_init(&buffer->batch);
 }
diff --git a/src/core/ext/transport/chttp2/transport/incoming_metadata.h b/src/core/ext/transport/chttp2/transport/incoming_metadata.h
index 1eac6fc..288c917 100644
--- a/src/core/ext/transport/chttp2/transport/incoming_metadata.h
+++ b/src/core/ext/transport/chttp2/transport/incoming_metadata.h
@@ -37,28 +37,26 @@
 #include "src/core/lib/transport/transport.h"
 
 typedef struct {
-  grpc_linked_mdelem *elems;
-  size_t count;
-  size_t capacity;
-  gpr_timespec deadline;
-  int published;
+  gpr_arena *arena;
+  grpc_metadata_batch batch;
   size_t size;  // total size of metadata
 } grpc_chttp2_incoming_metadata_buffer;
 
 /** assumes everything initially zeroed */
 void grpc_chttp2_incoming_metadata_buffer_init(
-    grpc_chttp2_incoming_metadata_buffer *buffer);
+    grpc_chttp2_incoming_metadata_buffer *buffer, gpr_arena *arena);
 void grpc_chttp2_incoming_metadata_buffer_destroy(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer);
 void grpc_chttp2_incoming_metadata_buffer_publish(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
     grpc_metadata_batch *batch);
 
-void grpc_chttp2_incoming_metadata_buffer_add(
-    grpc_chttp2_incoming_metadata_buffer *buffer, grpc_mdelem elem);
-void grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+grpc_error *grpc_chttp2_incoming_metadata_buffer_add(
     grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
-    grpc_mdelem elem);
+    grpc_mdelem elem) GRPC_MUST_USE_RESULT;
+grpc_error *grpc_chttp2_incoming_metadata_buffer_replace_or_add(
+    grpc_exec_ctx *exec_ctx, grpc_chttp2_incoming_metadata_buffer *buffer,
+    grpc_mdelem elem) GRPC_MUST_USE_RESULT;
 void grpc_chttp2_incoming_metadata_buffer_set_deadline(
     grpc_chttp2_incoming_metadata_buffer *buffer, gpr_timespec deadline);
 
diff --git a/src/core/ext/transport/chttp2/transport/internal.h b/src/core/ext/transport/chttp2/transport/internal.h
index d26812a..3c56c21 100644
--- a/src/core/ext/transport/chttp2/transport/internal.h
+++ b/src/core/ext/transport/chttp2/transport/internal.h
@@ -425,7 +425,7 @@
   grpc_stream_refcount *refcount;
 
   grpc_closure destroy_stream;
-  void *destroy_stream_arg;
+  grpc_closure *destroy_stream_arg;
 
   grpc_chttp2_stream_link links[STREAM_LIST_COUNT];
   uint8_t included[STREAM_LIST_COUNT];
diff --git a/src/core/ext/transport/chttp2/transport/parsing.c b/src/core/ext/transport/chttp2/transport/parsing.c
index e7f2597..7efc8c6 100644
--- a/src/core/ext/transport/chttp2/transport/parsing.c
+++ b/src/core/ext/transport/chttp2/transport/parsing.c
@@ -548,7 +548,14 @@
       s->seen_error = true;
       GRPC_MDELEM_UNREF(exec_ctx, md);
     } else {
-      grpc_chttp2_incoming_metadata_buffer_add(&s->metadata_buffer[0], md);
+      grpc_error *error = grpc_chttp2_incoming_metadata_buffer_add(
+          exec_ctx, &s->metadata_buffer[0], md);
+      if (error != GRPC_ERROR_NONE) {
+        grpc_chttp2_cancel_stream(exec_ctx, t, s, error);
+        grpc_chttp2_parsing_become_skip_parser(exec_ctx, t);
+        s->seen_error = true;
+        GRPC_MDELEM_UNREF(exec_ctx, md);
+      }
     }
   }
 
@@ -598,7 +605,14 @@
     s->seen_error = true;
     GRPC_MDELEM_UNREF(exec_ctx, md);
   } else {
-    grpc_chttp2_incoming_metadata_buffer_add(&s->metadata_buffer[1], md);
+    grpc_error *error = grpc_chttp2_incoming_metadata_buffer_add(
+        exec_ctx, &s->metadata_buffer[1], md);
+    if (error != GRPC_ERROR_NONE) {
+      grpc_chttp2_cancel_stream(exec_ctx, t, s, error);
+      grpc_chttp2_parsing_become_skip_parser(exec_ctx, t);
+      s->seen_error = true;
+      GRPC_MDELEM_UNREF(exec_ctx, md);
+    }
   }
 
   GPR_TIMER_END("on_trailing_header", 0);
diff --git a/src/core/ext/transport/cronet/transport/cronet_transport.c b/src/core/ext/transport/cronet/transport/cronet_transport.c
index e187360..450d9ab 100644
--- a/src/core/ext/transport/cronet/transport/cronet_transport.c
+++ b/src/core/ext/transport/cronet/transport/cronet_transport.c
@@ -184,6 +184,7 @@
 };
 
 struct stream_obj {
+  gpr_arena *arena;
   struct op_and_state *oas;
   grpc_transport_stream_op *curr_op;
   grpc_cronet_transport *curr_ct;
@@ -486,15 +487,18 @@
   gpr_mu_lock(&s->mu);
   memset(&s->state.rs.initial_metadata, 0,
          sizeof(s->state.rs.initial_metadata));
-  grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.initial_metadata);
+  grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.initial_metadata,
+                                            s->arena);
   for (size_t i = 0; i < headers->count; i++) {
-    grpc_chttp2_incoming_metadata_buffer_add(
-        &s->state.rs.initial_metadata,
-        grpc_mdelem_from_slices(
-            &exec_ctx, grpc_slice_intern(grpc_slice_from_static_string(
-                           headers->headers[i].key)),
-            grpc_slice_intern(
-                grpc_slice_from_static_string(headers->headers[i].value))));
+    GRPC_LOG_IF_ERROR(
+        "on_response_headers_received",
+        grpc_chttp2_incoming_metadata_buffer_add(
+            &exec_ctx, &s->state.rs.initial_metadata,
+            grpc_mdelem_from_slices(
+                &exec_ctx, grpc_slice_intern(grpc_slice_from_static_string(
+                               headers->headers[i].key)),
+                grpc_slice_intern(grpc_slice_from_static_string(
+                    headers->headers[i].value)))));
   }
   s->state.state_callback_received[OP_RECV_INITIAL_METADATA] = true;
   if (!(s->state.state_op_done[OP_CANCEL_ERROR] ||
@@ -586,17 +590,20 @@
   memset(&s->state.rs.trailing_metadata, 0,
          sizeof(s->state.rs.trailing_metadata));
   s->state.rs.trailing_metadata_valid = false;
-  grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.trailing_metadata);
+  grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.trailing_metadata,
+                                            s->arena);
   for (size_t i = 0; i < trailers->count; i++) {
     CRONET_LOG(GPR_DEBUG, "trailer key=%s, value=%s", trailers->headers[i].key,
                trailers->headers[i].value);
-    grpc_chttp2_incoming_metadata_buffer_add(
-        &s->state.rs.trailing_metadata,
-        grpc_mdelem_from_slices(
-            &exec_ctx, grpc_slice_intern(grpc_slice_from_static_string(
-                           trailers->headers[i].key)),
-            grpc_slice_intern(
-                grpc_slice_from_static_string(trailers->headers[i].value))));
+    GRPC_LOG_IF_ERROR(
+        "on_response_trailers_received",
+        grpc_chttp2_incoming_metadata_buffer_add(
+            &exec_ctx, &s->state.rs.trailing_metadata,
+            grpc_mdelem_from_slices(
+                &exec_ctx, grpc_slice_intern(grpc_slice_from_static_string(
+                               trailers->headers[i].key)),
+                grpc_slice_intern(grpc_slice_from_static_string(
+                    trailers->headers[i].value)))));
     s->state.rs.trailing_metadata_valid = true;
     if (0 == strcmp(trailers->headers[i].key, "grpc-status") &&
         0 != strcmp(trailers->headers[i].value, "0")) {
@@ -1215,7 +1222,7 @@
 
 static int init_stream(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
                        grpc_stream *gs, grpc_stream_refcount *refcount,
-                       const void *server_data) {
+                       const void *server_data, gpr_arena *arena) {
   stream_obj *s = (stream_obj *)gs;
   memset(&s->storage, 0, sizeof(s->storage));
   s->storage.head = NULL;
@@ -1237,6 +1244,7 @@
 
   s->curr_gs = gs;
   s->curr_ct = (grpc_cronet_transport *)gt;
+  s->arena = arena;
 
   gpr_mu_init(&s->mu);
   return 0;
@@ -1273,10 +1281,12 @@
 }
 
 static void destroy_stream(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
-                           grpc_stream *gs, void *and_free_memory) {
+                           grpc_stream *gs,
+                           grpc_closure *then_schedule_closure) {
   stream_obj *s = (stream_obj *)gs;
   null_and_maybe_free_read_buffer(s);
   GRPC_ERROR_UNREF(s->state.cancel_error);
+  grpc_closure_sched(exec_ctx, then_schedule_closure, GRPC_ERROR_NONE);
 }
 
 static void destroy_transport(grpc_exec_ctx *exec_ctx, grpc_transport *gt) {}
diff --git a/src/core/lib/channel/channel_stack.c b/src/core/lib/channel/channel_stack.c
index 3fb2a60..6d53b05 100644
--- a/src/core/lib/channel/channel_stack.c
+++ b/src/core/lib/channel/channel_stack.c
@@ -166,41 +166,32 @@
   }
 }
 
-grpc_error *grpc_call_stack_init(
-    grpc_exec_ctx *exec_ctx, grpc_channel_stack *channel_stack,
-    int initial_refs, grpc_iomgr_cb_func destroy, void *destroy_arg,
-    grpc_call_context_element *context, const void *transport_server_data,
-    grpc_slice path, gpr_timespec start_time, gpr_timespec deadline,
-    grpc_call_stack *call_stack) {
+grpc_error *grpc_call_stack_init(grpc_exec_ctx *exec_ctx,
+                                 grpc_channel_stack *channel_stack,
+                                 int initial_refs, grpc_iomgr_cb_func destroy,
+                                 void *destroy_arg,
+                                 const grpc_call_element_args *elem_args) {
   grpc_channel_element *channel_elems = CHANNEL_ELEMS_FROM_STACK(channel_stack);
   size_t count = channel_stack->count;
   grpc_call_element *call_elems;
   char *user_data;
   size_t i;
 
-  call_stack->count = count;
-  GRPC_STREAM_REF_INIT(&call_stack->refcount, initial_refs, destroy,
+  elem_args->call_stack->count = count;
+  GRPC_STREAM_REF_INIT(&elem_args->call_stack->refcount, initial_refs, destroy,
                        destroy_arg, "CALL_STACK");
-  call_elems = CALL_ELEMS_FROM_STACK(call_stack);
+  call_elems = CALL_ELEMS_FROM_STACK(elem_args->call_stack);
   user_data = ((char *)call_elems) +
               ROUND_UP_TO_ALIGNMENT_SIZE(count * sizeof(grpc_call_element));
 
   /* init per-filter data */
   grpc_error *first_error = GRPC_ERROR_NONE;
-  const grpc_call_element_args args = {
-      .start_time = start_time,
-      .call_stack = call_stack,
-      .server_transport_data = transport_server_data,
-      .context = context,
-      .path = path,
-      .deadline = deadline,
-  };
   for (i = 0; i < count; i++) {
     call_elems[i].filter = channel_elems[i].filter;
     call_elems[i].channel_data = channel_elems[i].channel_data;
     call_elems[i].call_data = user_data;
-    grpc_error *error =
-        call_elems[i].filter->init_call_elem(exec_ctx, &call_elems[i], &args);
+    grpc_error *error = call_elems[i].filter->init_call_elem(
+        exec_ctx, &call_elems[i], elem_args);
     if (error != GRPC_ERROR_NONE) {
       if (first_error == GRPC_ERROR_NONE) {
         first_error = error;
@@ -241,15 +232,16 @@
 
 void grpc_call_stack_destroy(grpc_exec_ctx *exec_ctx, grpc_call_stack *stack,
                              const grpc_call_final_info *final_info,
-                             void *and_free_memory) {
+                             grpc_closure *then_schedule_closure) {
   grpc_call_element *elems = CALL_ELEMS_FROM_STACK(stack);
   size_t count = stack->count;
   size_t i;
 
   /* destroy per-filter data */
   for (i = 0; i < count; i++) {
-    elems[i].filter->destroy_call_elem(exec_ctx, &elems[i], final_info,
-                                       i == count - 1 ? and_free_memory : NULL);
+    elems[i].filter->destroy_call_elem(
+        exec_ctx, &elems[i], final_info,
+        i == count - 1 ? then_schedule_closure : NULL);
   }
 }
 
diff --git a/src/core/lib/channel/channel_stack.h b/src/core/lib/channel/channel_stack.h
index 6d3340b..80e3603 100644
--- a/src/core/lib/channel/channel_stack.h
+++ b/src/core/lib/channel/channel_stack.h
@@ -56,6 +56,7 @@
 
 #include "src/core/lib/debug/trace.h"
 #include "src/core/lib/iomgr/polling_entity.h"
+#include "src/core/lib/support/arena.h"
 #include "src/core/lib/transport/transport.h"
 
 #ifdef __cplusplus
@@ -84,6 +85,7 @@
   grpc_slice path;
   gpr_timespec start_time;
   gpr_timespec deadline;
+  gpr_arena *arena;
 } grpc_call_element_args;
 
 typedef struct {
@@ -139,12 +141,12 @@
   /* Destroy per call data.
      The filter does not need to do any chaining.
      The bottom filter of a stack will be passed a non-NULL pointer to
-     \a and_free_memory that should be passed to gpr_free when destruction
-     is complete. \a final_info contains data about the completed call, mainly
-     for reporting purposes. */
+     \a then_schedule_closure that should be passed to grpc_closure_sched when
+     destruction is complete. \a final_info contains data about the completed
+     call, mainly for reporting purposes. */
   void (*destroy_call_elem)(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                             const grpc_call_final_info *final_info,
-                            void *and_free_memory);
+                            grpc_closure *then_schedule_closure);
 
   /* sizeof(per channel data) */
   size_t sizeof_channel_data;
@@ -236,12 +238,11 @@
 /* Initialize a call stack given a channel stack. transport_server_data is
    expected to be NULL on a client, or an opaque transport owned pointer on the
    server. */
-grpc_error *grpc_call_stack_init(
-    grpc_exec_ctx *exec_ctx, grpc_channel_stack *channel_stack,
-    int initial_refs, grpc_iomgr_cb_func destroy, void *destroy_arg,
-    grpc_call_context_element *context, const void *transport_server_data,
-    grpc_slice path, gpr_timespec start_time, gpr_timespec deadline,
-    grpc_call_stack *call_stack);
+grpc_error *grpc_call_stack_init(grpc_exec_ctx *exec_ctx,
+                                 grpc_channel_stack *channel_stack,
+                                 int initial_refs, grpc_iomgr_cb_func destroy,
+                                 void *destroy_arg,
+                                 const grpc_call_element_args *elem_args);
 /* Set a pollset or a pollset_set for a call stack: must occur before the first
  * op is started */
 void grpc_call_stack_set_pollset_or_pollset_set(grpc_exec_ctx *exec_ctx,
@@ -271,7 +272,7 @@
 /* Destroy a call stack */
 void grpc_call_stack_destroy(grpc_exec_ctx *exec_ctx, grpc_call_stack *stack,
                              const grpc_call_final_info *final_info,
-                             void *and_free_memory);
+                             grpc_closure *then_schedule_closure);
 
 /* Ignore set pollset{_set} - used by filters if they don't care about pollsets
  * at all. Does nothing. */
diff --git a/src/core/lib/channel/compress_filter.c b/src/core/lib/channel/compress_filter.c
index aa41014..02dc479 100644
--- a/src/core/lib/channel/compress_filter.c
+++ b/src/core/lib/channel/compress_filter.c
@@ -292,7 +292,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   /* grab pointers to our data from the call element */
   call_data *calld = elem->call_data;
   grpc_slice_buffer_destroy_internal(exec_ctx, &calld->slices);
diff --git a/src/core/lib/channel/connected_channel.c b/src/core/lib/channel/connected_channel.c
index 29796f7..42ef7b7 100644
--- a/src/core/lib/channel/connected_channel.c
+++ b/src/core/lib/channel/connected_channel.c
@@ -88,7 +88,7 @@
   channel_data *chand = elem->channel_data;
   int r = grpc_transport_init_stream(
       exec_ctx, chand->transport, TRANSPORT_STREAM_FROM_CALL_DATA(calld),
-      &args->call_stack->refcount, args->server_transport_data);
+      &args->call_stack->refcount, args->server_transport_data, args->arena);
   return r == 0 ? GRPC_ERROR_NONE
                 : GRPC_ERROR_CREATE("transport stream initialization failed");
 }
@@ -105,12 +105,12 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *and_free_memory) {
+                              grpc_closure *then_schedule_closure) {
   call_data *calld = elem->call_data;
   channel_data *chand = elem->channel_data;
   grpc_transport_destroy_stream(exec_ctx, chand->transport,
                                 TRANSPORT_STREAM_FROM_CALL_DATA(calld),
-                                and_free_memory);
+                                then_schedule_closure);
 }
 
 /* Constructor for channel_data */
diff --git a/src/core/lib/channel/deadline_filter.c b/src/core/lib/channel/deadline_filter.c
index 5a12d62..34114bb 100644
--- a/src/core/lib/channel/deadline_filter.c
+++ b/src/core/lib/channel/deadline_filter.c
@@ -256,7 +256,7 @@
 // Destructor for call_data.  Used for both client and server filters.
 static void destroy_call_elem(grpc_exec_ctx* exec_ctx, grpc_call_element* elem,
                               const grpc_call_final_info* final_info,
-                              void* and_free_memory) {
+                              grpc_closure* ignored) {
   grpc_deadline_state_destroy(exec_ctx, elem);
 }
 
diff --git a/src/core/lib/channel/http_client_filter.c b/src/core/lib/channel/http_client_filter.c
index c031533..f9d0d68 100644
--- a/src/core/lib/channel/http_client_filter.c
+++ b/src/core/lib/channel/http_client_filter.c
@@ -412,7 +412,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   call_data *calld = elem->call_data;
   grpc_slice_buffer_destroy_internal(exec_ctx, &calld->slices);
 }
diff --git a/src/core/lib/channel/http_server_filter.c b/src/core/lib/channel/http_server_filter.c
index fb70de8..bebd3af 100644
--- a/src/core/lib/channel/http_server_filter.c
+++ b/src/core/lib/channel/http_server_filter.c
@@ -358,7 +358,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   call_data *calld = elem->call_data;
   grpc_slice_buffer_destroy_internal(exec_ctx, &calld->read_slice_buffer);
 }
diff --git a/src/core/lib/channel/message_size_filter.c b/src/core/lib/channel/message_size_filter.c
index b424c0d..5ba13fe 100644
--- a/src/core/lib/channel/message_size_filter.c
+++ b/src/core/lib/channel/message_size_filter.c
@@ -200,7 +200,7 @@
 // Destructor for call_data.
 static void destroy_call_elem(grpc_exec_ctx* exec_ctx, grpc_call_element* elem,
                               const grpc_call_final_info* final_info,
-                              void* ignored) {}
+                              grpc_closure* ignored) {}
 
 // Constructor for channel_data.
 static grpc_error* init_channel_elem(grpc_exec_ctx* exec_ctx,
diff --git a/src/core/lib/security/transport/client_auth_filter.c b/src/core/lib/security/transport/client_auth_filter.c
index a23082a..8dea1d9 100644
--- a/src/core/lib/security/transport/client_auth_filter.c
+++ b/src/core/lib/security/transport/client_auth_filter.c
@@ -318,7 +318,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   call_data *calld = elem->call_data;
   grpc_call_credentials_unref(exec_ctx, calld->creds);
   if (calld->have_host) {
diff --git a/src/core/lib/security/transport/server_auth_filter.c b/src/core/lib/security/transport/server_auth_filter.c
index 14619d9..01cb473 100644
--- a/src/core/lib/security/transport/server_auth_filter.c
+++ b/src/core/lib/security/transport/server_auth_filter.c
@@ -227,7 +227,7 @@
 /* Destructor for call_data */
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {}
+                              grpc_closure *ignored) {}
 
 /* Constructor for channel_data */
 static grpc_error *init_channel_elem(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/lib/surface/call.c b/src/core/lib/surface/call.c
index 47f36be..2c5d8c0 100644
--- a/src/core/lib/surface/call.c
+++ b/src/core/lib/surface/call.c
@@ -51,6 +51,7 @@
 #include "src/core/lib/profiling/timers.h"
 #include "src/core/lib/slice/slice_internal.h"
 #include "src/core/lib/slice/slice_string_helpers.h"
+#include "src/core/lib/support/arena.h"
 #include "src/core/lib/support/string.h"
 #include "src/core/lib/surface/api_trace.h"
 #include "src/core/lib/surface/call.h"
@@ -138,6 +139,7 @@
 } batch_control;
 
 struct grpc_call {
+  gpr_arena *arena;
   grpc_completion_queue *cq;
   grpc_polling_entity pollent;
   grpc_channel *channel;
@@ -212,6 +214,8 @@
   grpc_closure receiving_initial_metadata_ready;
   uint32_t test_only_last_message_flags;
 
+  grpc_closure release_call;
+
   union {
     struct {
       grpc_status_code *status;
@@ -273,7 +277,11 @@
       grpc_channel_get_channel_stack(args->channel);
   grpc_call *call;
   GPR_TIMER_BEGIN("grpc_call_create", 0);
-  call = gpr_zalloc(sizeof(grpc_call) + channel_stack->call_stack_size);
+  gpr_arena *arena =
+      gpr_arena_create(grpc_channel_get_call_size_estimate(args->channel));
+  call = gpr_arena_alloc(arena,
+                         sizeof(grpc_call) + channel_stack->call_stack_size);
+  call->arena = arena;
   *out_call = call;
   gpr_mu_init(&call->child_list_mu);
   call->channel = args->channel;
@@ -364,11 +372,16 @@
 
   GRPC_CHANNEL_INTERNAL_REF(args->channel, "call");
   /* initial refcount dropped by grpc_call_destroy */
+  grpc_call_element_args call_args = {
+      .call_stack = CALL_STACK_FROM_CALL(call),
+      .server_transport_data = args->server_transport_data,
+      .context = call->context,
+      .path = path,
+      .start_time = call->start_time,
+      .deadline = send_deadline,
+      .arena = call->arena};
   add_init_error(&error, grpc_call_stack_init(exec_ctx, channel_stack, 1,
-                                              destroy_call, call, call->context,
-                                              args->server_transport_data, path,
-                                              call->start_time, send_deadline,
-                                              CALL_STACK_FROM_CALL(call)));
+                                              destroy_call, call, &call_args));
   if (error != GRPC_ERROR_NONE) {
     cancel_with_error(exec_ctx, call, STATUS_FROM_SURFACE,
                       GRPC_ERROR_REF(error));
@@ -425,6 +438,14 @@
   GRPC_CALL_STACK_UNREF(exec_ctx, CALL_STACK_FROM_CALL(c), REF_REASON);
 }
 
+static void release_call(grpc_exec_ctx *exec_ctx, void *call,
+                         grpc_error *error) {
+  grpc_call *c = call;
+  grpc_channel *channel = c->channel;
+  grpc_channel_update_call_size_estimate(channel, gpr_arena_destroy(c->arena));
+  GRPC_CHANNEL_INTERNAL_UNREF(exec_ctx, channel, "call");
+}
+
 static void set_status_value_directly(grpc_status_code status, void *dest);
 static void destroy_call(grpc_exec_ctx *exec_ctx, void *call,
                          grpc_error *error) {
@@ -451,7 +472,6 @@
   if (c->cq) {
     GRPC_CQ_INTERNAL_UNREF(c->cq, "bind");
   }
-  grpc_channel *channel = c->channel;
 
   get_final_status(call, set_status_value_directly, &c->final_info.final_status,
                    NULL);
@@ -463,8 +483,9 @@
         unpack_received_status(gpr_atm_acq_load(&c->status[i])).error);
   }
 
-  grpc_call_stack_destroy(exec_ctx, CALL_STACK_FROM_CALL(c), &c->final_info, c);
-  GRPC_CHANNEL_INTERNAL_UNREF(exec_ctx, channel, "call");
+  grpc_call_stack_destroy(exec_ctx, CALL_STACK_FROM_CALL(c), &c->final_info,
+                          grpc_closure_init(&c->release_call, release_call, c,
+                                            grpc_schedule_on_exec_ctx));
   GPR_TIMER_END("destroy_call", 0);
 }
 
diff --git a/src/core/lib/surface/channel.c b/src/core/lib/surface/channel.c
index d6acd39..2b700b2 100644
--- a/src/core/lib/surface/channel.c
+++ b/src/core/lib/surface/channel.c
@@ -68,6 +68,8 @@
   grpc_compression_options compression_options;
   grpc_mdelem default_authority;
 
+  gpr_atm call_size_estimate;
+
   gpr_mu registered_call_mu;
   registered_call *registered_calls;
 
@@ -115,6 +117,10 @@
   gpr_mu_init(&channel->registered_call_mu);
   channel->registered_calls = NULL;
 
+  gpr_atm_no_barrier_store(
+      &channel->call_size_estimate,
+      (gpr_atm)CHANNEL_STACK_FROM_CHANNEL(channel)->call_stack_size);
+
   grpc_compression_options_init(&channel->compression_options);
   for (size_t i = 0; i < args->num_args; i++) {
     if (0 == strcmp(args->args[i].key, GRPC_ARG_DEFAULT_AUTHORITY)) {
@@ -177,6 +183,32 @@
   return channel;
 }
 
+size_t grpc_channel_get_call_size_estimate(grpc_channel *channel) {
+#define ROUND_UP_SIZE 256
+  return ((size_t)gpr_atm_no_barrier_load(&channel->call_size_estimate) +
+          ROUND_UP_SIZE) &
+         ~(size_t)(ROUND_UP_SIZE - 1);
+}
+
+void grpc_channel_update_call_size_estimate(grpc_channel *channel,
+                                            size_t size) {
+  size_t cur = (size_t)gpr_atm_no_barrier_load(&channel->call_size_estimate);
+  if (cur < size) {
+    /* size grew: update estimate */
+    gpr_atm_no_barrier_cas(&channel->call_size_estimate, (gpr_atm)cur,
+                           (gpr_atm)size);
+    /* if we lose: never mind, something else will likely update soon enough */
+  } else if (cur == size) {
+    /* no change: holding pattern */
+  } else if (cur > 0) {
+    /* size shrank: decrease estimate */
+    gpr_atm_no_barrier_cas(
+        &channel->call_size_estimate, (gpr_atm)cur,
+        (gpr_atm)(GPR_MIN(cur - 1, (255 * cur + size) / 256)));
+    /* if we lose: never mind, something else will likely update soon enough */
+  }
+}
+
 char *grpc_channel_get_target(grpc_channel *channel) {
   GRPC_API_TRACE("grpc_channel_get_target(channel=%p)", 1, (channel));
   return gpr_strdup(channel->target);
diff --git a/src/core/lib/surface/channel.h b/src/core/lib/surface/channel.h
index 3a441d7..c4aebd8 100644
--- a/src/core/lib/surface/channel.h
+++ b/src/core/lib/surface/channel.h
@@ -66,6 +66,9 @@
                                                 grpc_channel *channel,
                                                 int status_code);
 
+size_t grpc_channel_get_call_size_estimate(grpc_channel *channel);
+void grpc_channel_update_call_size_estimate(grpc_channel *channel, size_t size);
+
 #ifdef GRPC_STREAM_REFCOUNT_DEBUG
 void grpc_channel_internal_ref(grpc_channel *channel, const char *reason);
 void grpc_channel_internal_unref(grpc_exec_ctx *exec_ctx, grpc_channel *channel,
diff --git a/src/core/lib/surface/lame_client.c b/src/core/lib/surface/lame_client.c
index 49bc4c1..9ddb88b 100644
--- a/src/core/lib/surface/lame_client.c
+++ b/src/core/lib/surface/lame_client.c
@@ -130,8 +130,8 @@
 
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *and_free_memory) {
-  gpr_free(and_free_memory);
+                              grpc_closure *then_schedule_closure) {
+  grpc_closure_sched(exec_ctx, then_schedule_closure, GRPC_ERROR_NONE);
 }
 
 static grpc_error *init_channel_elem(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/lib/surface/server.c b/src/core/lib/surface/server.c
index b360579..1186a4a 100644
--- a/src/core/lib/surface/server.c
+++ b/src/core/lib/surface/server.c
@@ -898,7 +898,7 @@
 
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   channel_data *chand = elem->channel_data;
   call_data *calld = elem->call_data;
 
diff --git a/src/core/lib/transport/transport.c b/src/core/lib/transport/transport.c
index 3024f2a..d56cb31 100644
--- a/src/core/lib/transport/transport.c
+++ b/src/core/lib/transport/transport.c
@@ -162,9 +162,9 @@
 int grpc_transport_init_stream(grpc_exec_ctx *exec_ctx,
                                grpc_transport *transport, grpc_stream *stream,
                                grpc_stream_refcount *refcount,
-                               const void *server_data) {
+                               const void *server_data, gpr_arena *arena) {
   return transport->vtable->init_stream(exec_ctx, transport, stream, refcount,
-                                        server_data);
+                                        server_data, arena);
 }
 
 void grpc_transport_perform_stream_op(grpc_exec_ctx *exec_ctx,
@@ -197,9 +197,10 @@
 
 void grpc_transport_destroy_stream(grpc_exec_ctx *exec_ctx,
                                    grpc_transport *transport,
-                                   grpc_stream *stream, void *and_free_memory) {
+                                   grpc_stream *stream,
+                                   grpc_closure *then_schedule_closure) {
   transport->vtable->destroy_stream(exec_ctx, transport, stream,
-                                    and_free_memory);
+                                    then_schedule_closure);
 }
 
 char *grpc_transport_get_peer(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/lib/transport/transport.h b/src/core/lib/transport/transport.h
index cc1c277..950b18a 100644
--- a/src/core/lib/transport/transport.h
+++ b/src/core/lib/transport/transport.h
@@ -41,6 +41,7 @@
 #include "src/core/lib/iomgr/polling_entity.h"
 #include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/iomgr/pollset_set.h"
+#include "src/core/lib/support/arena.h"
 #include "src/core/lib/transport/byte_stream.h"
 #include "src/core/lib/transport/metadata_batch.h"
 
@@ -229,7 +230,7 @@
 int grpc_transport_init_stream(grpc_exec_ctx *exec_ctx,
                                grpc_transport *transport, grpc_stream *stream,
                                grpc_stream_refcount *refcount,
-                               const void *server_data);
+                               const void *server_data, gpr_arena *arena);
 
 void grpc_transport_set_pops(grpc_exec_ctx *exec_ctx, grpc_transport *transport,
                              grpc_stream *stream, grpc_polling_entity *pollent);
@@ -246,7 +247,8 @@
                  caller, but any child memory must be cleaned up) */
 void grpc_transport_destroy_stream(grpc_exec_ctx *exec_ctx,
                                    grpc_transport *transport,
-                                   grpc_stream *stream, void *and_free_memory);
+                                   grpc_stream *stream,
+                                   grpc_closure *then_schedule_closure);
 
 void grpc_transport_stream_op_finish_with_failure(grpc_exec_ctx *exec_ctx,
                                                   grpc_transport_stream_op *op,
diff --git a/src/core/lib/transport/transport_impl.h b/src/core/lib/transport/transport_impl.h
index 8553148..6f688bf 100644
--- a/src/core/lib/transport/transport_impl.h
+++ b/src/core/lib/transport/transport_impl.h
@@ -47,7 +47,7 @@
   /* implementation of grpc_transport_init_stream */
   int (*init_stream)(grpc_exec_ctx *exec_ctx, grpc_transport *self,
                      grpc_stream *stream, grpc_stream_refcount *refcount,
-                     const void *server_data);
+                     const void *server_data, gpr_arena *arena);
 
   /* implementation of grpc_transport_set_pollset */
   void (*set_pollset)(grpc_exec_ctx *exec_ctx, grpc_transport *self,
@@ -67,7 +67,8 @@
 
   /* implementation of grpc_transport_destroy_stream */
   void (*destroy_stream)(grpc_exec_ctx *exec_ctx, grpc_transport *self,
-                         grpc_stream *stream, void *and_free_memory);
+                         grpc_stream *stream,
+                         grpc_closure *then_schedule_closure);
 
   /* implementation of grpc_transport_destroy */
   void (*destroy)(grpc_exec_ctx *exec_ctx, grpc_transport *self);
diff --git a/src/cpp/common/channel_filter.h b/src/cpp/common/channel_filter.h
index 79c4bab..494d5d6 100644
--- a/src/cpp/common/channel_filter.h
+++ b/src/cpp/common/channel_filter.h
@@ -318,7 +318,8 @@
   static void DestroyCallElement(grpc_exec_ctx *exec_ctx,
                                  grpc_call_element *elem,
                                  const grpc_call_final_info *final_info,
-                                 void *and_free_memory) {
+                                 grpc_closure *then_call_closure) {
+    GPR_ASSERT(then_call_closure == NULL);
     reinterpret_cast<CallDataType *>(elem->call_data)->~CallDataType();
   }
 
diff --git a/test/core/channel/channel_stack_test.c b/test/core/channel/channel_stack_test.c
index 76bb573..af551c4 100644
--- a/test/core/channel/channel_stack_test.c
+++ b/test/core/channel/channel_stack_test.c
@@ -68,7 +68,7 @@
 
 static void call_destroy_func(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *ignored) {
+                              grpc_closure *ignored) {
   ++*(int *)(elem->channel_data);
 }
 
@@ -139,10 +139,16 @@
   GPR_ASSERT(*channel_data == 0);
 
   call_stack = gpr_malloc(channel_stack->call_stack_size);
-  grpc_error *error =
-      grpc_call_stack_init(&exec_ctx, channel_stack, 1, free_call, call_stack,
-                           NULL, NULL, path, gpr_now(GPR_CLOCK_MONOTONIC),
-                           gpr_inf_future(GPR_CLOCK_MONOTONIC), call_stack);
+  const grpc_call_element_args args = {
+      .call_stack = call_stack,
+      .server_transport_data = NULL,
+      .context = NULL,
+      .path = path,
+      .start_time = gpr_now(GPR_CLOCK_MONOTONIC),
+      .deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC),
+      .arena = NULL};
+  grpc_error *error = grpc_call_stack_init(&exec_ctx, channel_stack, 1,
+                                           free_call, call_stack, &args);
   GPR_ASSERT(error == GRPC_ERROR_NONE);
   GPR_ASSERT(call_stack->count == 1);
   call_elem = grpc_call_stack_element(call_stack, 0);
diff --git a/test/core/end2end/tests/filter_call_init_fails.c b/test/core/end2end/tests/filter_call_init_fails.c
index d2d6e82..65216cf 100644
--- a/test/core/end2end/tests/filter_call_init_fails.c
+++ b/test/core/end2end/tests/filter_call_init_fails.c
@@ -213,7 +213,7 @@
 
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *and_free_memory) {}
+                              grpc_closure *ignored) {}
 
 static grpc_error *init_channel_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_channel_element *elem,
diff --git a/test/core/end2end/tests/filter_causes_close.c b/test/core/end2end/tests/filter_causes_close.c
index 25e6065..c968f30 100644
--- a/test/core/end2end/tests/filter_causes_close.c
+++ b/test/core/end2end/tests/filter_causes_close.c
@@ -236,7 +236,7 @@
 
 static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                               const grpc_call_final_info *final_info,
-                              void *and_free_memory) {}
+                              grpc_closure *ignored) {}
 
 static grpc_error *init_channel_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_channel_element *elem,
diff --git a/test/core/end2end/tests/filter_latency.c b/test/core/end2end/tests/filter_latency.c
index d05e9e7..2428c92 100644
--- a/test/core/end2end/tests/filter_latency.c
+++ b/test/core/end2end/tests/filter_latency.c
@@ -267,7 +267,7 @@
 static void client_destroy_call_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_call_element *elem,
                                      const grpc_call_final_info *final_info,
-                                     void *and_free_memory) {
+                                     grpc_closure *ignored) {
   gpr_mu_lock(&g_mu);
   g_client_latency = final_info->stats.latency;
   gpr_mu_unlock(&g_mu);
@@ -276,7 +276,7 @@
 static void server_destroy_call_elem(grpc_exec_ctx *exec_ctx,
                                      grpc_call_element *elem,
                                      const grpc_call_final_info *final_info,
-                                     void *and_free_memory) {
+                                     grpc_closure *ignored) {
   gpr_mu_lock(&g_mu);
   g_server_latency = final_info->stats.latency;
   gpr_mu_unlock(&g_mu);
diff --git a/test/cpp/microbenchmarks/bm_call_create.cc b/test/cpp/microbenchmarks/bm_call_create.cc
index 014e2b9..5ef40ab 100644
--- a/test/cpp/microbenchmarks/bm_call_create.cc
+++ b/test/cpp/microbenchmarks/bm_call_create.cc
@@ -232,7 +232,7 @@
 
 static void DestroyCallElem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
                             const grpc_call_final_info *final_info,
-                            void *and_free_memory) {}
+                            grpc_closure *then_sched_closure) {}
 
 grpc_error *InitChannelElem(grpc_exec_ctx *exec_ctx, grpc_channel_element *elem,
                             grpc_channel_element_args *args) {
@@ -275,7 +275,7 @@
 /* implementation of grpc_transport_init_stream */
 int InitStream(grpc_exec_ctx *exec_ctx, grpc_transport *self,
                grpc_stream *stream, grpc_stream_refcount *refcount,
-               const void *server_data) {
+               const void *server_data, gpr_arena *arena) {
   return 0;
 }
 
@@ -299,7 +299,7 @@
 
 /* implementation of grpc_transport_destroy_stream */
 void DestroyStream(grpc_exec_ctx *exec_ctx, grpc_transport *self,
-                   grpc_stream *stream, void *and_free_memory) {}
+                   grpc_stream *stream, grpc_closure *then_sched_closure) {}
 
 /* implementation of grpc_transport_destroy */
 void Destroy(grpc_exec_ctx *exec_ctx, grpc_transport *self) {}
@@ -394,7 +394,7 @@
   grpc_channel_stack *channel_stack =
       static_cast<grpc_channel_stack *>(gpr_zalloc(channel_size));
   GPR_ASSERT(GRPC_LOG_IF_ERROR(
-      "call_stack_init",
+      "channel_stack_init",
       grpc_channel_stack_init(&exec_ctx, 1, FilterDestroy, channel_stack,
                               &filters[0], filters.size(), &channel_args,
                               fixture.flags & REQUIRES_TRANSPORT
@@ -409,15 +409,29 @@
   grpc_slice method = grpc_slice_from_static_string("/foo/bar");
   grpc_call_final_info final_info;
   TestOp test_op_data;
+  grpc_call_element_args call_args;
+  call_args.call_stack = call_stack;
+  call_args.server_transport_data = NULL;
+  call_args.context = NULL;
+  call_args.path = method;
+  call_args.start_time = start_time;
+  call_args.deadline = deadline;
+  const int kArenaSize = 4096;
+  call_args.arena = gpr_arena_create(kArenaSize);
   while (state.KeepRunning()) {
     GRPC_ERROR_UNREF(grpc_call_stack_init(&exec_ctx, channel_stack, 1,
-                                          DoNothing, NULL, NULL, NULL, method,
-                                          start_time, deadline, call_stack));
+                                          DoNothing, NULL, &call_args));
     typename TestOp::Op op(&exec_ctx, &test_op_data, call_stack);
     grpc_call_stack_destroy(&exec_ctx, call_stack, &final_info, NULL);
     op.Finish(&exec_ctx);
     grpc_exec_ctx_flush(&exec_ctx);
+    // recreate arena every 64k iterations to avoid oom
+    if (0 == (state.iterations() & 0xffff)) {
+      gpr_arena_destroy(call_args.arena);
+      call_args.arena = gpr_arena_create(kArenaSize);
+    }
   }
+  gpr_arena_destroy(call_args.arena);
   grpc_channel_stack_destroy(&exec_ctx, channel_stack);
   grpc_exec_ctx_finish(&exec_ctx);
   gpr_free(channel_stack);