Remove metadata context entirely
diff --git a/src/core/transport/metadata.c b/src/core/transport/metadata.c
index cbec63c..d031efc 100644
--- a/src/core/transport/metadata.c
+++ b/src/core/transport/metadata.c
@@ -64,33 +64,17 @@
 #ifdef GRPC_METADATA_REFCOUNT_DEBUG
 #define DEBUG_ARGS , const char *file, int line
 #define FWD_DEBUG_ARGS , file, line
-#define INTERNAL_STRING_REF(s)            \
-  if (is_mdstr_static((grpc_mdstr *)(s))) \
-    ;                                     \
-  else                                    \
-  internal_string_ref((s), __FILE__, __LINE__)
-#define INTERNAL_STRING_UNREF(s)          \
-  if (is_mdstr_static((grpc_mdstr *)(s))) \
-    ;                                     \
-  else                                    \
-  internal_string_unref((s), __FILE__, __LINE__)
-#define REF_MD_LOCKED(s) ref_md_locked((s), __FILE__, __LINE__)
+#define REF_MD_LOCKED(shard, s) ref_md_locked((shard), (s), __FILE__, __LINE__)
 #else
 #define DEBUG_ARGS
 #define FWD_DEBUG_ARGS
-#define INTERNAL_STRING_REF(s)            \
-  if (is_mdstr_static((grpc_mdstr *)(s))) \
-    ;                                     \
-  else                                    \
-  internal_string_ref((s))
-#define INTERNAL_STRING_UNREF(s)          \
-  if (is_mdstr_static((grpc_mdstr *)(s))) \
-    ;                                     \
-  else                                    \
-  internal_string_unref((s))
-#define REF_MD_LOCKED(s) ref_md_locked((s))
+#define REF_MD_LOCKED(shard, s) ref_md_locked((shard), (s))
 #endif
 
+#define TABLE_IDX(hash, log2_shards, capacity) \
+  (((hash) >> (log2_shards)) % (capacity))
+#define SHARD_IDX(hash, log2_shards) ((hash) & ((1 << (log2_shards)) - 1))
+
 typedef void (*destroy_user_data_func)(void *user_data);
 
 /* Shadow structure for grpc_mdstr for non-static values */
@@ -100,14 +84,13 @@
   gpr_uint32 hash;
 
   /* private only data */
-  gpr_uint32 refs;
+  gpr_atm refcnt;
+
   gpr_uint8 has_base64_and_huffman_encoded;
   gpr_slice_refcount refcount;
 
   gpr_slice base64_and_huffman;
 
-  grpc_mdctx *context;
-
   struct internal_string *bucket_next;
 } internal_string;
 
@@ -117,69 +100,75 @@
   internal_string *key;
   internal_string *value;
 
+  /* private only data */
   gpr_atm refcnt;
 
-  /* private only data */
   gpr_mu mu_user_data;
   gpr_atm destroy_user_data;
   gpr_atm user_data;
 
-  grpc_mdctx *context;
   struct internal_metadata *bucket_next;
 } internal_metadata;
 
-typedef struct static_string {
-  grpc_mdstr *mdstr;
-  gpr_uint32 hash;
-} static_string;
-
-typedef struct static_mdelem {
-  grpc_mdelem *mdelem;
-  gpr_uint32 hash;
-} static_mdelem;
-
-struct grpc_mdctx {
-  gpr_uint32 hash_seed;
-  int refs;
-
+typedef struct strtab_shard { /* one independently locked strtab slice */
   gpr_mu mu;
+  internal_string **strs; /* bucket heads; chained via bucket_next */
+  size_t count;           /* strings currently stored in this shard */
+  size_t capacity;        /* number of buckets in strs */
+} strtab_shard;
 
-  /* linearly probed hash tables for static element lookup */
-  static_string static_strtab[GRPC_STATIC_MDSTR_COUNT * 2];
-  static_mdelem static_mdtab[GRPC_STATIC_MDELEM_COUNT * 2];
-  size_t static_strtab_maxprobe;
-  size_t static_mdtab_maxprobe;
+typedef struct mdtab_shard { /* one independently locked mdelem table slice */
+  gpr_mu mu;
+  internal_metadata **elems; /* bucket heads; chained via bucket_next */
+  size_t count;              /* elements currently stored in this shard */
+  size_t capacity;           /* number of buckets in elems */
+  size_t free;               /* stored elements whose refcnt is 0 (gc-able) */
+} mdtab_shard;
 
-  /* chained hash table of dynamically allocated strings */
-  internal_string **strtab;
-  size_t strtab_count;
-  size_t strtab_capacity;
+#define LOG2_STRTAB_SHARD_COUNT 5
+#define LOG2_MDTAB_SHARD_COUNT 4
+#define STRTAB_SHARD_COUNT ((size_t)(1 << LOG2_STRTAB_SHARD_COUNT))
+#define MDTAB_SHARD_COUNT ((size_t)(1 << LOG2_MDTAB_SHARD_COUNT))
 
-  /* chained hash table of dynamically allocated mdelems */
-  internal_metadata **mdtab;
-  size_t mdtab_count;
-  size_t mdtab_free;
-  size_t mdtab_capacity;
+/* hash seed: decided at initialization time */
+static gpr_uint32 g_hash_seed;
 
-  /* cache slots */
-  gpr_atm cache_slots[GRPC_MDELEM_CACHE_SLOT_COUNT];
-  /* compression algorithm mdelems: one per algorithm bitmask */
-  gpr_atm compression_algorithm_mdelem[1 << GRPC_COMPRESS_ALGORITHMS_COUNT];
-};
+/* linearly probed hash tables for static element lookup */
+static grpc_mdstr *g_static_strtab[GRPC_STATIC_MDSTR_COUNT * 2];
+static grpc_mdelem *g_static_mdtab[GRPC_STATIC_MDELEM_COUNT * 2];
+static size_t g_static_strtab_maxprobe;
+static size_t g_static_mdtab_maxprobe;
 
-static void internal_string_ref(internal_string *s DEBUG_ARGS);
-static void internal_string_unref(internal_string *s DEBUG_ARGS);
-static void discard_metadata(grpc_mdctx *ctx);
-static void gc_mdtab(grpc_mdctx *ctx);
-static void metadata_context_destroy_locked(grpc_mdctx *ctx);
+static strtab_shard g_strtab_shard[STRTAB_SHARD_COUNT];
+static mdtab_shard g_mdtab_shard[MDTAB_SHARD_COUNT];
+
+static void discard_metadata(mdtab_shard *shard);
+static void gc_mdtab(mdtab_shard *shard);
 
 void grpc_mdctx_global_init(void) {
-  size_t i;
+  size_t i, j;
+  g_hash_seed = (gpr_uint32)gpr_now(GPR_CLOCK_REALTIME).tv_nsec;
+  g_static_strtab_maxprobe = 0;
+  g_static_mdtab_maxprobe = 0;
+  /* build static tables */
+  memset(g_static_mdtab, 0, sizeof(g_static_mdtab));
+  memset(g_static_strtab, 0, sizeof(g_static_strtab));
   for (i = 0; i < GRPC_STATIC_MDSTR_COUNT; i++) {
     grpc_mdstr *elem = &grpc_static_mdstr_table[i];
     const char *str = grpc_static_metadata_strings[i];
+    gpr_uint32 hash = gpr_murmur_hash3(str, strlen(str), g_hash_seed);
     *(gpr_slice *)&elem->slice = gpr_slice_from_static_string(str);
-    *(gpr_uint32 *)&elem->hash = gpr_murmur_hash3(str, strlen(str), 0);
+    *(gpr_uint32 *)&elem->hash = hash;
+    for (j = 0;; j++) {
+      size_t idx = (hash + j) % GPR_ARRAY_SIZE(g_static_strtab);
+      if (g_static_strtab[idx] == NULL) {
+        g_static_strtab[idx] = &grpc_static_mdstr_table[i];
+        break;
+      }
+    }
+    if (j > g_static_strtab_maxprobe) {
+      g_static_strtab_maxprobe = j;
+    }
   }
   for (i = 0; i < GRPC_STATIC_MDELEM_COUNT; i++) {
     grpc_mdelem *elem = &grpc_static_mdelem_table[i];
@@ -187,12 +176,56 @@
         &grpc_static_mdstr_table[grpc_static_metadata_elem_indices[2 * i + 0]];
     grpc_mdstr *value =
         &grpc_static_mdstr_table[grpc_static_metadata_elem_indices[2 * i + 1]];
+    gpr_uint32 hash = GRPC_MDSTR_KV_HASH(key->hash, value->hash);
     *(grpc_mdstr **)&elem->key = key;
     *(grpc_mdstr **)&elem->value = value;
+    for (j = 0;; j++) {
+      size_t idx = (hash + j) % GPR_ARRAY_SIZE(g_static_mdtab);
+      if (g_static_mdtab[idx] == NULL) {
+        g_static_mdtab[idx] = elem;
+        break;
+      }
+    }
+    if (j > g_static_mdtab_maxprobe) {
+      g_static_mdtab_maxprobe = j;
+    }
+  }
+  /* initialize shards */
+  for (i = 0; i < STRTAB_SHARD_COUNT; i++) {
+    strtab_shard *shard = &g_strtab_shard[i];
+    gpr_mu_init(&shard->mu);
+    shard->count = 0;
+    shard->capacity = INITIAL_STRTAB_CAPACITY;
+    shard->strs = gpr_malloc(sizeof(*shard->strs) * shard->capacity);
+    memset(shard->strs, 0, sizeof(*shard->strs) * shard->capacity);
+  }
+  for (i = 0; i < MDTAB_SHARD_COUNT; i++) {
+    mdtab_shard *shard = &g_mdtab_shard[i];
+    gpr_mu_init(&shard->mu);
+    shard->count = 0;
+    shard->free = 0;
+    shard->capacity = INITIAL_MDTAB_CAPACITY;
+    shard->elems = gpr_malloc(sizeof(*shard->elems) * shard->capacity);
+    memset(shard->elems, 0, sizeof(*shard->elems) * shard->capacity);
   }
 }
 
-void grpc_mdctx_global_shutdown(void) {}
+void grpc_mdctx_global_shutdown(void) {
+  size_t i;
+  for (i = 0; i < MDTAB_SHARD_COUNT; i++) {
+    mdtab_shard *shard = &g_mdtab_shard[i];
+    discard_metadata(shard); /* may run user_data destructors */
+    GPR_ASSERT(shard->count == 0);
+    gpr_free(shard->elems);
+    gpr_mu_destroy(&shard->mu); /* last: nothing touches the shard now */
+  }
+  for (i = 0; i < STRTAB_SHARD_COUNT; i++) {
+    strtab_shard *shard = &g_strtab_shard[i];
+    GPR_ASSERT(shard->count == 0); /* every mdstr must be unref'd by now */
+    gpr_free(shard->strs);
+    gpr_mu_destroy(&shard->mu);
+  }
+}
 
 static int is_mdstr_static(grpc_mdstr *s) {
   return s >= &grpc_static_mdstr_table[0] &&
@@ -204,39 +237,8 @@
          e < &grpc_static_mdelem_table[GRPC_STATIC_MDELEM_COUNT];
 }
 
-static void lock(grpc_mdctx *ctx) { gpr_mu_lock(&ctx->mu); }
-
-static void unlock(grpc_mdctx *ctx) {
-  /* If the context has been orphaned we'd like to delete it soon. We check
-     conditions in unlock as it signals the end of mutations on a context.
-
-     We need to ensure all grpc_mdelem and grpc_mdstr elements have been deleted
-     first. This is equivalent to saying that both tables have zero counts,
-     which is equivalent to saying that strtab_count is zero (as mdelem's MUST
-     reference an mdstr for their key and value slots).
-
-     To encourage that to happen, we start discarding zero reference count
-     mdelems on every unlock (instead of the usual 'I'm too loaded' trigger
-     case), since otherwise we can be stuck waiting for a garbage collection
-     that will never happen. */
-  if (ctx->refs == 0) {
-/* uncomment if you're having trouble diagnosing an mdelem leak to make
-   things clearer (slows down destruction a lot, however) */
-#ifdef GRPC_METADATA_REFCOUNT_DEBUG
-    gc_mdtab(ctx);
-#endif
-    if (ctx->mdtab_count && ctx->mdtab_count == ctx->mdtab_free) {
-      discard_metadata(ctx);
-    }
-    if (ctx->strtab_count == 0) {
-      metadata_context_destroy_locked(ctx);
-      return;
-    }
-  }
-  gpr_mu_unlock(&ctx->mu);
-}
-
-static void ref_md_locked(internal_metadata *md DEBUG_ARGS) {
+static void ref_md_locked(mdtab_shard *shard,
+                          internal_metadata *md DEBUG_ARGS) {
 #ifdef GRPC_METADATA_REFCOUNT_DEBUG
   gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG,
           "ELM   REF:%p:%d->%d: '%s' = '%s'", md,
@@ -246,62 +248,32 @@
           grpc_mdstr_as_c_string((grpc_mdstr *)md->value));
 #endif
   if (0 == gpr_atm_no_barrier_fetch_add(&md->refcnt, 2)) {
-    md->context->mdtab_free--;
+    shard->free--;
   } else {
     GPR_ASSERT(1 != gpr_atm_no_barrier_fetch_add(&md->refcnt, -1));
   }
 }
 
+#if 0
 grpc_mdctx *grpc_mdctx_create_with_seed(gpr_uint32 seed) {
   grpc_mdctx *ctx = gpr_malloc(sizeof(grpc_mdctx));
   size_t i, j;
 
   memset(ctx, 0, sizeof(*ctx));
 
-  ctx->refs = 1;
-  ctx->hash_seed = seed;
-  gpr_mu_init(&ctx->mu);
-  ctx->strtab = gpr_malloc(sizeof(internal_string *) * INITIAL_STRTAB_CAPACITY);
-  memset(ctx->strtab, 0, sizeof(grpc_mdstr *) * INITIAL_STRTAB_CAPACITY);
-  ctx->strtab_count = 0;
-  ctx->strtab_capacity = INITIAL_STRTAB_CAPACITY;
-  ctx->mdtab = gpr_malloc(sizeof(internal_metadata *) * INITIAL_MDTAB_CAPACITY);
-  memset(ctx->mdtab, 0, sizeof(grpc_mdelem *) * INITIAL_MDTAB_CAPACITY);
-  ctx->mdtab_count = 0;
-  ctx->mdtab_capacity = INITIAL_MDTAB_CAPACITY;
-  ctx->mdtab_free = 0;
+  g_refs = 1;
+  g_hash_seed = seed;
+  gpr_mu_init(&g_mu);
+  g_strtab = gpr_malloc(sizeof(internal_string *) * INITIAL_STRTAB_CAPACITY);
+  memset(g_strtab, 0, sizeof(grpc_mdstr *) * INITIAL_STRTAB_CAPACITY);
+  g_strtab_count = 0;
+  g_strtab_capacity = INITIAL_STRTAB_CAPACITY;
+  g_mdtab = gpr_malloc(sizeof(internal_metadata *) * INITIAL_MDTAB_CAPACITY);
+  memset(g_mdtab, 0, sizeof(grpc_mdelem *) * INITIAL_MDTAB_CAPACITY);
+  g_mdtab_count = 0;
+  g_mdtab_capacity = INITIAL_MDTAB_CAPACITY;
+  g_mdtab_free = 0;
 
-  for (i = 0; i < GRPC_STATIC_MDSTR_COUNT; i++) {
-    const char *str = grpc_static_metadata_strings[i];
-    gpr_uint32 lup_hash = gpr_murmur_hash3(str, strlen(str), seed);
-    for (j = 0;; j++) {
-      size_t idx = (lup_hash + j) % GPR_ARRAY_SIZE(ctx->static_strtab);
-      if (ctx->static_strtab[idx].mdstr == NULL) {
-        ctx->static_strtab[idx].mdstr = &grpc_static_mdstr_table[i];
-        ctx->static_strtab[idx].hash = lup_hash;
-        break;
-      }
-    }
-    if (j > ctx->static_strtab_maxprobe) {
-      ctx->static_strtab_maxprobe = j;
-    }
-  }
-
-  for (i = 0; i < GRPC_STATIC_MDELEM_COUNT; i++) {
-    grpc_mdelem *elem = &grpc_static_mdelem_table[i];
-    gpr_uint32 hash = GRPC_MDSTR_KV_HASH(elem->key->hash, elem->value->hash);
-    for (j = 0;; j++) {
-      size_t idx = (hash + j) % GPR_ARRAY_SIZE(ctx->static_mdtab);
-      if (ctx->static_mdtab[idx].mdelem == NULL) {
-        ctx->static_mdtab[idx].mdelem = elem;
-        ctx->static_mdtab[idx].hash = hash;
-        break;
-      }
-    }
-    if (j > ctx->static_mdtab_maxprobe) {
-      ctx->static_mdtab_maxprobe = j;
-    }
-  }
 
   return ctx;
 }
@@ -313,55 +285,20 @@
   return grpc_mdctx_create_with_seed(
       (gpr_uint32)gpr_now(GPR_CLOCK_REALTIME).tv_nsec);
 }
+#endif
 
-static void drop_cached_elem(gpr_atm *slot) {
-  gpr_atm value = gpr_atm_no_barrier_load(slot);
-  gpr_atm_rel_store(slot, 0);
-  GRPC_MDELEM_UNREF((grpc_mdelem *)value);
-}
-
-void grpc_mdctx_drop_caches(grpc_mdctx *ctx) {
-  size_t i;
-  for (i = 0; i < GRPC_MDELEM_CACHE_SLOT_COUNT; i++) {
-    drop_cached_elem(&ctx->cache_slots[i]);
-  }
-  for (i = 0; i < GPR_ARRAY_SIZE(ctx->compression_algorithm_mdelem); i++) {
-    drop_cached_elem(&ctx->compression_algorithm_mdelem[i]);
-  }
-}
-
-static void set_cache(gpr_atm *slot, grpc_mdelem *elem) {
-  if (!gpr_atm_rel_cas(slot, 0, (gpr_atm)elem)) {
-    GRPC_MDELEM_UNREF(elem);
-  }
-}
-
-void grpc_mdctx_set_mdelem_cache(grpc_mdctx *ctx, grpc_mdelem_cache_slot slot,
-                                 grpc_mdelem *elem) {
-  set_cache(&ctx->cache_slots[slot], elem);
-}
-
-static grpc_mdelem *get_cache(gpr_atm *slot) {
-  return (grpc_mdelem *)gpr_atm_acq_load(slot);
-}
-
-grpc_mdelem *grpc_mdelem_from_cache(grpc_mdctx *ctx,
-                                    grpc_mdelem_cache_slot slot) {
-  return get_cache(&ctx->cache_slots[slot]);
-}
-
-static void discard_metadata(grpc_mdctx *ctx) {
+static void discard_metadata(mdtab_shard *shard) {
   size_t i;
   internal_metadata *next, *cur;
 
-  for (i = 0; i < ctx->mdtab_capacity; i++) {
-    cur = ctx->mdtab[i];
+  for (i = 0; i < shard->capacity; i++) {
+    cur = shard->elems[i];
     while (cur) {
       void *user_data = (void *)gpr_atm_no_barrier_load(&cur->user_data);
       GPR_ASSERT(gpr_atm_acq_load(&cur->refcnt) == 0);
       next = cur->bucket_next;
-      INTERNAL_STRING_UNREF(cur->key);
-      INTERNAL_STRING_UNREF(cur->value);
+      GRPC_MDSTR_UNREF((grpc_mdstr *)cur->key);
+      GRPC_MDSTR_UNREF((grpc_mdstr *)cur->value);
       if (user_data != NULL) {
         ((destroy_user_data_func)gpr_atm_no_barrier_load(
             &cur->destroy_user_data))(user_data);
@@ -369,29 +306,30 @@
       gpr_mu_destroy(&cur->mu_user_data);
       gpr_free(cur);
       cur = next;
-      ctx->mdtab_free--;
-      ctx->mdtab_count--;
+      shard->free--;
+      shard->count--;
     }
-    ctx->mdtab[i] = NULL;
+    shard->elems[i] = NULL;
   }
 }
 
+#if 0
 static void metadata_context_destroy_locked(grpc_mdctx *ctx) {
-  GPR_ASSERT(ctx->strtab_count == 0);
-  GPR_ASSERT(ctx->mdtab_count == 0);
-  GPR_ASSERT(ctx->mdtab_free == 0);
-  gpr_free(ctx->strtab);
-  gpr_free(ctx->mdtab);
-  gpr_mu_unlock(&ctx->mu);
-  gpr_mu_destroy(&ctx->mu);
+  GPR_ASSERT(g_strtab_count == 0);
+  GPR_ASSERT(g_mdtab_count == 0);
+  GPR_ASSERT(g_mdtab_free == 0);
+  gpr_free(g_strtab);
+  gpr_free(g_mdtab);
+  gpr_mu_unlock(&g_mu);
+  gpr_mu_destroy(&g_mu);
   gpr_free(ctx);
 }
 
 void grpc_mdctx_ref(grpc_mdctx *ctx) {
   GPR_TIMER_BEGIN("grpc_mdctx_ref", 0);
   lock(ctx);
-  GPR_ASSERT(ctx->refs > 0);
-  ctx->refs++;
+  GPR_ASSERT(g_refs > 0);
+  g_refs++;
   unlock(ctx);
   GPR_TIMER_END("grpc_mdctx_ref", 0);
 }
@@ -399,14 +337,15 @@
 void grpc_mdctx_unref(grpc_mdctx *ctx) {
   GPR_TIMER_BEGIN("grpc_mdctx_unref", 0);
   lock(ctx);
-  GPR_ASSERT(ctx->refs > 0);
-  ctx->refs--;
+  GPR_ASSERT(g_refs > 0);
+  g_refs--;
   unlock(ctx);
   GPR_TIMER_END("grpc_mdctx_unref", 0);
 }
+#endif
 
-static void grow_strtab(grpc_mdctx *ctx) {
-  size_t capacity = ctx->strtab_capacity * 2;
+static void grow_strtab(strtab_shard *shard) {
+  size_t capacity = shard->capacity * 2;
   size_t i;
   internal_string **strtab;
   internal_string *s, *next;
@@ -416,117 +355,94 @@
   strtab = gpr_malloc(sizeof(internal_string *) * capacity);
   memset(strtab, 0, sizeof(internal_string *) * capacity);
 
-  for (i = 0; i < ctx->strtab_capacity; i++) {
-    for (s = ctx->strtab[i]; s; s = next) {
+  for (i = 0; i < shard->capacity; i++) {
+    for (s = shard->strs[i]; s; s = next) {
+      size_t idx = TABLE_IDX(s->hash, LOG2_STRTAB_SHARD_COUNT, capacity);
       next = s->bucket_next;
-      s->bucket_next = strtab[s->hash % capacity];
-      strtab[s->hash % capacity] = s;
+      s->bucket_next = strtab[idx];
+      strtab[idx] = s;
     }
   }
 
-  gpr_free(ctx->strtab);
-  ctx->strtab = strtab;
-  ctx->strtab_capacity = capacity;
+  gpr_free(shard->strs);
+  shard->strs = strtab;
+  shard->capacity = capacity;
 
   GPR_TIMER_END("grow_strtab", 0);
 }
 
-static void internal_destroy_string(internal_string *is) {
+static void internal_destroy_string(strtab_shard *shard, internal_string *is) {
   internal_string **prev_next;
   internal_string *cur;
-  grpc_mdctx *ctx = is->context;
   GPR_TIMER_BEGIN("internal_destroy_string", 0);
   if (is->has_base64_and_huffman_encoded) {
     gpr_slice_unref(is->base64_and_huffman);
   }
-  for (prev_next = &ctx->strtab[is->hash % ctx->strtab_capacity],
+  for (prev_next = &shard->strs[TABLE_IDX(is->hash, LOG2_STRTAB_SHARD_COUNT,
+                                          shard->capacity)],
       cur = *prev_next;
        cur != is; prev_next = &cur->bucket_next, cur = cur->bucket_next)
     ;
   *prev_next = cur->bucket_next;
-  ctx->strtab_count--;
+  shard->count--;
   gpr_free(is);
   GPR_TIMER_END("internal_destroy_string", 0);
 }
 
-static void internal_string_ref(internal_string *s DEBUG_ARGS) {
-#ifdef GRPC_METADATA_REFCOUNT_DEBUG
-  gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "STR   REF:%p:%d->%d: '%s'", s,
-          s->refs, s->refs + 1, grpc_mdstr_as_c_string((grpc_mdstr *)s));
-#endif
-  ++s->refs;
-}
-
-static void internal_string_unref(internal_string *s DEBUG_ARGS) {
-#ifdef GRPC_METADATA_REFCOUNT_DEBUG
-  gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "STR UNREF:%p:%d->%d: '%s'", s,
-          s->refs, s->refs - 1, grpc_mdstr_as_c_string((grpc_mdstr *)s));
-#endif
-  GPR_ASSERT(s->refs > 0);
-  if (0 == --s->refs) {
-    internal_destroy_string(s);
-  }
-}
-
 static void slice_ref(void *p) {
   internal_string *is =
       (internal_string *)((char *)p - offsetof(internal_string, refcount));
-  grpc_mdctx *ctx = is->context;
-  GPR_TIMER_BEGIN("slice_ref", 0);
-  lock(ctx);
-  INTERNAL_STRING_REF(is);
-  unlock(ctx);
-  GPR_TIMER_END("slice_ref", 0);
+  GRPC_MDSTR_REF((grpc_mdstr *)(is));
 }
 
 static void slice_unref(void *p) {
   internal_string *is =
       (internal_string *)((char *)p - offsetof(internal_string, refcount));
-  grpc_mdctx *ctx = is->context;
-  GPR_TIMER_BEGIN("slice_unref", 0);
-  lock(ctx);
-  INTERNAL_STRING_UNREF(is);
-  unlock(ctx);
-  GPR_TIMER_END("slice_unref", 0);
+  GRPC_MDSTR_UNREF((grpc_mdstr *)(is));
 }
 
-grpc_mdstr *grpc_mdstr_from_string(grpc_mdctx *ctx, const char *str) {
-  return grpc_mdstr_from_buffer(ctx, (const gpr_uint8 *)str, strlen(str));
+grpc_mdstr *grpc_mdstr_from_string(const char *str) {
+  return grpc_mdstr_from_buffer((const gpr_uint8 *)str, strlen(str));
 }
 
-grpc_mdstr *grpc_mdstr_from_slice(grpc_mdctx *ctx, gpr_slice slice) {
-  grpc_mdstr *result = grpc_mdstr_from_buffer(ctx, GPR_SLICE_START_PTR(slice),
+grpc_mdstr *grpc_mdstr_from_slice(gpr_slice slice) {
+  grpc_mdstr *result = grpc_mdstr_from_buffer(GPR_SLICE_START_PTR(slice),
                                               GPR_SLICE_LENGTH(slice));
   gpr_slice_unref(slice);
   return result;
 }
 
-grpc_mdstr *grpc_mdstr_from_buffer(grpc_mdctx *ctx, const gpr_uint8 *buf,
-                                   size_t length) {
-  gpr_uint32 hash = gpr_murmur_hash3(buf, length, ctx->hash_seed);
+grpc_mdstr *grpc_mdstr_from_buffer(const gpr_uint8 *buf, size_t length) {
+  gpr_uint32 hash = gpr_murmur_hash3(buf, length, g_hash_seed);
   internal_string *s;
+  strtab_shard *shard =
+      &g_strtab_shard[SHARD_IDX(hash, LOG2_STRTAB_SHARD_COUNT)];
   size_t i;
+  size_t idx;
 
   GPR_TIMER_BEGIN("grpc_mdstr_from_buffer", 0);
 
   /* search for a static string */
-  for (i = 0; i <= ctx->static_strtab_maxprobe; i++) {
-    size_t idx = (hash + i) % GPR_ARRAY_SIZE(ctx->static_strtab);
-    static_string *ss = &ctx->static_strtab[idx];
-    if (ss->hash == hash && GPR_SLICE_LENGTH(ss->mdstr->slice) == length &&
-        0 == memcmp(buf, GPR_SLICE_START_PTR(ss->mdstr->slice), length)) {
-      return ss->mdstr;
+  for (i = 0; i <= g_static_strtab_maxprobe; i++) {
+    grpc_mdstr *ss =
+        g_static_strtab[(hash + i) % GPR_ARRAY_SIZE(g_static_strtab)];
+    if (ss == NULL) break; /* empty slot ends the probe chain */
+    if (ss->hash == hash && GPR_SLICE_LENGTH(ss->slice) == length &&
+        0 == memcmp(buf, GPR_SLICE_START_PTR(ss->slice), length)) {
+      GPR_TIMER_END("grpc_mdstr_from_buffer", 0); /* pair with BEGIN */
+      return ss;
     }
   }
 
-  lock(ctx);
+  gpr_mu_lock(&shard->mu);
 
   /* search for an existing string */
-  for (s = ctx->strtab[hash % ctx->strtab_capacity]; s; s = s->bucket_next) {
+  idx = TABLE_IDX(hash, LOG2_STRTAB_SHARD_COUNT, shard->capacity);
+  for (s = shard->strs[idx]; s; s = s->bucket_next) {
     if (s->hash == hash && GPR_SLICE_LENGTH(s->slice) == length &&
         0 == memcmp(buf, GPR_SLICE_START_PTR(s->slice), length)) {
-      INTERNAL_STRING_REF(s);
-      unlock(ctx);
+      GRPC_MDSTR_REF((grpc_mdstr *)s);
+      gpr_mu_unlock(&shard->mu);
       GPR_TIMER_END("grpc_mdstr_from_buffer", 0);
       return (grpc_mdstr *)s;
     }
@@ -536,7 +452,7 @@
   if (length + 1 < GPR_SLICE_INLINED_SIZE) {
     /* string data goes directly into the slice */
     s = gpr_malloc(sizeof(internal_string));
-    s->refs = 1;
+    gpr_atm_rel_store(&s->refcnt, 2);
     s->slice.refcount = NULL;
     memcpy(s->slice.data.inlined.bytes, buf, length);
     s->slice.data.inlined.bytes[length] = 0;
@@ -545,7 +461,7 @@
     /* string data goes after the internal_string header, and we +1 for null
        terminator */
     s = gpr_malloc(sizeof(internal_string) + length + 1);
-    s->refs = 1;
+    gpr_atm_rel_store(&s->refcnt, 2);
     s->refcount.ref = slice_ref;
     s->refcount.unref = slice_unref;
     s->slice.refcount = &s->refcount;
@@ -557,44 +473,43 @@
   }
   s->has_base64_and_huffman_encoded = 0;
   s->hash = hash;
-  s->context = ctx;
-  s->bucket_next = ctx->strtab[hash % ctx->strtab_capacity];
-  ctx->strtab[hash % ctx->strtab_capacity] = s;
+  s->bucket_next = shard->strs[idx];
+  shard->strs[idx] = s;
 
-  ctx->strtab_count++;
+  shard->count++;
 
-  if (ctx->strtab_count > ctx->strtab_capacity * 2) {
-    grow_strtab(ctx);
+  if (shard->count > shard->capacity * 2) {
+    grow_strtab(shard);
   }
 
-  unlock(ctx);
+  gpr_mu_unlock(&shard->mu);
   GPR_TIMER_END("grpc_mdstr_from_buffer", 0);
 
   return (grpc_mdstr *)s;
 }
 
-static void gc_mdtab(grpc_mdctx *ctx) {
+static void gc_mdtab(mdtab_shard *shard) {
   size_t i;
   internal_metadata **prev_next;
   internal_metadata *md, *next;
 
   GPR_TIMER_BEGIN("gc_mdtab", 0);
-  for (i = 0; i < ctx->mdtab_capacity; i++) {
-    prev_next = &ctx->mdtab[i];
-    for (md = ctx->mdtab[i]; md; md = next) {
+  for (i = 0; i < shard->capacity; i++) {
+    prev_next = &shard->elems[i];
+    for (md = shard->elems[i]; md; md = next) {
       void *user_data = (void *)gpr_atm_no_barrier_load(&md->user_data);
       next = md->bucket_next;
       if (gpr_atm_acq_load(&md->refcnt) == 0) {
-        INTERNAL_STRING_UNREF(md->key);
-        INTERNAL_STRING_UNREF(md->value);
+        GRPC_MDSTR_UNREF((grpc_mdstr *)md->key);
+        GRPC_MDSTR_UNREF((grpc_mdstr *)md->value);
         if (md->user_data) {
           ((destroy_user_data_func)gpr_atm_no_barrier_load(
               &md->destroy_user_data))(user_data);
         }
         gpr_free(md);
         *prev_next = next;
-        ctx->mdtab_free--;
-        ctx->mdtab_count--;
+        shard->free--;
+        shard->count--;
       } else {
         prev_next = &md->bucket_next;
       }
@@ -603,8 +518,8 @@
   GPR_TIMER_END("gc_mdtab", 0);
 }
 
-static void grow_mdtab(grpc_mdctx *ctx) {
-  size_t capacity = ctx->mdtab_capacity * 2;
+static void grow_mdtab(mdtab_shard *shard) {
+  size_t capacity = shard->capacity * 2;
   size_t i;
   internal_metadata **mdtab;
   internal_metadata *md, *next;
@@ -615,64 +530,66 @@
   mdtab = gpr_malloc(sizeof(internal_metadata *) * capacity);
   memset(mdtab, 0, sizeof(internal_metadata *) * capacity);
 
-  for (i = 0; i < ctx->mdtab_capacity; i++) {
-    for (md = ctx->mdtab[i]; md; md = next) {
+  for (i = 0; i < shard->capacity; i++) {
+    for (md = shard->elems[i]; md; md = next) {
+      size_t idx;
       hash = GRPC_MDSTR_KV_HASH(md->key->hash, md->value->hash);
       next = md->bucket_next;
-      md->bucket_next = mdtab[hash % capacity];
-      mdtab[hash % capacity] = md;
+      idx = TABLE_IDX(hash, LOG2_MDTAB_SHARD_COUNT, capacity);
+      md->bucket_next = mdtab[idx];
+      mdtab[idx] = md;
     }
   }
 
-  gpr_free(ctx->mdtab);
-  ctx->mdtab = mdtab;
-  ctx->mdtab_capacity = capacity;
+  gpr_free(shard->elems);
+  shard->elems = mdtab;
+  shard->capacity = capacity;
 
   GPR_TIMER_END("grow_mdtab", 0);
 }
 
-static void rehash_mdtab(grpc_mdctx *ctx) {
-  if (ctx->mdtab_free > ctx->mdtab_capacity / 4) {
-    gc_mdtab(ctx);
+static void rehash_mdtab(mdtab_shard *shard) {
+  if (shard->free > shard->capacity / 4) {
+    gc_mdtab(shard);
   } else {
-    grow_mdtab(ctx);
+    grow_mdtab(shard);
   }
 }
 
-grpc_mdelem *grpc_mdelem_from_metadata_strings(grpc_mdctx *ctx,
-                                               grpc_mdstr *mkey,
+grpc_mdelem *grpc_mdelem_from_metadata_strings(grpc_mdstr *mkey,
                                                grpc_mdstr *mvalue) {
   internal_string *key = (internal_string *)mkey;
   internal_string *value = (internal_string *)mvalue;
   gpr_uint32 hash = GRPC_MDSTR_KV_HASH(mkey->hash, mvalue->hash);
   internal_metadata *md;
+  mdtab_shard *shard = &g_mdtab_shard[SHARD_IDX(hash, LOG2_MDTAB_SHARD_COUNT)];
   size_t i;
-
-  GPR_ASSERT(is_mdstr_static(mkey) || key->context == ctx);
-  GPR_ASSERT(is_mdstr_static(mvalue) || value->context == ctx);
+  size_t idx;
 
   GPR_TIMER_BEGIN("grpc_mdelem_from_metadata_strings", 0);
 
   if (is_mdstr_static(mkey) && is_mdstr_static(mvalue)) {
-    for (i = 0; i <= ctx->static_mdtab_maxprobe; i++) {
-      size_t idx = (hash + i) % GPR_ARRAY_SIZE(ctx->static_mdtab);
-      static_mdelem *smd = &ctx->static_mdtab[idx];
-      if (smd->hash == hash && smd->mdelem->key == mkey &&
-          smd->mdelem->value == mvalue) {
-        return smd->mdelem;
+    for (i = 0; i <= g_static_mdtab_maxprobe; i++) {
+      grpc_mdelem *smd =
+          g_static_mdtab[(hash + i) % GPR_ARRAY_SIZE(g_static_mdtab)];
+      if (smd == NULL) break; /* empty slot ends the probe chain */
+      if (smd->key == mkey && smd->value == mvalue) {
+        GPR_TIMER_END("grpc_mdelem_from_metadata_strings", 0);
+        return smd; /* early exit: END above balances BEGIN */
       }
     }
   }
 
-  lock(ctx);
+  gpr_mu_lock(&shard->mu);
 
+  idx = TABLE_IDX(hash, LOG2_MDTAB_SHARD_COUNT, shard->capacity);
   /* search for an existing pair */
-  for (md = ctx->mdtab[hash % ctx->mdtab_capacity]; md; md = md->bucket_next) {
+  for (md = shard->elems[idx]; md; md = md->bucket_next) {
     if (md->key == key && md->value == value) {
-      REF_MD_LOCKED(md);
-      INTERNAL_STRING_UNREF(key);
-      INTERNAL_STRING_UNREF(value);
-      unlock(ctx);
+      REF_MD_LOCKED(shard, md);
+      GRPC_MDSTR_UNREF((grpc_mdstr *)key);
+      GRPC_MDSTR_UNREF((grpc_mdstr *)value);
+      gpr_mu_unlock(&shard->mu);
       GPR_TIMER_END("grpc_mdelem_from_metadata_strings", 0);
       return (grpc_mdelem *)md;
     }
@@ -681,12 +598,12 @@
   /* not found: create a new pair */
   md = gpr_malloc(sizeof(internal_metadata));
   gpr_atm_rel_store(&md->refcnt, 2);
-  md->context = ctx;
   md->key = key;
   md->value = value;
   md->user_data = 0;
   md->destroy_user_data = 0;
-  md->bucket_next = ctx->mdtab[hash % ctx->mdtab_capacity];
+  md->bucket_next = shard->elems[idx];
+  shard->elems[idx] = md;
   gpr_mu_init(&md->mu_user_data);
 #ifdef GRPC_METADATA_REFCOUNT_DEBUG
   gpr_log(GPR_DEBUG, "ELM   NEW:%p:%d: '%s' = '%s'", md,
@@ -694,40 +611,34 @@
           grpc_mdstr_as_c_string((grpc_mdstr *)md->key),
           grpc_mdstr_as_c_string((grpc_mdstr *)md->value));
 #endif
-  ctx->mdtab[hash % ctx->mdtab_capacity] = md;
-  ctx->mdtab_count++;
+  shard->count++;
 
-  if (ctx->mdtab_count > ctx->mdtab_capacity * 2) {
-    rehash_mdtab(ctx);
+  if (shard->count > shard->capacity * 2) {
+    rehash_mdtab(shard);
   }
 
-  unlock(ctx);
+  gpr_mu_unlock(&shard->mu);
 
   GPR_TIMER_END("grpc_mdelem_from_metadata_strings", 0);
 
   return (grpc_mdelem *)md;
 }
 
-grpc_mdelem *grpc_mdelem_from_strings(grpc_mdctx *ctx, const char *key,
-                                      const char *value) {
-  return grpc_mdelem_from_metadata_strings(ctx,
-                                           grpc_mdstr_from_string(ctx, key),
-                                           grpc_mdstr_from_string(ctx, value));
+grpc_mdelem *grpc_mdelem_from_strings(const char *key, const char *value) {
+  return grpc_mdelem_from_metadata_strings(grpc_mdstr_from_string(key),
+                                           grpc_mdstr_from_string(value));
 }
 
-grpc_mdelem *grpc_mdelem_from_slices(grpc_mdctx *ctx, gpr_slice key,
-                                     gpr_slice value) {
-  return grpc_mdelem_from_metadata_strings(ctx, grpc_mdstr_from_slice(ctx, key),
-                                           grpc_mdstr_from_slice(ctx, value));
+grpc_mdelem *grpc_mdelem_from_slices(gpr_slice key, gpr_slice value) {
+  return grpc_mdelem_from_metadata_strings(grpc_mdstr_from_slice(key),
+                                           grpc_mdstr_from_slice(value));
 }
 
-grpc_mdelem *grpc_mdelem_from_string_and_buffer(grpc_mdctx *ctx,
-                                                const char *key,
+grpc_mdelem *grpc_mdelem_from_string_and_buffer(const char *key,
                                                 const gpr_uint8 *value,
                                                 size_t value_length) {
   return grpc_mdelem_from_metadata_strings(
-      ctx, grpc_mdstr_from_string(ctx, key),
-      grpc_mdstr_from_buffer(ctx, value, value_length));
+      grpc_mdstr_from_string(key), grpc_mdstr_from_buffer(value, value_length));
 }
 
 grpc_mdelem *grpc_mdelem_ref(grpc_mdelem *gmd DEBUG_ARGS) {
@@ -763,14 +674,16 @@
           grpc_mdstr_as_c_string((grpc_mdstr *)md->value));
 #endif
   if (2 == gpr_atm_full_fetch_add(&md->refcnt, -1)) {
-    grpc_mdctx *ctx = md->context;
+    gpr_uint32 hash = GRPC_MDSTR_KV_HASH(md->key->hash, md->value->hash);
+    mdtab_shard *shard =
+        &g_mdtab_shard[SHARD_IDX(hash, LOG2_MDTAB_SHARD_COUNT)];
     GPR_TIMER_BEGIN("grpc_mdelem_unref.to_zero", 0);
-    lock(ctx);
+    gpr_mu_lock(&shard->mu);
     if (1 == gpr_atm_no_barrier_load(&md->refcnt)) {
-      ctx->mdtab_free++;
+      shard->free++;
       gpr_atm_no_barrier_store(&md->refcnt, 0);
     }
-    unlock(ctx);
+    gpr_mu_unlock(&shard->mu);
     GPR_TIMER_END("grpc_mdelem_unref.to_zero", 0);
   }
 }
@@ -781,40 +694,31 @@
 
 grpc_mdstr *grpc_mdstr_ref(grpc_mdstr *gs DEBUG_ARGS) {
   internal_string *s = (internal_string *)gs;
-  grpc_mdctx *ctx;
   if (is_mdstr_static(gs)) return gs;
-  ctx = s->context;
-  lock(ctx);
-  internal_string_ref(s FWD_DEBUG_ARGS);
-  unlock(ctx);
+  GPR_ASSERT(gpr_atm_full_fetch_add(&s->refcnt, 1) != 0);
   return gs;
 }
 
 void grpc_mdstr_unref(grpc_mdstr *gs DEBUG_ARGS) {
   internal_string *s = (internal_string *)gs;
-  grpc_mdctx *ctx;
   if (is_mdstr_static(gs)) return;
-  ctx = s->context;
-  lock(ctx);
-  internal_string_unref(s FWD_DEBUG_ARGS);
-  unlock(ctx);
-}
-
-size_t grpc_mdctx_get_mdtab_capacity_test_only(grpc_mdctx *ctx) {
-  return ctx->mdtab_capacity;
-}
-
-size_t grpc_mdctx_get_mdtab_count_test_only(grpc_mdctx *ctx) {
-  return ctx->mdtab_count;
-}
-
-size_t grpc_mdctx_get_mdtab_free_test_only(grpc_mdctx *ctx) {
-  return ctx->mdtab_free;
+  if (2 == gpr_atm_full_fetch_add(&s->refcnt, -1)) {
+    strtab_shard *shard =
+        &g_strtab_shard[SHARD_IDX(s->hash, LOG2_STRTAB_SHARD_COUNT)];
+    gpr_mu_lock(&shard->mu);
+    if (1 == gpr_atm_no_barrier_load(&s->refcnt)) {
+      internal_destroy_string(shard, s);
+    }
+    gpr_mu_unlock(&shard->mu);
+  }
 }
 
 void *grpc_mdelem_get_user_data(grpc_mdelem *md, void (*destroy_func)(void *)) {
   internal_metadata *im = (internal_metadata *)md;
   void *result;
+  if (is_mdelem_static(md)) {
+    return (void *)grpc_static_mdelem_user_data[md - grpc_static_mdelem_table];
+  }
   if (gpr_atm_acq_load(&im->destroy_user_data) == (gpr_atm)destroy_func) {
     return (void *)gpr_atm_no_barrier_load(&im->user_data);
   } else {
@@ -845,15 +749,16 @@
 gpr_slice grpc_mdstr_as_base64_encoded_and_huffman_compressed(grpc_mdstr *gs) {
   internal_string *s = (internal_string *)gs;
   gpr_slice slice;
-  grpc_mdctx *ctx = s->context;
-  lock(ctx);
+  strtab_shard *shard =
+      &g_strtab_shard[SHARD_IDX(s->hash, LOG2_STRTAB_SHARD_COUNT)];
+  gpr_mu_lock(&shard->mu);
   if (!s->has_base64_and_huffman_encoded) {
     s->base64_and_huffman =
         grpc_chttp2_base64_encode_and_huffman_compress(s->slice);
     s->has_base64_and_huffman_encoded = 1;
   }
   slice = s->base64_and_huffman;
-  unlock(ctx);
+  gpr_mu_unlock(&shard->mu);
   return slice;
 }
 
@@ -885,52 +790,6 @@
   return conforms_to(s, legal_header_bits);
 }
 
-static grpc_mdelem *make_accept_encoding_mdelem_for_compression_algorithms(
-    grpc_mdctx *mdctx, gpr_uint32 algorithms) {
-  gpr_strvec sv;
-  int i;
-  char *str;
-  grpc_mdelem *out;
-
-  gpr_strvec_init(&sv);
-  for (i = 0; algorithms != 0; i++, algorithms >>= 1) {
-    if (algorithms & 1) {
-      char *name;
-      GPR_ASSERT(grpc_compression_algorithm_name((grpc_compression_algorithm)i,
-                                                 &name));
-      if (sv.count) {
-        gpr_strvec_add(&sv, gpr_strdup(","));
-      }
-      gpr_strvec_add(&sv, gpr_strdup(name));
-    }
-  }
-  str = gpr_strvec_flatten(&sv, NULL);
-  out =
-      grpc_mdelem_from_metadata_strings(mdctx, GRPC_MDSTR_GRPC_ACCEPT_ENCODING,
-                                        grpc_mdstr_from_string(mdctx, str));
-  gpr_strvec_destroy(&sv);
-  gpr_free(str);
-  return out;
-}
-
-grpc_mdelem *grpc_accept_encoding_mdelem_from_compression_algorithms(
-    grpc_mdctx *ctx, gpr_uint32 algorithms) {
-  grpc_mdelem *ret;
-  gpr_atm *slot;
-  GPR_ASSERT(algorithms < GPR_ARRAY_SIZE(ctx->compression_algorithm_mdelem));
-
-  slot = &ctx->compression_algorithm_mdelem[algorithms];
-  ret = get_cache(slot);
-  if (ret == NULL) {
-    set_cache(slot, make_accept_encoding_mdelem_for_compression_algorithms(
-                        ctx, algorithms));
-    ret = get_cache(slot);
-    GPR_ASSERT(ret != NULL);
-  }
-
-  return ret;
-}
-
 int grpc_mdstr_is_bin_suffixed(grpc_mdstr *s) {
   /* TODO(ctiller): consider caching this */
   return grpc_is_binary_header((const char *)GPR_SLICE_START_PTR(s->slice),