Update to not depend on c++ runtime
diff --git a/src/core/lib/support/cpu_posix.cc b/src/core/lib/support/cpu_posix.cc
index 0d57d46..832818a 100644
--- a/src/core/lib/support/cpu_posix.cc
+++ b/src/core/lib/support/cpu_posix.cc
@@ -25,8 +25,6 @@
 #include <string.h>
 #include <unistd.h>
 
-#include <atomic>
-
 #include <grpc/support/alloc.h>
 #include <grpc/support/cpu.h>
 #include <grpc/support/log.h>
@@ -35,6 +33,8 @@
 
 static long ncpus = 0;
 
+static pthread_key_t thread_id_key;
+
 static void init_ncpus() {
   ncpus = sysconf(_SC_NPROCESSORS_ONLN);
   if (ncpus < 1 || ncpus > INT32_MAX) {
@@ -49,33 +49,33 @@
   return (unsigned)ncpus;
 }
 
+static void delete_thread_id_key(void *value) {
+  if (value) {
+    gpr_free(value);
+  }
+  pthread_key_delete(thread_id_key);
+}
+
+static void init_thread_id_key(void) {
+  pthread_key_create(&thread_id_key, delete_thread_id_key);
+}
+
 unsigned gpr_cpu_current_cpu(void) {
   /* NOTE: there's no way I know to return the actual cpu index portably...
      most code that's using this is using it to shard across work queues though,
      so here we use thread identity instead to achieve a similar though not
      identical effect */
-  static auto DeleteValue = [](void* value_ptr) {
-    unsigned int* value = static_cast<unsigned int*>(value_ptr);
-    if (value) {
-      gpr_free(value);
-    }
-  };
-  static pthread_key_t thread_id_key;
-  static int thread_id_key_create_result __attribute__((unused)) =
-      pthread_key_create(&thread_id_key, DeleteValue);
-  // pthread_t isn't portably defined to map to an integral type. So keep track
-  // of thread identity explicitly so hashing works reliably.
-  static std::atomic<unsigned int> thread_counter(0);
-  
-  unsigned int* thread_id =
-      static_cast<unsigned int*>(pthread_getspecific(thread_id_key));
+  static gpr_once once = GPR_ONCE_INIT;
+  gpr_once_init(&once, init_thread_id_key);
+
+  unsigned int *thread_id =
+      static_cast<unsigned int *>(pthread_getspecific(thread_id_key));
   if (thread_id == nullptr) {
-    thread_id = static_cast<unsigned int*>(gpr_malloc(sizeof(unsigned int)));
-    *thread_id = thread_counter++;
+    thread_id = static_cast<unsigned int *>(gpr_malloc(sizeof(unsigned int)));
     pthread_setspecific(thread_id_key, thread_id);
   }
 
-  return (unsigned)GPR_HASH_POINTER(*thread_id, gpr_cpu_num_cores());
+  return (unsigned)GPR_HASH_POINTER(thread_id, gpr_cpu_num_cores());
 }
 
 #endif /* GPR_CPU_POSIX */