Separate CAS/ADD RMWs
diff --git a/include/grpc/impl/codegen/atm_gcc_atomic.h b/include/grpc/impl/codegen/atm_gcc_atomic.h
index e5a623f..4bd3b25 100644
--- a/include/grpc/impl/codegen/atm_gcc_atomic.h
+++ b/include/grpc/impl/codegen/atm_gcc_atomic.h
@@ -41,12 +41,17 @@
 typedef intptr_t gpr_atm;
 
 #ifdef GPR_LOW_LEVEL_COUNTERS
-extern gpr_atm gpr_counter_rmw;
+extern gpr_atm gpr_counter_atm_cas;
+extern gpr_atm gpr_counter_atm_add;
 #define GPR_ATM_INC_COUNTER(counter) \
   __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED)
-#define GPR_ATM_INC_RMW_THEN(blah) (GPR_ATM_INC_COUNTER(gpr_counter_rmw), blah)
+#define GPR_ATM_INC_CAS_THEN(blah) \
+  (GPR_ATM_INC_COUNTER(gpr_counter_atm_cas), blah)
+#define GPR_ATM_INC_ADD_THEN(blah) \
+  (GPR_ATM_INC_COUNTER(gpr_counter_atm_add), blah)
 #else
-#define GPR_ATM_INC_RMW_THEN(blah) blah
+#define GPR_ATM_INC_CAS_THEN(blah) blah
+#define GPR_ATM_INC_ADD_THEN(blah) blah
 #endif
 
 #define gpr_atm_full_barrier() (__atomic_thread_fence(__ATOMIC_SEQ_CST))
@@ -59,28 +64,28 @@
   (__atomic_store_n((p), (intptr_t)(value), __ATOMIC_RELAXED))
 
 #define gpr_atm_no_barrier_fetch_add(p, delta) \
-  GPR_ATM_INC_RMW_THEN(                        \
+  GPR_ATM_INC_ADD_THEN(                        \
       __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED))
 #define gpr_atm_full_fetch_add(p, delta) \
-  GPR_ATM_INC_RMW_THEN(                  \
+  GPR_ATM_INC_ADD_THEN(                  \
       __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL))
 
 static __inline int gpr_atm_no_barrier_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
-  return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+  return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n(
       p, &o, n, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
 }
 
 static __inline int gpr_atm_acq_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
-  return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+  return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n(
       p, &o, n, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
 }
 
 static __inline int gpr_atm_rel_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
-  return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+  return GPR_ATM_INC_CAS_THEN(__atomic_compare_exchange_n(
       p, &o, n, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
 }
 
 #define gpr_atm_full_xchg(p, n) \
-  GPR_ATM_INC_RMW_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL))
+  GPR_ATM_INC_CAS_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL))
 
 #endif /* GRPC_IMPL_CODEGEN_ATM_GCC_ATOMIC_H */
diff --git a/src/core/lib/support/sync_posix.c b/src/core/lib/support/sync_posix.c
index 3b7d780..16e7d6e 100644
--- a/src/core/lib/support/sync_posix.c
+++ b/src/core/lib/support/sync_posix.c
@@ -44,7 +44,8 @@
 
 #ifdef GPR_LOW_LEVEL_COUNTERS
 gpr_atm gpr_mu_locks = 0;
-gpr_atm gpr_counter_rmw = 0;
+gpr_atm gpr_counter_atm_cas = 0;
+gpr_atm gpr_counter_atm_add = 0;
 #endif
 
 void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); }
diff --git a/test/cpp/microbenchmarks/bm_closure.cc b/test/cpp/microbenchmarks/bm_closure.cc
index 16d0578..03aede3 100644
--- a/test/cpp/microbenchmarks/bm_closure.cc
+++ b/test/cpp/microbenchmarks/bm_closure.cc
@@ -65,9 +65,13 @@
     out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) -
                                        mu_locks_at_start_) /
                               (double)state_.iterations())
-        << " atm_rmw/iter:"
-        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) -
-                     rmw_at_start_) /
+        << " atm_cas/iter:"
+        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_cas) -
+                     atm_cas_at_start_) /
+            (double)state_.iterations())
+        << " atm_add/iter:"
+        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_add) -
+                     atm_add_at_start_) /
             (double)state_.iterations());
 #endif
     state_.SetLabel(out.str());
@@ -77,7 +81,10 @@
   benchmark::State& state_;
 #ifdef GPR_LOW_LEVEL_COUNTERS
   const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks);
-  const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw);
+  const size_t atm_cas_at_start_ =
+      gpr_atm_no_barrier_load(&gpr_counter_atm_cas);
+  const size_t atm_add_at_start_ =
+      gpr_atm_no_barrier_load(&gpr_counter_atm_add);
 #endif
 };
 
diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc
index 5bb456a..48e131f 100644
--- a/test/cpp/microbenchmarks/bm_fullstack.cc
+++ b/test/cpp/microbenchmarks/bm_fullstack.cc
@@ -101,7 +101,8 @@
 
 #ifdef GPR_LOW_LEVEL_COUNTERS
 extern "C" gpr_atm gpr_mu_locks;
-extern "C" gpr_atm gpr_counter_rmw;
+extern "C" gpr_atm gpr_counter_atm_cas;
+extern "C" gpr_atm gpr_counter_atm_add;
 #endif
 
 class BaseFixture {
@@ -113,9 +114,13 @@
     out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) -
                                        mu_locks_at_start_) /
                               (double)s.iterations())
-        << " atm_rmw/iter:"
-        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) -
-                     rmw_at_start_) /
+        << " atm_cas/iter:"
+        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_cas) -
+                     atm_cas_at_start_) /
+            (double)s.iterations())
+        << " atm_add/iter:"
+        << ((double)(gpr_atm_no_barrier_load(&gpr_counter_atm_add) -
+                     atm_add_at_start_) /
             (double)s.iterations());
 #endif
     grpc_memory_counters counters_at_end = grpc_memory_counters_snapshot();
@@ -135,7 +140,10 @@
  private:
 #ifdef GPR_LOW_LEVEL_COUNTERS
   const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks);
-  const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw);
+  const size_t atm_cas_at_start_ =
+      gpr_atm_no_barrier_load(&gpr_counter_atm_cas);
+  const size_t atm_add_at_start_ =
+      gpr_atm_no_barrier_load(&gpr_counter_atm_add);
 #endif
   grpc_memory_counters counters_at_start_ = grpc_memory_counters_snapshot();
 };
diff --git a/tools/profiling/microbenchmarks/bm2bq.py b/tools/profiling/microbenchmarks/bm2bq.py
index 62a2f69..8ead4b4 100755
--- a/tools/profiling/microbenchmarks/bm2bq.py
+++ b/tools/profiling/microbenchmarks/bm2bq.py
@@ -66,7 +66,8 @@
   ('cli_stream_stalls_per_iteration', 'float'),
   ('svr_transport_stalls_per_iteration', 'float'),
   ('svr_stream_stalls_per_iteration', 'float'),
-  ('atm_rmw_per_iteration', 'float')
+  ('atm_cas_per_iteration', 'float'),
+  ('atm_add_per_iteration', 'float')
 ]
 
 if sys.argv[1] == '--schema':