Merge pull request #1012 from dklempner/nobarrier

Add a no_barrier_load atomic operation.
diff --git a/include/grpc/support/atm_gcc_atomic.h b/include/grpc/support/atm_gcc_atomic.h
index 11d78b4..65d3d0c 100644
--- a/include/grpc/support/atm_gcc_atomic.h
+++ b/include/grpc/support/atm_gcc_atomic.h
@@ -43,6 +43,7 @@
 #define gpr_atm_full_barrier() (__atomic_thread_fence(__ATOMIC_SEQ_CST))
 
 #define gpr_atm_acq_load(p) (__atomic_load_n((p), __ATOMIC_ACQUIRE))
+#define gpr_atm_no_barrier_load(p) (__atomic_load_n((p), __ATOMIC_RELAXED))
 #define gpr_atm_rel_store(p, value) \
   (__atomic_store_n((p), (gpr_intptr)(value), __ATOMIC_RELEASE))
 
diff --git a/include/grpc/support/atm_gcc_sync.h b/include/grpc/support/atm_gcc_sync.h
index e863bfd..4955e44 100644
--- a/include/grpc/support/atm_gcc_sync.h
+++ b/include/grpc/support/atm_gcc_sync.h
@@ -40,9 +40,11 @@
 
 typedef gpr_intptr gpr_atm;
 
+#define GPR_ATM_COMPILE_BARRIER_() __asm__ __volatile__("" : : : "memory")
+
 #if defined(__i386) || defined(__x86_64__)
 /* All loads are acquire loads and all stores are release stores.  */
-#define GPR_ATM_LS_BARRIER_() __asm__ __volatile__("" : : : "memory")
+#define GPR_ATM_LS_BARRIER_() GPR_ATM_COMPILE_BARRIER_()
 #else
 #define GPR_ATM_LS_BARRIER_() gpr_atm_full_barrier()
 #endif
@@ -55,12 +57,19 @@
   return value;
 }
 
+static __inline gpr_atm gpr_atm_no_barrier_load(const gpr_atm *p) {
+  gpr_atm value = *p;
+  GPR_ATM_COMPILE_BARRIER_();
+  return value;
+}
+
 static __inline void gpr_atm_rel_store(gpr_atm *p, gpr_atm value) {
   GPR_ATM_LS_BARRIER_();
   *p = value;
 }
 
 #undef GPR_ATM_LS_BARRIER_
+#undef GPR_ATM_COMPILE_BARRIER_
 
 #define gpr_atm_no_barrier_fetch_add(p, delta) \
   gpr_atm_full_fetch_add((p), (delta))
diff --git a/include/grpc/support/atm_win32.h b/include/grpc/support/atm_win32.h
index 3b9113c..18bf372 100644
--- a/include/grpc/support/atm_win32.h
+++ b/include/grpc/support/atm_win32.h
@@ -49,6 +49,11 @@
   return result;
 }
 
+static __inline gpr_atm gpr_atm_no_barrier_load(const gpr_atm *p) {
+  /* TODO(dklempner): Can we implement something better here? */
+  gpr_atm_acq_load(p);
+}
+
 static __inline void gpr_atm_rel_store(gpr_atm *p, gpr_atm value) {
   gpr_atm_full_barrier();
   *p = value;
diff --git a/src/core/iomgr/fd_posix.c b/src/core/iomgr/fd_posix.c
index abdd49b..9c8133d 100644
--- a/src/core/iomgr/fd_posix.c
+++ b/src/core/iomgr/fd_posix.c
@@ -210,7 +210,7 @@
     /* swap was unsuccessful due to an intervening set_ready call.
        Fall through to the READY code below */
     case READY:
-      assert(gpr_atm_acq_load(st) == READY);
+      assert(gpr_atm_no_barrier_load(st) == READY);
       gpr_atm_rel_store(st, NOT_READY);
       make_callback(closure->cb, closure->cb_arg,
                     !gpr_atm_acq_load(&fd->shutdown),
@@ -245,8 +245,8 @@
          Fall through to the WAITING code below */
       state = gpr_atm_acq_load(st);
     default: /* waiting */
-      assert(gpr_atm_acq_load(st) != READY &&
-             gpr_atm_acq_load(st) != NOT_READY);
+      assert(gpr_atm_no_barrier_load(st) != READY &&
+             gpr_atm_no_barrier_load(st) != NOT_READY);
       callbacks[(*ncallbacks)++] = *(grpc_iomgr_closure *)state;
       gpr_atm_rel_store(st, NOT_READY);
       return;
@@ -271,7 +271,7 @@
   grpc_iomgr_closure cb[2];
   size_t ncb = 0;
   gpr_mu_lock(&fd->set_state_mu);
-  GPR_ASSERT(!gpr_atm_acq_load(&fd->shutdown));
+  GPR_ASSERT(!gpr_atm_no_barrier_load(&fd->shutdown));
   gpr_atm_rel_store(&fd->shutdown, 1);
   set_ready_locked(&fd->readst, cb, &ncb);
   set_ready_locked(&fd->writest, cb, &ncb);