Prevent polling island + workqueue reference loop
diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c
index e5aab29..634946a 100644
--- a/src/core/lib/iomgr/ev_epoll_linux.c
+++ b/src/core/lib/iomgr/ev_epoll_linux.c
@@ -153,7 +153,7 @@
* Polling island Declarations
*/
-// #define GRPC_PI_REF_COUNT_DEBUG
+//#define GRPC_PI_REF_COUNT_DEBUG
#ifdef GRPC_PI_REF_COUNT_DEBUG
#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
@@ -174,7 +174,7 @@
Once the ref count becomes zero, this structure is destroyed which means
we should ensure that there is never a scenario where a PI_ADD_REF() is
racing with a PI_UNREF() that just made the ref_count zero. */
- gpr_refcount ref_count;
+ gpr_atm ref_count;
/* Pointer to the polling_island this merged into.
* merged_to value is only set once in polling_island's lifetime (and that too
@@ -296,7 +296,7 @@
static void pi_add_ref_dbg(polling_island *pi, char *reason, char *file,
int line) {
- long old_cnt = gpr_atm_acq_load(&(pi->ref_count.count));
+ long old_cnt = gpr_atm_acq_load(&pi->ref_count);
pi_add_ref(pi);
gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
(void *)pi, old_cnt, old_cnt + 1, reason, file, line);
@@ -304,17 +304,22 @@
static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
char *reason, char *file, int line) {
- long old_cnt = gpr_atm_acq_load(&(pi->ref_count.count));
+ long old_cnt = gpr_atm_acq_load(&pi->ref_count);
pi_unref(exec_ctx, pi);
gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
(void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
}
#endif
-static void pi_add_ref(polling_island *pi) { gpr_ref(&pi->ref_count); }
+static void pi_add_ref(polling_island *pi) {
+ gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
+}
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
- /* If ref count went to zero, delete the polling island.
+ /* If ref count went to one, we're back to just the workqueue owning a ref.
+ Unref the workqueue to break the loop.
+
+ If ref count went to zero, delete the polling island.
Note that this deletion not be done under a lock. Once the ref count goes
to zero, we are guaranteed that no one else holds a reference to the
polling island (and that there is no racing pi_add_ref() call either).
@@ -322,12 +327,20 @@
Also, if we are deleting the polling island and the merged_to field is
non-empty, we should remove a ref to the merged_to polling island
*/
- if (gpr_unref(&pi->ref_count)) {
- polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
- polling_island_delete(exec_ctx, pi);
- if (next != NULL) {
- PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
+ switch (gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
+ case 2: /* last external ref: the only one now owned is by the workqueue */
+ GRPC_WORKQUEUE_UNREF(exec_ctx, pi->workqueue, "polling_island");
+ break;
+ case 1: {
+ polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+ polling_island_delete(exec_ctx, pi);
+ if (next != NULL) {
+ PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
+ }
+ break;
}
+ case 0:
+ GPR_UNREACHABLE_CODE(return );
}
}
@@ -478,7 +491,7 @@
pi->epoll_fd = -1;
pi->workqueue = NULL;
- gpr_ref_init(&pi->ref_count, 0);
+ gpr_atm_rel_store(&pi->ref_count, 0);
gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
@@ -501,7 +514,7 @@
error);
GPR_ASSERT(pi->workqueue->wakeup_read_fd->polling_island == NULL);
pi->workqueue->wakeup_read_fd->polling_island = pi;
- PI_ADD_REF(pi, 1);
+ PI_ADD_REF(pi, "fd");
}
done:
@@ -525,7 +538,6 @@
gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
close(pi->epoll_fd);
- GRPC_WORKQUEUE_UNREF(exec_ctx, pi->workqueue, "polling_island");
gpr_mu_destroy(&pi->mu);
gpr_free(pi->fds);
gpr_free(pi);
@@ -885,6 +897,7 @@
const char *reason) {
bool is_fd_closed = false;
grpc_error *error = GRPC_ERROR_NONE;
+ polling_island *unref_pi = NULL;
gpr_mu_lock(&fd->mu);
fd->on_done_closure = on_done;
@@ -918,7 +931,7 @@
polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
gpr_mu_unlock(&pi_latest->mu);
- PI_UNREF(exec_ctx, fd->polling_island, "fd_orphan");
+ unref_pi = fd->polling_island;
fd->polling_island = NULL;
}
gpr_mu_unlock(&fd->pi_mu);
@@ -927,6 +940,9 @@
gpr_mu_unlock(&fd->mu);
UNREF_BY(fd, 2, reason); /* Drop the reference */
+ if (unref_pi != NULL) {
+ PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
+ }
GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
}
@@ -1595,6 +1611,8 @@
gpr_mu_unlock(&fd->pi_mu);
gpr_mu_unlock(&pollset->mu);
+
+ GRPC_LOG_IF_ERROR("pollset_add_fd", error);
}
/*******************************************************************************