/*
 *
 * Copyright 2016, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "src/core/lib/iomgr/port.h"

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)

#define GRPC_POLLING_TRACE(fmt, ...)        \
  if (GRPC_TRACER_ON(grpc_polling_trace)) { \
    gpr_log(GPR_INFO, (fmt), __VA_ARGS__);  \
  }

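/* Usage note (illustrative example, not part of the original file):
   GRPC_POLLING_TRACE takes a printf-style format string and requires at least
   one variadic argument; it logs only when the grpc_polling_trace tracer is
   enabled. For example:

     GRPC_POLLING_TRACE("pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
                        (void *)worker, (long int)worker->pt_id);
*/
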
/* Uncomment the following to enable extra checks on poll_object operations */
/* #define PO_DEBUG */

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;

/* Implements the function defined in grpc_posix.h. This function may be
 * called even before grpc_init() to set a different signal to use. If
 * signum == -1, the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifdef PO_DEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island *pi;
} poll_obj;

const char *poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd *)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
       bit 0    : 1=Active / 0=Orphaned
       bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  gpr_atm read_closure;
  gpr_atm write_closure;

  struct grpc_fd *freelist_next;
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};

/* Reference counting for fds */
// #define GRPC_FD_REF_COUNT_DEBUG
#ifdef GRPC_FD_REF_COUNT_DEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd *fd);
static void fd_unref(grpc_fd *fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

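/* Usage sketch (illustrative): the 'reason' string is purely diagnostic and is
   only printed when GRPC_FD_REF_COUNT_DEBUG is defined, e.g.

     GRPC_FD_REF(fd, "polling_island");
     ...
     GRPC_FD_UNREF(fd, "polling_island");
*/
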
static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

//#define PI_REFCOUNT_DEBUG

#ifdef PI_REFCOUNT_DEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else /* defined(PI_REFCOUNT_DEBUG) */

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif /* !defined(PI_REFCOUNT_DEBUG) */

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd **fds;
} polling_island;

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

static bool append_error(grpc_error **composite, grpc_error *error,
                         const char *desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}

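/* Usage sketch (illustrative; 'my_caller' is a placeholder name): callers
   accumulate several potentially failing steps into one composite error and
   hand it off once, mirroring the pattern used throughout this file:

     grpc_error *error = GRPC_ERROR_NONE;
     append_error(&error, GRPC_OS_ERROR(errno, "epoll_ctl"), "my_caller");
     append_error(&error, GRPC_OS_ERROR(errno, "close"), "my_caller");
     GRPC_LOG_IF_ERROR("my_caller", error); /* takes ownership of 'error' */
*/
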
/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wakeup all
   the threads currently polling the smaller polling island (so that they can
   start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island *g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work-around this race, we establish a happens-before relation between
   the code just-before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island *pi);
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifdef PI_REFCOUNT_DEBUG
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  long old_cnt = gpr_atm_acq_load(&pi->ref_count);
  pi_add_ref(pi);
  gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
          (void *)pi, old_cnt, old_cnt + 1, reason, file, line);
}

static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  long old_cnt = gpr_atm_acq_load(&pi->ref_count);
  pi_unref(exec_ctx, pi);
  gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
          (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
}
#endif

static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}

/* Might return NULL in case of an error */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}

static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}

/* Attempts to get the last polling island in the linked list (linked by the
 * 'merged_to' field). Since this does not lock the polling island, there are
 * no guarantees that the island returned is the last island */
static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
  polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  while (next != NULL) {
    pi = next;
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  }

  return pi;
}

/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
      polling_island *pi_latest = polling_island_lock(pi);
      ...
      ... critical section ..
      ...
      gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p);
       polling_island_lock(*q);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (the one with fewer fds) to q.
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false' */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}

static grpc_error *polling_island_global_init() {
  grpc_error *error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wakeup 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd *fd_freelist = NULL;
static gpr_mu fd_freelist_mu;

#ifdef GRPC_FD_REF_COUNT_DEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd,
          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
          gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}

#ifdef GRPC_FD_REF_COUNT_DEBUG
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  gpr_atm old;
  gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd,
          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
          gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
#else
static void unref_by(grpc_fd *fd, int n) {
  gpr_atm old;
#endif
  old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifdef GRPC_FD_REF_COUNT_DEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifdef PO_DEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifdef GRPC_FD_REF_COUNT_DEBUG
  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
#endif
  gpr_free(fd_name);
  return new_fd;
}

static int fd_wrapped_fd(grpc_fd *fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  bool is_fd_closed = false;
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
    is_fd_closed = true;
  }

  fd->orphaned = true;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  grpc_closure_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
  GRPC_ERROR_UNREF(error);
}

static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}

static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}

static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}

Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700957 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700958 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700959GPR_TLS_DECL(g_current_thread_pollset);
960GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700961static __thread bool g_initialized_sigmask;
962static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700963
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700964static void sig_handler(int sig_num) {
Sree Kuchibhotlad627c102016-06-06 15:49:32 -0700965#ifdef GRPC_EPOLL_DEBUG
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700966 gpr_log(GPR_INFO, "Received signal %d", sig_num);
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700967#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700968}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700969
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -0700970static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700971
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700972/* Global state management */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700973static grpc_error *pollset_global_init(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700974 gpr_tls_init(&g_current_thread_pollset);
975 gpr_tls_init(&g_current_thread_worker);
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700976 poller_kick_init();
Craig Tillerc3571792017-05-02 12:33:38 -0700977 return GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700978}
979
980static void pollset_global_shutdown(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700981 gpr_tls_destroy(&g_current_thread_pollset);
982 gpr_tls_destroy(&g_current_thread_worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700983}
984
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700985static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
986 grpc_error *err = GRPC_ERROR_NONE;
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700987
988 /* Kick the worker only if it was not already kicked */
989 if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
990 GRPC_POLLING_TRACE(
991 "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
Ken Payson975b5102017-03-30 17:38:40 -0700992 (void *)worker, (long int)worker->pt_id);
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700993 int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
994 if (err_num != 0) {
995 err = GRPC_OS_ERROR(err_num, "pthread_kill");
996 }
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700997 }
998 return err;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700999}
1000
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001001/* Return 1 if the pollset has active threads in pollset_work (pollset must
1002 * be locked) */
1003static int pollset_has_workers(grpc_pollset *p) {
1004 return p->root_worker.next != &p->root_worker;
1005}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001006
1007static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1008 worker->prev->next = worker->next;
1009 worker->next->prev = worker->prev;
1010}
1011
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001012static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1013 if (pollset_has_workers(p)) {
1014 grpc_pollset_worker *w = p->root_worker.next;
1015 remove_worker(p, w);
1016 return w;
1017 } else {
1018 return NULL;
1019 }
1020}
1021
1022static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1023 worker->next = &p->root_worker;
1024 worker->prev = worker->next->prev;
1025 worker->prev->next = worker->next->prev = worker;
1026}
1027
1028static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1029 worker->prev = &p->root_worker;
1030 worker->next = worker->prev->next;
1031 worker->prev->next = worker->next->prev = worker;
1032}
1033
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001034/* p->mu must be held before calling this function */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001035static grpc_error *pollset_kick(grpc_pollset *p,
1036 grpc_pollset_worker *specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001037 GPR_TIMER_BEGIN("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001038 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001039 const char *err_desc = "Kick Failure";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001040 grpc_pollset_worker *worker = specific_worker;
1041 if (worker != NULL) {
1042 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001043 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001044 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001045 for (worker = p->root_worker.next; worker != &p->root_worker;
1046 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001047 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001048 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001049 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001050 }
Craig Tillera218a062016-06-26 09:58:37 -07001051 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001052 } else {
1053 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001054 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001055 } else {
1056 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001057 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001058 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001059 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001060 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001061 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1062 /* Since worker == NULL, it means that we can kick "any" worker on this
1063 pollset 'p'. If 'p' happens to be the same pollset this thread is
1064 currently polling (i.e in pollset_work() function), then there is no need
1065 to kick any other worker since the current thread can just absorb the
1066 kick. This is the reason why we enter this case only when
1067 g_current_thread_pollset is != p */
1068
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001069 GPR_TIMER_MARK("kick_anonymous", 0);
1070 worker = pop_front_worker(p);
1071 if (worker != NULL) {
1072 GPR_TIMER_MARK("finally_kick", 0);
1073 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001074 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001075 } else {
1076 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001077 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001078 }
1079 }
1080
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001081 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001082 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1083 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001084}
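/* Illustrative call patterns for pollset_kick() (a sketch of the cases
   handled above; p->mu must be held for each):

     pollset_kick(p, NULL);                          - wake any one worker
     pollset_kick(p, GRPC_POLLSET_KICK_BROADCAST);   - wake all other workers
     pollset_kick(p, specific_worker);               - wake one known worker

   In the NULL case the chosen worker is rotated to the back of the worker
   list so that repeated anonymous kicks are spread across workers; if no
   worker is currently polling, the kick is remembered via
   kicked_without_pollers instead. */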
1085
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001086static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001087 gpr_mu_init(&pollset->po.mu);
1088 *mu = &pollset->po.mu;
1089 pollset->po.pi = NULL;
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001090#ifdef PO_DEBUG
1091 pollset->po.obj_type = POLL_OBJ_POLLSET;
1092#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001093
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001094 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001095 pollset->kicked_without_pollers = false;
1096
1097 pollset->shutting_down = false;
1098 pollset->finish_shutdown_called = false;
1099 pollset->shutdown_done = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001100}
1101
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001102/* Convert a timespec to milliseconds:
1103 - Very small or negative poll times are clamped to zero to do a non-blocking
1104 poll (which becomes spin polling)
1105 - Other small values are rounded up to one millisecond
1106 - Polls longer than a millisecond are rounded up to the next whole
1107 millisecond to avoid spinning
1108 - Infinite timeouts are converted to -1 */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001109static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
1110 gpr_timespec now) {
1111 gpr_timespec timeout;
1112 static const int64_t max_spin_polling_us = 10;
1113 if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
1114 return -1;
1115 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001116
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001117 if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
1118 max_spin_polling_us,
1119 GPR_TIMESPAN))) <= 0) {
1120 return 0;
1121 }
1122 timeout = gpr_time_sub(deadline, now);
Craig Tiller799e7e82017-03-27 12:42:34 -07001123 int millis = gpr_time_to_millis(gpr_time_add(
1124 timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
1125 return millis >= 1 ? millis : 1;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001126}
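/* Worked examples of the conversion above (illustrative values only):

     deadline == gpr_inf_future(clock)   ->  -1  (block indefinitely)
     deadline <= now + 10us              ->   0  (non-blocking / spin poll)
     deadline == now + 100us             ->   1  (small values round up to 1ms)
     deadline == now + 2.5ms             ->   3  (rounded up to the next whole ms) */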
1127
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001128static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
1129 grpc_pollset *notifier) {
Craig Tiller70652142017-04-06 08:31:23 -07001130 grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001131
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001132 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001133 different 'notifier's when an fd becomes readable and it is in two epoll
1134 sets (This can happen briefly during polling island merges). In such cases
1135 it does not really matter which notifier is set as the read_notifier_pollset
1136 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001137 /* Use release store to match with acquire load in fd_get_read_notifier */
1138 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001139}
1140
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001141static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
Craig Tillere16372b2017-04-06 08:51:39 -07001142 grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001143}
1144
Craig Tillerb39307d2016-06-30 15:39:13 -07001145static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
1146 grpc_pollset *ps, char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001147 if (ps->po.pi != NULL) {
1148 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001149 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001150 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001151}
1152
1153static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
1154 grpc_pollset *pollset) {
1155 /* The pollset cannot have any workers if we are at this stage */
1156 GPR_ASSERT(!pollset_has_workers(pollset));
1157
1158 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001159
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001160 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001161 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
Craig Tiller91031da2016-12-28 15:44:25 -08001162 grpc_closure_sched(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001163}
1164
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001165/* pollset->po.mu lock must be held by the caller before calling this */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001166static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1167 grpc_closure *closure) {
1168 GPR_TIMER_BEGIN("pollset_shutdown", 0);
1169 GPR_ASSERT(!pollset->shutting_down);
1170 pollset->shutting_down = true;
1171 pollset->shutdown_done = closure;
1172 pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
1173
1174 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1175 because it would release the underlying polling island. In such a case, we
1176 let the last worker call finish_shutdown_locked() from pollset_work() */
1177 if (!pollset_has_workers(pollset)) {
1178 GPR_ASSERT(!pollset->finish_shutdown_called);
1179 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
1180 finish_shutdown_locked(exec_ctx, pollset);
1181 }
1182 GPR_TIMER_END("pollset_shutdown", 0);
1183}
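/* A minimal shutdown sequence, as assumed by the functions above (sketch
   only; 'on_shutdown_done' is a hypothetical caller-provided closure):

     gpr_mu_lock(&pollset->po.mu);
     pollset_shutdown(exec_ctx, pollset, on_shutdown_done);
     gpr_mu_unlock(&pollset->po.mu);
     ... once on_shutdown_done is scheduled (the last worker has drained) ...
     pollset_destroy(exec_ctx, pollset);

   finish_shutdown_locked() is deferred to the last worker precisely so that
   the polling island ref is not released while a worker is still inside
   pollset_work(). */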
1184
1185/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1186 * than destroying the mutex, there is nothing special that needs to be done
1187 * here */
Craig Tillerf8401102017-04-17 09:47:28 -07001188static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001189 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001190 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001191}
1192
Craig Tiller84ea3412016-09-08 14:57:56 -07001193#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001194/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
1195static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001196 grpc_pollset *pollset,
1197 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001198 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001199 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001200 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001201 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001202 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001203 char *err_msg;
1204 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001205 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1206
1207 /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001208 latest polling island pointed to by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001209
1210 Since epoll_fd is immutable, we can read it without obtaining the polling
1211 island lock. There is however a possibility that the polling island (from
1212 which we got the epoll_fd) got merged with another island while we are
1213 in this function. This is still okay because in such a case, we will wake up
1214 right away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001215 time this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001216
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001217 if (pollset->po.pi == NULL) {
1218 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1219 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001220 GPR_TIMER_END("pollset_work_and_unlock", 0);
1221 return; /* Fatal error. We cannot continue */
1222 }
1223
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001224 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001225 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001226 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001227 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001228
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001229 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001230 epoll_fd = pi->epoll_fd;
1231
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001232 /* Update the pollset->po.pi since the island being pointed to by
1233 pollset->po.pi may be older than the one pointed to by pi */
1234 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001235 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1236 polling island to be deleted */
1237 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001238 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1239 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001240 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001241
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001242 /* Add an extra ref so that the island does not get destroyed (which means
1243 the epoll_fd won't be closed) while we are doing an epoll_wait() on the
1244 epoll_fd */
1245 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001246 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001247
Craig Tiller61f96c12017-05-12 13:36:39 -07001248 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1249 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001250
Craig Tiller61f96c12017-05-12 13:36:39 -07001251 GRPC_SCHEDULING_START_BLOCKING_REGION;
1252 ep_rv =
1253 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
1254 GRPC_SCHEDULING_END_BLOCKING_REGION;
1255 if (ep_rv < 0) {
1256 if (errno != EINTR) {
1257 gpr_asprintf(&err_msg,
1258                    "epoll_pwait() epoll fd: %d failed with error: %d (%s)",
1259 epoll_fd, errno, strerror(errno));
1260 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1261 } else {
1262       /* We were interrupted. Save an iteration by doing a zero timeout
1263 epoll_wait to see if there are any other events of interest */
1264 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1265 (void *)pollset, (void *)worker);
1266 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001267 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001268 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001269
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001270#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001271 /* See the definition of g_poll_sync for more details */
1272 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001273#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001274
Craig Tiller61f96c12017-05-12 13:36:39 -07001275 for (int i = 0; i < ep_rv; ++i) {
1276 void *data_ptr = ep_ev[i].data.ptr;
1277 if (data_ptr == &polling_island_wakeup_fd) {
1278 GRPC_POLLING_TRACE(
1279 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1280 "%d) got merged",
1281 (void *)pollset, (void *)worker, epoll_fd);
1282 /* This means that our polling island is merged with a different
1283 island. We do not have to do anything here since the subsequent call
1284 to the function pollset_work_and_unlock() will pick up the correct
1285 epoll_fd */
1286 } else {
1287 grpc_fd *fd = data_ptr;
1288 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1289 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1290 int write_ev = ep_ev[i].events & EPOLLOUT;
1291 if (read_ev || cancel) {
1292 fd_become_readable(exec_ctx, fd, pollset);
1293 }
1294 if (write_ev || cancel) {
1295 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001296 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001297 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001298 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001299
Craig Tiller61f96c12017-05-12 13:36:39 -07001300 g_current_thread_polling_island = NULL;
1301 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1302
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001303 GPR_ASSERT(pi != NULL);
1304
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001305 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001306 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001307     that we got before releasing the pollset lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001308     pollset->po.pi pointer might get updated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001309 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001310 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001311
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001312 GPR_TIMER_END("pollset_work_and_unlock", 0);
1313}
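/* Note on the epoll_pwait() call above: the thread's signal mask is replaced
   by sig_mask (which unblocks grpc_wakeup_signal) atomically with entering
   the wait and restored on return. A non-atomic equivalent (sketch only, not
   used anywhere in this file) would lose kicks delivered between the steps:

     pthread_sigmask(SIG_SETMASK, sig_mask, &old);
     epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms);
     pthread_sigmask(SIG_SETMASK, &old, NULL);

   because the pending wakeup signal would be delivered before epoll_wait()
   starts, leaving the worker blocked for the full timeout. */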
1314
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001315/* pollset->po.mu lock must be held by the caller before calling this.
1316 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001317 during the course of its execution but it will always re-acquire the lock and
1318 ensure that it is held by the time the function returns */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001319static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1320 grpc_pollset_worker **worker_hdl,
1321 gpr_timespec now, gpr_timespec deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001322 GPR_TIMER_BEGIN("pollset_work", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001323 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001324 int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);
1325
1326 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001327
1328 grpc_pollset_worker worker;
1329 worker.next = worker.prev = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001330 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001331 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001332
Craig Tiller557c88c2017-04-05 17:20:18 -07001333 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001334
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001335 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1336 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001337
1338 if (pollset->kicked_without_pollers) {
1339 /* If the pollset was kicked without pollers, pretend that the current
1340 worker got the kick and skip polling. A kick indicates that there is some
1341 work that needs attention like an event on the completion queue or an
1342 alarm */
1343 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1344 pollset->kicked_without_pollers = 0;
1345 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001346 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001347 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1348 worker that there is some pending work that needs immediate attention
1349 (like an event on the completion queue, or a polling island merge that
1350 results in a new epoll-fd to wait on) and that the worker should not
1351 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001352
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001353 A worker can be kicked anytime from the point it is added to the pollset
1354 via push_front_worker() (or push_back_worker()) to the point it is
1355 removed via remove_worker().
1356 If the worker is kicked before/during it calls epoll_pwait(), it should
1357 immediately exit from epoll_wait(). If the worker is kicked after it
1358 returns from epoll_wait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001359
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001360 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001361 times *except* when it is in epoll_pwait(). This way, the worker never
1362 misses acting on a kick */
1363
Craig Tiller19196992016-06-27 18:45:56 -07001364 if (!g_initialized_sigmask) {
1365 sigemptyset(&new_mask);
1366 sigaddset(&new_mask, grpc_wakeup_signal);
1367 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1368 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1369 g_initialized_sigmask = true;
1370 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1371 This is the mask used at all times *except during
1372                          epoll_wait()*
1373 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001374 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001375
Craig Tiller19196992016-06-27 18:45:56 -07001376 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001377 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001378 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001379
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001380 push_front_worker(pollset, &worker); /* Add worker to pollset */
1381
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001382 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1383 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001384 grpc_exec_ctx_flush(exec_ctx);
1385
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001386 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001387
1388 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1389 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001390 remove_worker(pollset, &worker);
1391 }
1392
1393 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1394 false at this point) and the pollset is shutting down, we may have to
1395 finish the shutdown process by calling finish_shutdown_locked().
1396 See pollset_shutdown() for more details.
1397
1398 Note: Continuing to access pollset here is safe; it is the caller's
1399 responsibility to not destroy a pollset when it has outstanding calls to
1400 pollset_work() */
1401 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1402 !pollset->finish_shutdown_called) {
1403 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1404 finish_shutdown_locked(exec_ctx, pollset);
1405
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001406 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001407 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001408 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001409 }
1410
Craig Tiller557c88c2017-04-05 17:20:18 -07001411 if (worker_hdl) *worker_hdl = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001412
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001413 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1414 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001415
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001416 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001417
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001418 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1419 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001420}
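/* A minimal sketch of how a caller is expected to drive this engine
   ('done' and 'deadline' are illustrative names, not defined in this file):

     gpr_mu *mu;
     pollset_init(pollset, &mu);
     gpr_mu_lock(mu);
     while (!done) {
       grpc_pollset_worker *worker = NULL;
       pollset_work(exec_ctx, pollset, &worker,
                    gpr_now(GPR_CLOCK_MONOTONIC), deadline);
     }
     gpr_mu_unlock(mu);

   pollset_work() may drop and re-acquire *mu internally (see the comment
   above it) but always returns with the lock held. */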
1421
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001422static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001423 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001424 poll_obj_type item_type) {
1425 GPR_TIMER_BEGIN("add_poll_object", 0);
1426
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001427#ifdef PO_DEBUG
1428 GPR_ASSERT(item->obj_type == item_type);
1429 GPR_ASSERT(bag->obj_type == bag_type);
1430#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001431
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001432 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001433 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001434
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001435 gpr_mu_lock(&bag->mu);
1436 gpr_mu_lock(&item->mu);
1437
Craig Tiller7212c232016-07-06 13:11:09 -07001438retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001439 /*
1440 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1441 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1442 * a refcount of 2) and point item->pi and bag->pi to the new island
1443 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1444 * the other's non-NULL pi
1445   * 4) Finally if item->pi and bag->pi are non-NULL and not equal, merge the
1446 * polling islands and update item->pi and bag->pi to point to the new
1447 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001448 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001449
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001450 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1451 * orphaned */
1452 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1453 gpr_mu_unlock(&item->mu);
1454 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001455 return;
1456 }
1457
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001458 if (item->pi == bag->pi) {
1459 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001460 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001461 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001462
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001463 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1464 * we need to do some extra work to make TSAN happy */
1465 if (item_type == POLL_OBJ_FD) {
1466 /* Unlock before creating a new polling island: the polling island will
1467 create a workqueue which creates a file descriptor, and holding an fd
1468 lock here can eventually cause a loop to appear to TSAN (making it
1469 unhappy). We don't think it's a real loop (there's an epoch point
1470 where that loop possibility disappears), but the advantages of
1471 keeping TSAN happy outweigh any performance advantage we might have
1472 by keeping the lock held. */
1473 gpr_mu_unlock(&item->mu);
1474 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1475 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001476
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001477 /* Need to reverify any assumptions made between the initial lock and
1478 getting to this branch: if they've changed, we need to throw away our
1479 work and figure things out again. */
1480 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001481 GRPC_POLLING_TRACE(
1482 "add_poll_object: Raced creating new polling island. pi_new: %p "
1483 "(fd: %d, %s: %p)",
1484 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1485 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001486 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001487 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001488 polling_island_remove_all_fds_locked(pi_new, true, &error);
1489
1490 /* Ref and unref so that the polling island gets deleted during unref
1491 */
1492 PI_ADD_REF(pi_new, "dance_of_destruction");
1493 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1494 goto retry;
1495 }
Craig Tiller27da6422016-07-06 13:14:46 -07001496 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001497 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001498 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001499
1500 GRPC_POLLING_TRACE(
1501 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1502 "%s: %p)",
1503 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1504 poll_obj_string(bag_type), (void *)bag);
1505 } else {
1506 GRPC_POLLING_TRACE(
1507 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1508 (void *)pi_new, poll_obj_string(item_type),
1509 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001510 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001511 } else if (item->pi == NULL) {
1512 /* GPR_ASSERT(bag->pi != NULL) */
1513 /* Make pi_new point to latest pi*/
1514 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001515
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001516 if (item_type == POLL_OBJ_FD) {
1517 grpc_fd *fd = FD_FROM_PO(item);
1518 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1519 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001520
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001521 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001522 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001523 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1524 "bag(%s): %p)",
1525 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1526 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001527 } else if (bag->pi == NULL) {
1528 /* GPR_ASSERT(item->pi != NULL) */
1529     /* Make pi_new point to the latest pi */
1530 pi_new = polling_island_lock(item->pi);
1531 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001532 GRPC_POLLING_TRACE(
1533 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1534 "bag(%s): %p)",
1535 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1536 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001537 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001538 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001539 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001540 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1541 "bag(%s): %p)",
1542 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1543 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001544 }
1545
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001546 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1547 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001548
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001549 if (item->pi != pi_new) {
1550 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1551 if (item->pi != NULL) {
1552 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001553 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001554 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001555 }
1556
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001557 if (bag->pi != pi_new) {
1558 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1559 if (bag->pi != NULL) {
1560 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001561 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001562 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001563 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001564
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001565 gpr_mu_unlock(&item->mu);
1566 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001567
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001568 GRPC_LOG_IF_ERROR("add_poll_object", error);
1569 GPR_TIMER_END("add_poll_object", 0);
1570}
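/* Illustrative outcomes of add_poll_object() for the four cases enumerated
   inside the function (P and Q denote existing polling islands):

     item->pi == NULL, bag->pi == NULL  ->  new island created, both point to it
     item->pi == NULL, bag->pi == P     ->  item joins P
     item->pi == P,    bag->pi == NULL  ->  bag starts pointing at P
     item->pi == P,    bag->pi == Q     ->  P and Q are merged

   In every case both objects end up holding a reference to the same (latest)
   polling island. */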
Craig Tiller57726ca2016-09-12 11:59:45 -07001571
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001572static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1573 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001574 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001575 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001576}
1577
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001578/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001579 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001580 */
1581
1582static grpc_pollset_set *pollset_set_create(void) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001583 grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001584 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001585 pss->po.pi = NULL;
1586#ifdef PO_DEBUG
1587 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1588#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001589 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001590}
1591
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001592static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
1593 grpc_pollset_set *pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001594 gpr_mu_destroy(&pss->po.mu);
1595
1596 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001597 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001598 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001599
1600 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001601}
1602
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001603static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1604 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001605 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001606 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001607}
1608
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001609static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1610 grpc_fd *fd) {
1611 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001612}
1613
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001614static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001615 grpc_pollset_set *pss, grpc_pollset *ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001616 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001617 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001618}
1619
1620static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001621 grpc_pollset_set *pss, grpc_pollset *ps) {
1622 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001623}
1624
1625static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
1626 grpc_pollset_set *bag,
1627 grpc_pollset_set *item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001628 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001629 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001630}
1631
1632static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
1633 grpc_pollset_set *bag,
1634 grpc_pollset_set *item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001635 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001636}
1637
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001638/* Test helper functions */
1640void *grpc_fd_get_polling_island(grpc_fd *fd) {
1641 polling_island *pi;
1642
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001643 gpr_mu_lock(&fd->po.mu);
1644 pi = fd->po.pi;
1645 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001646
1647 return pi;
1648}
1649
1650void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1651 polling_island *pi;
1652
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001653 gpr_mu_lock(&ps->po.mu);
1654 pi = ps->po.pi;
1655 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001656
1657 return pi;
1658}
1659
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001660bool grpc_are_polling_islands_equal(void *p, void *q) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001661 polling_island *p1 = p;
1662 polling_island *p2 = q;
1663
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001664 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1665 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001666 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001667 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001668
1669 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001670}
1671
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001672/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001673 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001674 */
1675
1676static void shutdown_engine(void) {
1677 fd_global_shutdown();
1678 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001679 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001680}
1681
1682static const grpc_event_engine_vtable vtable = {
1683 .pollset_size = sizeof(grpc_pollset),
1684
1685 .fd_create = fd_create,
1686 .fd_wrapped_fd = fd_wrapped_fd,
1687 .fd_orphan = fd_orphan,
1688 .fd_shutdown = fd_shutdown,
Sree Kuchibhotla24b6eae2016-06-21 18:01:14 -07001689 .fd_is_shutdown = fd_is_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001690 .fd_notify_on_read = fd_notify_on_read,
1691 .fd_notify_on_write = fd_notify_on_write,
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001692 .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001693
1694 .pollset_init = pollset_init,
1695 .pollset_shutdown = pollset_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001696 .pollset_destroy = pollset_destroy,
1697 .pollset_work = pollset_work,
1698 .pollset_kick = pollset_kick,
1699 .pollset_add_fd = pollset_add_fd,
1700
1701 .pollset_set_create = pollset_set_create,
1702 .pollset_set_destroy = pollset_set_destroy,
1703 .pollset_set_add_pollset = pollset_set_add_pollset,
1704 .pollset_set_del_pollset = pollset_set_del_pollset,
1705 .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
1706 .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
1707 .pollset_set_add_fd = pollset_set_add_fd,
1708 .pollset_set_del_fd = pollset_set_del_fd,
1709
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001710 .shutdown_engine = shutdown_engine,
1711};
1712
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001713/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1714 * Create a dummy epoll_fd to make sure epoll support is available */
1715static bool is_epoll_available() {
1716 int fd = epoll_create1(EPOLL_CLOEXEC);
1717 if (fd < 0) {
1718 gpr_log(
1719 GPR_ERROR,
1720 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1721         errno);
1722 return false;
1723 }
1724 close(fd);
1725 return true;
1726}
1727
Craig Tillerf8382b82017-04-27 15:09:48 -07001728const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1729 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001730 /* If use of signals is disabled, we cannot use epoll engine*/
1731 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
1732 return NULL;
1733 }
1734
Ken Paysoncd7d0472016-10-11 12:24:20 -07001735 if (!grpc_has_wakeup_fd()) {
Ken Paysonbc544be2016-10-06 19:23:47 -07001736 return NULL;
1737 }
1738
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001739 if (!is_epoll_available()) {
1740 return NULL;
1741 }
1742
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001743 if (!is_grpc_wakeup_signal_initialized) {
Craig Tillerbc0ab082017-05-05 10:42:44 -07001744 /* TODO(ctiller): when other epoll engines are ready, remove the true || to
1745      * force this to be explicitly chosen if needed */
Craig Tiller924353a2017-05-05 17:36:31 +00001746 if (true || explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001747 grpc_use_signal(SIGRTMIN + 6);
1748 } else {
1749 return NULL;
1750 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001751 }
1752
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001753 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001754
1755 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
1756 return NULL;
1757 }
1758
1759 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1760 polling_island_global_init())) {
1761 return NULL;
1762 }
1763
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001764 return &vtable;
1765}
1766
murgatroid99623dd4f2016-08-08 17:31:27 -07001767#else /* defined(GRPC_LINUX_EPOLL) */
1768#if defined(GRPC_POSIX_SOCKET)
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001769#include "src/core/lib/iomgr/ev_posix.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001770/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001771 * NULL */
Craig Tillerf8382b82017-04-27 15:09:48 -07001772const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1773 bool explicit_request) {
1774 return NULL;
1775}
murgatroid99623dd4f2016-08-08 17:31:27 -07001776#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001777
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001778void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001779#endif /* !defined(GRPC_LINUX_EPOLL) */