/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/core/lib/iomgr/port.h"

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)

#define GRPC_POLLING_TRACE(...)             \
  if (GRPC_TRACER_ON(grpc_polling_trace)) { \
    gpr_log(GPR_INFO, __VA_ARGS__);         \
  }
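
/* GRPC_POLLING_TRACE only logs when the 'grpc_polling_trace' tracer is
   enabled at runtime; pollset_worker_kick() below is an example call site. */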

/* Uncomment the following to enable extra checks on poll_object operations */
/* #define PO_DEBUG */

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;

/* Implements the function defined in grpc_posix.h. This function might be
 * called even before grpc_init() to set a different signal to use. If
 * signum == -1, the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}
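
/* Illustrative usage (the signal number here is only an example, chosen by
   the application):
     grpc_use_signal(SIGRTMIN + 2);
     grpc_init();
   Passing signum == -1 disables signals and this engine will not be used. */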

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifdef PO_DEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island *pi;
} poll_obj;

const char *poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd *)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
       bit 0    : 1=Active / 0=Orphaned
       bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  gpr_atm read_closure;
  gpr_atm write_closure;

  struct grpc_fd *freelist_next;
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};

/* Reference counting for fds */
// #define GRPC_FD_REF_COUNT_DEBUG
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd *fd);
static void fd_unref(grpc_fd *fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif
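
/* Note: the 'reason' argument to GRPC_FD_REF/GRPC_FD_UNREF is only used by the
   debug (non-NDEBUG) build; the release macros above simply drop it. */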

static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this island merged into.
   * merged_to value is only set once in polling_island's lifetime (and that
   * too only if the island is merged with another island). Because of this, we
   * can use gpr_atm type here so that we can do atomic access on this and
   * reduce lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd **fds;
} polling_island;

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

static bool append_error(grpc_error **composite, grpc_error *error,
                         const char *desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}

/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wakeup all
   the threads currently polling the smaller polling island (so that they can
   start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island *g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work-around this race, we establish a happens-before relation between
   the code just-before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island *pi);
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifndef NDEBUG
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif

static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}

/* Might return NULL in case of an error */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}

static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}

/* Attempts to get the last polling island in the linked list (linked by the
 * 'merged_to' field). Since this does not lock the polling island, there are
 * no guarantees that the island returned is the last island */
static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
  polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  while (next != NULL) {
    pi = next;
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  }

  return pi;
}

/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (the one with fewer fds) to q.
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pick up this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}

static grpc_error *polling_island_global_init() {
  grpc_error *error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wakeup 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd *fd_freelist = NULL;
static gpr_mu fd_freelist_mu;

#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
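
/* ref_by() asserts that the fd's previous refcount was already greater than
   zero, i.e. a reference can only be taken on an fd that is still alive. */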

#ifndef NDEBUG
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd *fd, int n) {
#endif
  gpr_atm old;
  old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifdef PO_DEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifndef NDEBUG
  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name);
#endif
  gpr_free(fd_name);
  return new_fd;
}

static int fd_wrapped_fd(grpc_fd *fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  bool is_fd_closed = false;
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
    is_fd_closed = true;
  }

  fd->orphaned = true;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}

static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}

static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}
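
/* Note: shutdown(fd->fd, SHUT_RDWR) above is guarded by the return value of
   grpc_lfev_set_shutdown() on the read closure, so a repeated fd_shutdown()
   call that loses that race skips the socket-level shutdown. */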

static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}

static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;

static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}

static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }

/* Global state management */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}

static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}

static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}

/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}
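
/* Workers form a doubly-linked ring anchored at pollset->root_worker (see
   pollset_init); the ring is empty exactly when root_worker.next points back
   at root_worker, which is the condition pollset_has_workers() tests. */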

static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->prev->next = worker->next;
  worker->next->prev = worker->prev;
}

static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
  if (pollset_has_workers(p)) {
    grpc_pollset_worker *w = p->root_worker.next;
    remove_worker(p, w);
    return w;
  } else {
    return NULL;
  }
}

static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->next = &p->root_worker;
  worker->prev = worker->next->prev;
  worker->prev->next = worker->next->prev = worker;
}

static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->prev = &p->root_worker;
  worker->next = worker->prev->next;
  worker->prev->next = worker->next->prev = worker;
}
1030
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001031/* p->mu must be held before calling this function */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001032static grpc_error *pollset_kick(grpc_pollset *p,
1033 grpc_pollset_worker *specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001034 GPR_TIMER_BEGIN("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001035 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001036 const char *err_desc = "Kick Failure";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001037 grpc_pollset_worker *worker = specific_worker;
1038 if (worker != NULL) {
1039 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001040 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001041 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001042 for (worker = p->root_worker.next; worker != &p->root_worker;
1043 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001044 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001045 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001046 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001047 }
Craig Tillera218a062016-06-26 09:58:37 -07001048 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001049 } else {
1050 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001051 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001052 } else {
1053 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001054 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001055 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001056 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001057 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001058 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1059 /* Since worker == NULL, it means that we can kick "any" worker on this
1060 pollset 'p'. If 'p' happens to be the same pollset this thread is
1061 currently polling (i.e in pollset_work() function), then there is no need
1062 to kick any other worker since the current thread can just absorb the
1063 kick. This is the reason why we enter this case only when
1064 g_current_thread_pollset is != p */
1065
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001066 GPR_TIMER_MARK("kick_anonymous", 0);
1067 worker = pop_front_worker(p);
1068 if (worker != NULL) {
1069 GPR_TIMER_MARK("finally_kick", 0);
1070 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001071 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001072 } else {
1073 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001074 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001075 }
1076 }
1077
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001078 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001079 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1080 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001081}
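/* Illustrative usage sketch (added for exposition; the call sites below are
   hypothetical and not taken from the original source). pollset_kick() has
   three modes selected by 'specific_worker', and p->mu must be held: */
#if 0 /* illustration only */
  gpr_mu_lock(&p->po.mu);
  pollset_kick(p, w);                           /* kick one known worker */
  pollset_kick(p, GRPC_POLLSET_KICK_BROADCAST); /* kick every worker, as
                                                   pollset_shutdown() does */
  pollset_kick(p, NULL);                        /* kick "any" worker; absorbed
                                                   if this thread is already
                                                   polling 'p' */
  gpr_mu_unlock(&p->po.mu);
#endif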
1082
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001083static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001084 gpr_mu_init(&pollset->po.mu);
1085 *mu = &pollset->po.mu;
1086 pollset->po.pi = NULL;
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001087#ifdef PO_DEBUG
1088 pollset->po.obj_type = POLL_OBJ_POLLSET;
1089#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001090
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001091 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001092 pollset->kicked_without_pollers = false;
1093
1094 pollset->shutting_down = false;
1095 pollset->finish_shutdown_called = false;
1096 pollset->shutdown_done = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001097}
1098
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001099/* Convert a timespec to milliseconds:
1100 - Very small or negative poll times are clamped to zero to do a non-blocking
1101 poll (which becomes spin polling)
1102 - Other small values are rounded up to one millisecond
1103 - Polls longer than a millisecond are rounded up to the next nearest
1104 millisecond to avoid spinning
1105 - Infinite timeouts are converted to -1 */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001106static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
1107 gpr_timespec now) {
1108 gpr_timespec timeout;
1109 static const int64_t max_spin_polling_us = 10;
1110 if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
1111 return -1;
1112 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001113
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001114 if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
1115 max_spin_polling_us,
1116 GPR_TIMESPAN))) <= 0) {
1117 return 0;
1118 }
1119 timeout = gpr_time_sub(deadline, now);
Craig Tiller799e7e82017-03-27 12:42:34 -07001120 int millis = gpr_time_to_millis(gpr_time_add(
1121 timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
1122 return millis >= 1 ? millis : 1;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001123}
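/* Worked examples for the conversion above (added for exposition; they assume
   gpr_time_to_millis() truncates, which the (GPR_NS_PER_MS - 1) addend turns
   into a round-up):
     deadline == gpr_inf_future   -> -1  (block in epoll indefinitely)
     deadline <= now + 10us       ->  0  (non-blocking / spin poll)
     deadline == now + 500us      ->  1  (rounded up to one millisecond)
     deadline == now + 2.3ms      ->  3  (rounded up to the next millisecond) */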
1124
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001125static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
1126 grpc_pollset *notifier) {
Craig Tiller70652142017-04-06 08:31:23 -07001127 grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001128
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001129 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001130 different 'notifier's when an fd becomes readable and it is in two epoll
1131 sets (This can happen briefly during polling island merges). In such cases
1132 it does not really matter which notifier is set as the read_notifier_pollset
1133 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001134 /* Use release store to match with acquire load in fd_get_read_notifier */
1135 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001136}
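/* For exposition: the release store above pairs with an acquire load on the
   reader side. A minimal sketch of the matching read (fd_get_read_notifier_pollset
   is defined earlier in this file; the body below is an approximation, not a
   verbatim copy): */
#if 0 /* illustration only */
static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}
#endif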
1137
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001138static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
Craig Tillere16372b2017-04-06 08:51:39 -07001139 grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001140}
1141
Craig Tillerb39307d2016-06-30 15:39:13 -07001142static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
1143 grpc_pollset *ps, char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001144 if (ps->po.pi != NULL) {
1145 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001146 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001147 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001148}
1149
1150static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
1151 grpc_pollset *pollset) {
1152 /* The pollset cannot have any workers if we are at this stage */
1153 GPR_ASSERT(!pollset_has_workers(pollset));
1154
1155 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001156
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001157 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001158 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
ncteisen969b46e2017-06-08 14:57:11 -07001159 GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001160}
1161
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001162/* pollset->po.mu lock must be held by the caller before calling this */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001163static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1164 grpc_closure *closure) {
1165 GPR_TIMER_BEGIN("pollset_shutdown", 0);
1166 GPR_ASSERT(!pollset->shutting_down);
1167 pollset->shutting_down = true;
1168 pollset->shutdown_done = closure;
1169 pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
1170
1171 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1172 because it would release the underlying polling island. In such a case, we
1173 let the last worker call finish_shutdown_locked() from pollset_work() */
1174 if (!pollset_has_workers(pollset)) {
1175 GPR_ASSERT(!pollset->finish_shutdown_called);
1176 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
1177 finish_shutdown_locked(exec_ctx, pollset);
1178 }
1179 GPR_TIMER_END("pollset_shutdown", 0);
1180}
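/* Shutdown ordering, summarized for exposition (derived from the code above
   and from pollset_work() below):
   1. pollset_shutdown() marks the pollset as shutting down and broadcasts a
      kick so every worker returns from epoll_pwait().
   2. If there are no workers, finish_shutdown_locked() runs immediately.
   3. Otherwise the last worker to leave pollset_work() observes
      shutting_down && !pollset_has_workers() and runs finish_shutdown_locked(),
      which drops the polling island ref and schedules shutdown_done. */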
1181
1182/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1183 * than destroying the mutexes, there is nothing special that needs to be done
1184 * here */
Craig Tillerf8401102017-04-17 09:47:28 -07001185static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001186 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001187 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001188}
1189
Craig Tiller84ea3412016-09-08 14:57:56 -07001190#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001191/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
1192static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001193 grpc_pollset *pollset,
1194 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001195 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001196 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001197 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001198 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001199 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001200 char *err_msg;
1201 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001202 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1203
1204 /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001205 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001206
1207 Since epoll_fd is immutable, we can read it without obtaining the polling
1208 island lock. There is however a possibility that the polling island (from
1209 which we got the epoll_fd) got merged with another island while we are
1210 in this function. This is still okay because in such a case, we will wakeup
1211 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001212 time this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001213
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001214 if (pollset->po.pi == NULL) {
1215 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1216 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001217 GPR_TIMER_END("pollset_work_and_unlock", 0);
1218 return; /* Fatal error. We cannot continue */
1219 }
1220
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001221 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001222 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001223 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001224 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001225
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001226 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001227 epoll_fd = pi->epoll_fd;
1228
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001229 /* Update the pollset->po.pi since the island pointed to by
1230 pollset->po.pi may be older than the one pointed to by pi */
1231 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001232 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1233 polling island to be deleted */
1234 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001235 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1236 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001237 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001238
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001239 /* Add an extra ref so that the island does not get destroyed (which means
1240 the epoll_fd won't be closed) while we are doing an epoll_wait() on the
1241 epoll_fd */
1242 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001243 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001244
Craig Tiller61f96c12017-05-12 13:36:39 -07001245 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1246 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001247
Craig Tiller61f96c12017-05-12 13:36:39 -07001248 GRPC_SCHEDULING_START_BLOCKING_REGION;
1249 ep_rv =
1250 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
1251 GRPC_SCHEDULING_END_BLOCKING_REGION;
1252 if (ep_rv < 0) {
1253 if (errno != EINTR) {
1254 gpr_asprintf(&err_msg,
1255 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1256 epoll_fd, errno, strerror(errno));
1257 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1258 } else {
1259 /* We were interrupted. Save an iteration by doing a zero timeout
1260 epoll_wait to see if there are any other events of interest */
1261 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1262 (void *)pollset, (void *)worker);
1263 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001264 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001265 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001266
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001267#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001268 /* See the definition of g_poll_sync for more details */
1269 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001270#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001271
Craig Tiller61f96c12017-05-12 13:36:39 -07001272 for (int i = 0; i < ep_rv; ++i) {
1273 void *data_ptr = ep_ev[i].data.ptr;
1274 if (data_ptr == &polling_island_wakeup_fd) {
1275 GRPC_POLLING_TRACE(
1276 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1277 "%d) got merged",
1278 (void *)pollset, (void *)worker, epoll_fd);
1279 /* This means that our polling island is merged with a different
1280 island. We do not have to do anything here since the subsequent call
1281 to the function pollset_work_and_unlock() will pick up the correct
1282 epoll_fd */
1283 } else {
1284 grpc_fd *fd = data_ptr;
1285 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1286 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1287 int write_ev = ep_ev[i].events & EPOLLOUT;
1288 if (read_ev || cancel) {
1289 fd_become_readable(exec_ctx, fd, pollset);
1290 }
1291 if (write_ev || cancel) {
1292 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001293 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001294 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001295 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001296
Craig Tiller61f96c12017-05-12 13:36:39 -07001297 g_current_thread_polling_island = NULL;
1298 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1299
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001300 GPR_ASSERT(pi != NULL);
1301
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001302 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001303 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001304 that we got before releasing the pollset lock). This is because the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001305 pollset->po.pi pointer might get updated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001306 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001307 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001308
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001309 GPR_TIMER_END("pollset_work_and_unlock", 0);
1310}
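/* Event dispatch in the epoll loop above, summarized for exposition:
     EPOLLIN | EPOLLPRI                    -> fd_become_readable()
     EPOLLOUT                              -> fd_become_writable()
     EPOLLERR | EPOLLHUP                   -> both, so pending read and write
                                              closures get a chance to observe
                                              the error
     data.ptr == &polling_island_wakeup_fd -> island merge notification; the
                                              next call to this function picks
                                              up the merged epoll_fd */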
1311
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001312/* pollset->po.mu lock must be held by the caller before calling this.
1313 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001314 during the course of its execution but it will always re-acquire the lock and
1315 ensure that it is held by the time the function returns */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001316static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1317 grpc_pollset_worker **worker_hdl,
1318 gpr_timespec now, gpr_timespec deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001319 GPR_TIMER_BEGIN("pollset_work", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001320 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001321 int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);
1322
1323 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001324
1325 grpc_pollset_worker worker;
1326 worker.next = worker.prev = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001327 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001328 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001329
Craig Tiller557c88c2017-04-05 17:20:18 -07001330 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001331
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001332 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1333 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001334
1335 if (pollset->kicked_without_pollers) {
1336 /* If the pollset was kicked without pollers, pretend that the current
1337 worker got the kick and skip polling. A kick indicates that there is some
1338 work that needs attention like an event on the completion queue or an
1339 alarm */
1340 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1341 pollset->kicked_without_pollers = 0;
1342 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001343 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001344 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1345 worker that there is some pending work that needs immediate attention
1346 (like an event on the completion queue, or a polling island merge that
1347 results in a new epoll-fd to wait on) and that the worker should not
1348 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001349
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001350 A worker can be kicked anytime from the point it is added to the pollset
1351 via push_front_worker() (or push_back_worker()) to the point it is
1352 removed via remove_worker().
1353 If the worker is kicked before or while it is in epoll_pwait(), it should
1354 return from epoll_pwait() immediately. If the worker is kicked after it
1355 returns from epoll_pwait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001356
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001357 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001358 times *except* when it is in epoll_pwait(). This way, the worker never
1359 misses acting on a kick */
1360
Craig Tiller19196992016-06-27 18:45:56 -07001361 if (!g_initialized_sigmask) {
1362 sigemptyset(&new_mask);
1363 sigaddset(&new_mask, grpc_wakeup_signal);
1364 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1365 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1366 g_initialized_sigmask = true;
1367 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1368 This is the mask used at all times *except during
1369 epoll_wait()*
1370 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001371 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001372
Craig Tiller19196992016-06-27 18:45:56 -07001373 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001374 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001375 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001376
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001377 push_front_worker(pollset, &worker); /* Add worker to pollset */
1378
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001379 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1380 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001381 grpc_exec_ctx_flush(exec_ctx);
1382
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001383 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001384
1385 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1386 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001387 remove_worker(pollset, &worker);
1388 }
1389
1390 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1391 false at this point) and the pollset is shutting down, we may have to
1392 finish the shutdown process by calling finish_shutdown_locked().
1393 See pollset_shutdown() for more details.
1394
1395 Note: Continuing to access pollset here is safe; it is the caller's
1396 responsibility to not destroy a pollset when it has outstanding calls to
1397 pollset_work() */
1398 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1399 !pollset->finish_shutdown_called) {
1400 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1401 finish_shutdown_locked(exec_ctx, pollset);
1402
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001403 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001404 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001405 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001406 }
1407
Craig Tiller557c88c2017-04-05 17:20:18 -07001408 if (worker_hdl) *worker_hdl = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001409
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001410 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1411 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001412
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001413 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001414
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001415 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1416 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001417}
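/* Illustrative caller sketch (added for exposition; 'pollset', 'exec_ctx',
   'deadline' and 'done' are hypothetical). pollset_work() is entered with
   pollset->po.mu held and returns with it held, even though it drops the lock
   internally while blocked in epoll_pwait(): */
#if 0 /* illustration only */
  gpr_mu *mu;
  pollset_init(pollset, &mu);
  gpr_mu_lock(mu);
  while (!done) {
    grpc_pollset_worker *worker = NULL;
    GRPC_LOG_IF_ERROR("pollset_work",
                      pollset_work(&exec_ctx, pollset, &worker,
                                   gpr_now(GPR_CLOCK_MONOTONIC), deadline));
  }
  gpr_mu_unlock(mu);
#endif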
1418
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001419static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001420 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001421 poll_obj_type item_type) {
1422 GPR_TIMER_BEGIN("add_poll_object", 0);
1423
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001424#ifdef PO_DEBUG
1425 GPR_ASSERT(item->obj_type == item_type);
1426 GPR_ASSERT(bag->obj_type == bag_type);
1427#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001428
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001429 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001430 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001431
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001432 gpr_mu_lock(&bag->mu);
1433 gpr_mu_lock(&item->mu);
1434
Craig Tiller7212c232016-07-06 13:11:09 -07001435retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001436 /*
1437 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1438 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1439 * a refcount of 2) and point item->pi and bag->pi to the new island
1440 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1441 * the other's non-NULL pi
1442 * 4) Finally if item->pi and bag->pi are non-NULL and not-equal, merge the
1443 * polling islands and update item->pi and bag->pi to point to the new
1444 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001445 */
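  /* The same four cases as a compact table (added for exposition):

       item->pi   bag->pi     action
       --------   --------    -----------------------------------------------
       NULL       NULL        create a new island; both point to it
       NULL       P           point item->pi at (the latest version of) P,
                              adding item's fd to the island if item is an fd
       P          NULL        point bag->pi at (the latest version of) P
       P          Q  (P!=Q)   merge P and Q; both point to the merged island */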
Craig Tiller8e8027b2016-07-07 10:42:09 -07001446
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001447 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1448 * orphaned */
1449 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1450 gpr_mu_unlock(&item->mu);
1451 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001452 return;
1453 }
1454
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001455 if (item->pi == bag->pi) {
1456 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001457 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001458 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001459
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001460 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1461 * we need to do some extra work to make TSAN happy */
1462 if (item_type == POLL_OBJ_FD) {
1463 /* Unlock before creating a new polling island: the polling island will
1464 create a workqueue which creates a file descriptor, and holding an fd
1465 lock here can eventually cause a loop to appear to TSAN (making it
1466 unhappy). We don't think it's a real loop (there's an epoch point
1467 where that loop possibility disappears), but the advantages of
1468 keeping TSAN happy outweigh any performance advantage we might have
1469 by keeping the lock held. */
1470 gpr_mu_unlock(&item->mu);
1471 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1472 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001473
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001474 /* Need to reverify any assumptions made between the initial lock and
1475 getting to this branch: if they've changed, we need to throw away our
1476 work and figure things out again. */
1477 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001478 GRPC_POLLING_TRACE(
1479 "add_poll_object: Raced creating new polling island. pi_new: %p "
1480 "(fd: %d, %s: %p)",
1481 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1482 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001483 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001484 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001485 polling_island_remove_all_fds_locked(pi_new, true, &error);
1486
1487 /* Ref and unref so that the polling island gets deleted during unref
1488 */
1489 PI_ADD_REF(pi_new, "dance_of_destruction");
1490 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1491 goto retry;
1492 }
Craig Tiller27da6422016-07-06 13:14:46 -07001493 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001494 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001495 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001496
1497 GRPC_POLLING_TRACE(
1498 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1499 "%s: %p)",
1500 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1501 poll_obj_string(bag_type), (void *)bag);
1502 } else {
1503 GRPC_POLLING_TRACE(
1504 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1505 (void *)pi_new, poll_obj_string(item_type),
1506 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001507 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001508 } else if (item->pi == NULL) {
1509 /* GPR_ASSERT(bag->pi != NULL) */
1510 /* Make pi_new point to latest pi*/
1511 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001512
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001513 if (item_type == POLL_OBJ_FD) {
1514 grpc_fd *fd = FD_FROM_PO(item);
1515 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1516 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001517
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001518 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001519 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001520 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1521 "bag(%s): %p)",
1522 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1523 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001524 } else if (bag->pi == NULL) {
1525 /* GPR_ASSERT(item->pi != NULL) */
1526 /* Make pi_new to point to latest pi */
1527 pi_new = polling_island_lock(item->pi);
1528 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001529 GRPC_POLLING_TRACE(
1530 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1531 "bag(%s): %p)",
1532 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1533 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001534 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001535 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001536 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001537 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1538 "bag(%s): %p)",
1539 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1540 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001541 }
1542
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001543 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1544 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001545
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001546 if (item->pi != pi_new) {
1547 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1548 if (item->pi != NULL) {
1549 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001550 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001551 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001552 }
1553
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001554 if (bag->pi != pi_new) {
1555 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1556 if (bag->pi != NULL) {
1557 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001558 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001559 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001560 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001561
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001562 gpr_mu_unlock(&item->mu);
1563 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001564
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001565 GRPC_LOG_IF_ERROR("add_poll_object", error);
1566 GPR_TIMER_END("add_poll_object", 0);
1567}
Craig Tiller57726ca2016-09-12 11:59:45 -07001568
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001569static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1570 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001571 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001572 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001573}
1574
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001575/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001576 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001577 */
1578
1579static grpc_pollset_set *pollset_set_create(void) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001580 grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001581 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001582 pss->po.pi = NULL;
1583#ifdef PO_DEBUG
1584 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1585#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001586 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001587}
1588
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001589static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
1590 grpc_pollset_set *pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001591 gpr_mu_destroy(&pss->po.mu);
1592
1593 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001594 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001595 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001596
1597 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001598}
1599
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001600static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1601 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001602 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001603 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001604}
1605
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001606static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1607 grpc_fd *fd) {
1608 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001609}
1610
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001611static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001612 grpc_pollset_set *pss, grpc_pollset *ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001613 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001614 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001615}
1616
1617static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001618 grpc_pollset_set *pss, grpc_pollset *ps) {
1619 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001620}
1621
1622static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
1623 grpc_pollset_set *bag,
1624 grpc_pollset_set *item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001625 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001626 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001627}
1628
1629static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
1630 grpc_pollset_set *bag,
1631 grpc_pollset_set *item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001632 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001633}
1634
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001635/* Test helper functions */
1636
1637void *grpc_fd_get_polling_island(grpc_fd *fd) {
1638 polling_island *pi;
1639
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001640 gpr_mu_lock(&fd->po.mu);
1641 pi = fd->po.pi;
1642 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001643
1644 return pi;
1645}
1646
1647void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1648 polling_island *pi;
1649
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001650 gpr_mu_lock(&ps->po.mu);
1651 pi = ps->po.pi;
1652 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001653
1654 return pi;
1655}
1656
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001657bool grpc_are_polling_islands_equal(void *p, void *q) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001658 polling_island *p1 = p;
1659 polling_island *p2 = q;
1660
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001661 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1662 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001663 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001664 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001665
1666 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001667}
1668
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001669/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001670 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001671 */
1672
1673static void shutdown_engine(void) {
1674 fd_global_shutdown();
1675 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001676 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001677}
1678
1679static const grpc_event_engine_vtable vtable = {
1680 .pollset_size = sizeof(grpc_pollset),
1681
1682 .fd_create = fd_create,
1683 .fd_wrapped_fd = fd_wrapped_fd,
1684 .fd_orphan = fd_orphan,
1685 .fd_shutdown = fd_shutdown,
Sree Kuchibhotla24b6eae2016-06-21 18:01:14 -07001686 .fd_is_shutdown = fd_is_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001687 .fd_notify_on_read = fd_notify_on_read,
1688 .fd_notify_on_write = fd_notify_on_write,
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001689 .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001690
1691 .pollset_init = pollset_init,
1692 .pollset_shutdown = pollset_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001693 .pollset_destroy = pollset_destroy,
1694 .pollset_work = pollset_work,
1695 .pollset_kick = pollset_kick,
1696 .pollset_add_fd = pollset_add_fd,
1697
1698 .pollset_set_create = pollset_set_create,
1699 .pollset_set_destroy = pollset_set_destroy,
1700 .pollset_set_add_pollset = pollset_set_add_pollset,
1701 .pollset_set_del_pollset = pollset_set_del_pollset,
1702 .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
1703 .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
1704 .pollset_set_add_fd = pollset_set_add_fd,
1705 .pollset_set_del_fd = pollset_set_del_fd,
1706
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001707 .shutdown_engine = shutdown_engine,
1708};
1709
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001710/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1711 * Create a dummy epoll_fd to make sure epoll support is available */
1712static bool is_epoll_available() {
1713 int fd = epoll_create1(EPOLL_CLOEXEC);
1714 if (fd < 0) {
1715 gpr_log(
1716 GPR_ERROR,
1717 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1718 fd);
1719 return false;
1720 }
1721 close(fd);
1722 return true;
1723}
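/* For exposition: on a kernel without epoll support the probe above fails
   because epoll_create1() returns -1 (with errno typically set to ENOSYS),
   so grpc_init_epollsig_linux() below returns NULL and a different polling
   engine is used instead. */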
1724
Craig Tillerf8382b82017-04-27 15:09:48 -07001725const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1726 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001727 /* If use of signals is disabled, we cannot use epoll engine*/
1728 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
1729 return NULL;
1730 }
1731
Ken Paysoncd7d0472016-10-11 12:24:20 -07001732 if (!grpc_has_wakeup_fd()) {
Ken Paysonbc544be2016-10-06 19:23:47 -07001733 return NULL;
1734 }
1735
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001736 if (!is_epoll_available()) {
1737 return NULL;
1738 }
1739
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001740 if (!is_grpc_wakeup_signal_initialized) {
Craig Tillerbc0ab082017-05-05 10:42:44 -07001741 /* TODO(ctiller): when other epoll engines are ready, remove the true || to
1742 * force this to be explicitly chosen if needed */
Craig Tiller924353a2017-05-05 17:36:31 +00001743 if (true || explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001744 grpc_use_signal(SIGRTMIN + 6);
1745 } else {
1746 return NULL;
1747 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001748 }
1749
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001750 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001751
1752 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
1753 return NULL;
1754 }
1755
1756 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1757 polling_island_global_init())) {
1758 return NULL;
1759 }
1760
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001761 return &vtable;
1762}
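/* Usage note (added for exposition; the snippet is a hedged sketch, not taken
   from the original source): a caller that wants a wakeup signal other than
   the SIGRTMIN + 6 default chosen above can call grpc_use_signal() before this
   engine is initialized; passing a negative value disables signals and makes
   this initializer return NULL. */
#if 0 /* illustration only */
  grpc_use_signal(SIGRTMIN + 2); /* hypothetical alternative signal number */
#endif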
1763
murgatroid99623dd4f2016-08-08 17:31:27 -07001764#else /* defined(GRPC_LINUX_EPOLL) */
1765#if defined(GRPC_POSIX_SOCKET)
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001766#include "src/core/lib/iomgr/ev_posix.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001767/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001768 * NULL */
Craig Tillerf8382b82017-04-27 15:09:48 -07001769const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1770 bool explicit_request) {
1771 return NULL;
1772}
murgatroid99623dd4f2016-08-08 17:31:27 -07001773#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001774
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001775void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001776#endif /* !defined(GRPC_LINUX_EPOLL) */