/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/core/lib/iomgr/port.h"

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

#include "src/core/lib/debug/stats.h"
#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)

#define GRPC_POLLING_TRACE(...)             \
  if (GRPC_TRACER_ON(grpc_polling_trace)) { \
    gpr_log(GPR_INFO, __VA_ARGS__);         \
  }

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;
/* Implements the function defined in grpc_posix.h. This function might be
 * called even before grpc_init() to set a different signal to use. If
 * signum == -1, then the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}
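
/* Illustrative usage sketch (the signal number below is just an example, not
   something this file mandates): an application that wants this engine to use
   a dedicated real-time signal would call, before grpc_init():

       grpc_use_signal(SIGRTMIN + 6);

   Calling grpc_use_signal(-1) instead disables the use of signals, in which
   case this polling engine will not be used. */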

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifndef NDEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island *pi;
} poll_obj;

const char *poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd *)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
     bit 0    : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  gpr_atm read_closure;
  gpr_atm write_closure;

  struct grpc_fd *freelist_next;
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};

/* Reference counting for fds */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd *fd);
static void fd_unref(grpc_fd *fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd **fds;
} polling_island;

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

static bool append_error(grpc_error **composite, grpc_error *error,
                         const char *desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}

/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wakeup all
   the threads currently polling the smaller polling island (so that they can
   start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island *g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work-around this race, we establish a happens-before relation between
   the code just-before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island *pi);
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifndef NDEBUG
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif

static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}

/* Might return NULL in case of an error */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}

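/* Frees the polling island: closes the underlying epoll fd (if it was
   successfully created) and releases the island's memory. The island must not
   contain any fds at this point (pi->fd_cnt must be 0). */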
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}

/* Attempts to get the last polling island in the linked list (linked by the
 * 'merged_to' field). Since this does not lock the polling island, there are no
 * guarantees that the island returned is the last island */
static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
  polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  while (next != NULL) {
    pi = next;
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
  }

  return pi;
}

/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
        we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

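/* Releases the locks obtained via polling_island_lock_pair(). Handles the case
   where both arguments refer to the same island (only one unlock then). */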
static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

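/* Merges two polling islands: the fds of the island with fewer fds are moved
   into the other island, and the emptied island is linked to the surviving one
   via 'merged_to'. Returns the surviving island. If p == q, nothing is merged
   and the common island is returned. */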
static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}

static grpc_error *polling_island_global_init() {
  grpc_error *error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wakeup 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd *fd_freelist = NULL;
static gpr_mu fd_freelist_mu;

#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p   ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}

#ifndef NDEBUG
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd *fd, int n) {
#endif
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

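/* Frees all fds sitting on the freelist and destroys the freelist mutex
   (called once at polling-engine shutdown). */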
static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

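/* Wraps the file descriptor 'fd' in a grpc_fd, reusing a struct from the
   freelist when one is available. The returned grpc_fd is active (not
   orphaned) and is not yet associated with any polling island. */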
static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}

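/* Returns the underlying file descriptor if the grpc_fd has not been orphaned
   yet, or -1 if it has. */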
static int fd_wrapped_fd(grpc_fd *fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

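/* Orphans the fd: removes it from its polling island (if any), then either
   hands the raw file descriptor back via 'release_fd' or closes it, and
   finally schedules 'on_done'. */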
static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, false /* is_fd_closed */,
                                    &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}

static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}

static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}

static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;

static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}

static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }

/* Global state management */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}

static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}

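/* Kicks the given worker by sending it the wakeup signal, unless the worker
   has already been kicked (worker->is_kicked acts as a latch). */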
static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}

/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}

996static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
997 worker->prev->next = worker->next;
998 worker->next->prev = worker->prev;
999}
1000
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001001static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1002 if (pollset_has_workers(p)) {
1003 grpc_pollset_worker *w = p->root_worker.next;
1004 remove_worker(p, w);
1005 return w;
1006 } else {
1007 return NULL;
1008 }
1009}
1010
1011static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1012 worker->next = &p->root_worker;
1013 worker->prev = worker->next->prev;
1014 worker->prev->next = worker->next->prev = worker;
1015}
1016
1017static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1018 worker->prev = &p->root_worker;
1019 worker->next = worker->prev->next;
1020 worker->prev->next = worker->next->prev = worker;
1021}
1022
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001023/* p->mu must be held before calling this function */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001024static grpc_error *pollset_kick(grpc_pollset *p,
1025 grpc_pollset_worker *specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001026 GPR_TIMER_BEGIN("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001027 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001028 const char *err_desc = "Kick Failure";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001029 grpc_pollset_worker *worker = specific_worker;
1030 if (worker != NULL) {
1031 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001032 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001033 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001034 for (worker = p->root_worker.next; worker != &p->root_worker;
1035 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001036 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001037 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001038 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001039 }
Craig Tillera218a062016-06-26 09:58:37 -07001040 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001041 } else {
1042 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001043 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001044 } else {
1045 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001046 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001047 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001048 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001049 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001050 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1051 /* Since worker == NULL, it means that we can kick "any" worker on this
1052 pollset 'p'. If 'p' happens to be the same pollset this thread is
1053 currently polling (i.e in pollset_work() function), then there is no need
1054 to kick any other worker since the current thread can just absorb the
1055 kick. This is the reason why we enter this case only when
1056 g_current_thread_pollset is != p */
1057
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001058 GPR_TIMER_MARK("kick_anonymous", 0);
1059 worker = pop_front_worker(p);
1060 if (worker != NULL) {
1061 GPR_TIMER_MARK("finally_kick", 0);
1062 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001063 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001064 } else {
1065 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001066 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001067 }
1068 }
1069
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001070 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001071 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1072 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001073}
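/* Illustrative sketch of the three kick modes accepted by pollset_kick()
   above. Guarded out of the build; 'p' and 'specific_worker' are hypothetical
   and p->mu is assumed to be held, as the function requires. */
#if 0
  /* Wake every worker currently polling 'p' (this is what pollset_shutdown()
     below uses): */
  pollset_kick(p, GRPC_POLLSET_KICK_BROADCAST);

  /* Wake one particular worker. This is a no-op if it is the calling thread's
     own worker, since that thread is clearly not blocked in epoll_pwait(): */
  pollset_kick(p, specific_worker);

  /* Wake "any" worker (only when the caller is not itself polling 'p';
     otherwise the kick is absorbed): the front worker is popped, kicked and
     pushed to the back (a simple round-robin). If no worker is polling, the
     pollset is marked kicked_without_pollers so the next pollset_work()
     returns immediately: */
  pollset_kick(p, NULL);
#endif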
1074
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001075static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001076 gpr_mu_init(&pollset->po.mu);
1077 *mu = &pollset->po.mu;
1078 pollset->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001079#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001080 pollset->po.obj_type = POLL_OBJ_POLLSET;
1081#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001082
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001083 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001084 pollset->kicked_without_pollers = false;
1085
1086 pollset->shutting_down = false;
1087 pollset->finish_shutdown_called = false;
1088 pollset->shutdown_done = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001089}
1090
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001091/* Convert a timespec to milliseconds:
1092 - Very small or negative poll times are clamped to zero to do a non-blocking
1093 poll (which becomes spin polling)
1094 - Other small values are rounded up to one millisecond
1095    - Longer polls are rounded up to the next whole millisecond so that we
1096      never wake up before the deadline and spin
1097 - Infinite timeouts are converted to -1 */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001098static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
1099 gpr_timespec now) {
1100 gpr_timespec timeout;
1101 static const int64_t max_spin_polling_us = 10;
1102 if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
1103 return -1;
1104 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001105
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001106 if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
1107 max_spin_polling_us,
1108 GPR_TIMESPAN))) <= 0) {
1109 return 0;
1110 }
1111 timeout = gpr_time_sub(deadline, now);
Craig Tiller799e7e82017-03-27 12:42:34 -07001112 int millis = gpr_time_to_millis(gpr_time_add(
1113 timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
1114 return millis >= 1 ? millis : 1;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001115}
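/* Worked example of the conversion above (illustrative only; it assumes
   gpr_time_to_millis() truncates any sub-millisecond remainder, which is why
   GPR_NS_PER_MS - 1 nanoseconds are added first to round up): */
#if 0
  gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);

  /* Infinite deadline -> -1: block in epoll_pwait() until kicked */
  poll_deadline_to_millis_timeout(gpr_inf_future(GPR_CLOCK_MONOTONIC), now);

  /* Deadline within 10us of now -> 0: non-blocking (spin) poll */
  poll_deadline_to_millis_timeout(
      gpr_time_add(now, gpr_time_from_micros(5, GPR_TIMESPAN)), now);

  /* Deadline 2.3ms away -> 3: rounded up so we do not wake before the
     deadline and spin */
  poll_deadline_to_millis_timeout(
      gpr_time_add(now, gpr_time_from_micros(2300, GPR_TIMESPAN)), now);
#endif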
1116
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001117static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
1118 grpc_pollset *notifier) {
Craig Tiller70652142017-04-06 08:31:23 -07001119 grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001120
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001121 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001122 different 'notifier's when an fd becomes readable and it is in two epoll
1123 sets (This can happen briefly during polling island merges). In such cases
1124    it does not really matter which notifier is set as the read_notifier_pollset
1125 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001126  /* Use release store to match with acquire load in fd_get_read_notifier_pollset() */
1127 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001128}
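/* Illustrative sketch of the matching read side (the real reader is
   fd_get_read_notifier_pollset(), defined earlier in this file): the release
   store above pairs with an acquire load so the reader observes a fully
   published notifier pollset. Guarded out of the build. */
#if 0
  grpc_pollset *notifier =
      (grpc_pollset *)gpr_atm_acq_load(&fd->read_notifier_pollset);
#endif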
1129
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001130static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
Craig Tillere16372b2017-04-06 08:51:39 -07001131 grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001132}
1133
Craig Tillerb39307d2016-06-30 15:39:13 -07001134static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
1135 grpc_pollset *ps, char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001136 if (ps->po.pi != NULL) {
1137 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001138 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001139 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001140}
1141
1142static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
1143 grpc_pollset *pollset) {
1144 /* The pollset cannot have any workers if we are at this stage */
1145 GPR_ASSERT(!pollset_has_workers(pollset));
1146
1147 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001148
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001149 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001150 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
ncteisen969b46e2017-06-08 14:57:11 -07001151 GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001152}
1153
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001154/* pollset->po.mu lock must be held by the caller before calling this */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001155static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1156 grpc_closure *closure) {
1157 GPR_TIMER_BEGIN("pollset_shutdown", 0);
1158 GPR_ASSERT(!pollset->shutting_down);
1159 pollset->shutting_down = true;
1160 pollset->shutdown_done = closure;
1161 pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
1162
1163 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1164 because it would release the underlying polling island. In such a case, we
1165 let the last worker call finish_shutdown_locked() from pollset_work() */
1166 if (!pollset_has_workers(pollset)) {
1167 GPR_ASSERT(!pollset->finish_shutdown_called);
1168 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
1169 finish_shutdown_locked(exec_ctx, pollset);
1170 }
1171 GPR_TIMER_END("pollset_shutdown", 0);
1172}
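/* Illustrative caller-side ordering (a sketch, not part of this file):
   pollset_shutdown() must be called with po.mu held, and pollset_destroy()
   must not run until the shutdown closure has been scheduled and executed.
   'on_pollset_shutdown_done' and 'exec_ctx' are hypothetical. */
#if 0
  grpc_closure shutdown_done;
  GRPC_CLOSURE_INIT(&shutdown_done, on_pollset_shutdown_done, ps,
                    grpc_schedule_on_exec_ctx);
  gpr_mu_lock(&ps->po.mu);
  pollset_shutdown(&exec_ctx, ps, &shutdown_done);
  gpr_mu_unlock(&ps->po.mu);
  /* ... later, from on_pollset_shutdown_done(): pollset_destroy() and free */
#endif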
1173
1174/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1175 * than destroying the mutexes, there is nothing special that needs to be done
1176 * here */
Craig Tillerf8401102017-04-17 09:47:28 -07001177static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001178 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001179 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001180}
1181
Craig Tiller84ea3412016-09-08 14:57:56 -07001182#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001183/* Note: sig_mask contains the signal mask to use *during* epoll_pwait() */
1184static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001185 grpc_pollset *pollset,
1186 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001187 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001188 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001189 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001190 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001191 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001192 char *err_msg;
1193 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001194 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1195
1196  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001197 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001198
1199 Since epoll_fd is immutable, we can read it without obtaining the polling
1200 island lock. There is however a possibility that the polling island (from
1201 which we got the epoll_fd) got merged with another island while we are
1202     in this function. This is still okay because in such a case, we will wake up
1203     right away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001204     time this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001205
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001206 if (pollset->po.pi == NULL) {
1207 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1208 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001209 GPR_TIMER_END("pollset_work_and_unlock", 0);
1210 return; /* Fatal error. We cannot continue */
1211 }
1212
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001213 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001214 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001215 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001216 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001217
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001218 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001219 epoll_fd = pi->epoll_fd;
1220
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001221  /* Update pollset->po.pi since the island currently pointed to by
1222     pollset->po.pi may be older than the one pointed to by pi */
1223 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001224 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1225 polling island to be deleted */
1226 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001227 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1228 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001229 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001230
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001231 /* Add an extra ref so that the island does not get destroyed (which means
1232     the epoll_fd won't be closed) while we are doing an epoll_wait() on the
1233 epoll_fd */
1234 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001235 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001236
Craig Tiller61f96c12017-05-12 13:36:39 -07001237 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1238 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001239
Craig Tiller61f96c12017-05-12 13:36:39 -07001240 GRPC_SCHEDULING_START_BLOCKING_REGION;
Craig Tillerb4bb1cd2017-07-20 14:18:17 -07001241 GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001242 ep_rv =
1243 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
1244 GRPC_SCHEDULING_END_BLOCKING_REGION;
1245 if (ep_rv < 0) {
1246 if (errno != EINTR) {
1247 gpr_asprintf(&err_msg,
1248                   "epoll_pwait() epoll fd: %d failed with error: %d (%s)",
1249 epoll_fd, errno, strerror(errno));
1250 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1251 } else {
1252      /* We were interrupted. Save an iteration by doing a zero timeout
1253 epoll_wait to see if there are any other events of interest */
1254 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1255 (void *)pollset, (void *)worker);
1256 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001257 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001258 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001259
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001260#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001261 /* See the definition of g_poll_sync for more details */
1262 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001263#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001264
Craig Tiller61f96c12017-05-12 13:36:39 -07001265 for (int i = 0; i < ep_rv; ++i) {
1266 void *data_ptr = ep_ev[i].data.ptr;
1267 if (data_ptr == &polling_island_wakeup_fd) {
1268 GRPC_POLLING_TRACE(
1269 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1270 "%d) got merged",
1271 (void *)pollset, (void *)worker, epoll_fd);
1272 /* This means that our polling island is merged with a different
1273 island. We do not have to do anything here since the subsequent call
1274 to the function pollset_work_and_unlock() will pick up the correct
1275 epoll_fd */
1276 } else {
1277 grpc_fd *fd = data_ptr;
1278 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1279 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1280 int write_ev = ep_ev[i].events & EPOLLOUT;
1281 if (read_ev || cancel) {
1282 fd_become_readable(exec_ctx, fd, pollset);
1283 }
1284 if (write_ev || cancel) {
1285 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001286 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001287 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001288 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001289
Craig Tiller61f96c12017-05-12 13:36:39 -07001290 g_current_thread_polling_island = NULL;
1291 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1292
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001293 GPR_ASSERT(pi != NULL);
1294
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001295 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001296 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001297 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001298     pollset->po.pi pointer might get updated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001299 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001300 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001301
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001302 GPR_TIMER_END("pollset_work_and_unlock", 0);
1303}
1304
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001305/* pollset->po.mu lock must be held by the caller before calling this.
1306 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001307 during the course of its execution but it will always re-acquire the lock and
1308 ensure that it is held by the time the function returns */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001309static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1310 grpc_pollset_worker **worker_hdl,
1311 gpr_timespec now, gpr_timespec deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001312 GPR_TIMER_BEGIN("pollset_work", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001313 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001314 int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);
1315
1316 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001317
1318 grpc_pollset_worker worker;
1319 worker.next = worker.prev = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001320 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001321 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001322
Craig Tiller557c88c2017-04-05 17:20:18 -07001323 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001324
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001325 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1326 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001327
1328 if (pollset->kicked_without_pollers) {
1329 /* If the pollset was kicked without pollers, pretend that the current
1330 worker got the kick and skip polling. A kick indicates that there is some
1331 work that needs attention like an event on the completion queue or an
1332 alarm */
1333 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1334 pollset->kicked_without_pollers = 0;
1335 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001336 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001337 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1338 worker that there is some pending work that needs immediate attention
1339 (like an event on the completion queue, or a polling island merge that
1340 results in a new epoll-fd to wait on) and that the worker should not
1341 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001342
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001343 A worker can be kicked anytime from the point it is added to the pollset
1344 via push_front_worker() (or push_back_worker()) to the point it is
1345 removed via remove_worker().
1346 If the worker is kicked before/during it calls epoll_pwait(), it should
1347 immediately exit from epoll_wait(). If the worker is kicked after it
1348 returns from epoll_wait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001349
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001350 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001351 times *except* when it is in epoll_pwait(). This way, the worker never
1352 misses acting on a kick */
1353
Craig Tiller19196992016-06-27 18:45:56 -07001354 if (!g_initialized_sigmask) {
1355 sigemptyset(&new_mask);
1356 sigaddset(&new_mask, grpc_wakeup_signal);
1357 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1358 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1359 g_initialized_sigmask = true;
1360 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1361 This is the mask used at all times *except during
1362                      epoll_wait()*
1363 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001364 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001365
Craig Tiller19196992016-06-27 18:45:56 -07001366 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001367 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001368 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001369
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001370 push_front_worker(pollset, &worker); /* Add worker to pollset */
1371
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001372 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1373 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001374 grpc_exec_ctx_flush(exec_ctx);
1375
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001376 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001377
1378 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1379 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001380 remove_worker(pollset, &worker);
1381 }
1382
1383 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1384 false at this point) and the pollset is shutting down, we may have to
1385 finish the shutdown process by calling finish_shutdown_locked().
1386 See pollset_shutdown() for more details.
1387
1388 Note: Continuing to access pollset here is safe; it is the caller's
1389 responsibility to not destroy a pollset when it has outstanding calls to
1390 pollset_work() */
1391 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1392 !pollset->finish_shutdown_called) {
1393 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1394 finish_shutdown_locked(exec_ctx, pollset);
1395
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001396 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001397 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001398 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001399 }
1400
Craig Tiller557c88c2017-04-05 17:20:18 -07001401 if (worker_hdl) *worker_hdl = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001402
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001403 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1404 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001405
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001406 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001407
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001408 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1409 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001410}
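/* Minimal caller-side sketch (illustrative; in practice the completion queue
   drives this loop through the public grpc_pollset_work wrapper). Guarded out
   of the build; 'done' and 'deadline' are hypothetical. */
#if 0
  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
  gpr_mu_lock(&pollset->po.mu);
  while (!done) {
    grpc_pollset_worker *worker = NULL;
    /* pollset_work() releases and re-acquires pollset->po.mu internally */
    GRPC_LOG_IF_ERROR("pollset_work",
                      pollset_work(&exec_ctx, pollset, &worker,
                                   gpr_now(GPR_CLOCK_MONOTONIC), deadline));
  }
  gpr_mu_unlock(&pollset->po.mu);
  grpc_exec_ctx_finish(&exec_ctx);
#endif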
1411
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001412static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001413 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001414 poll_obj_type item_type) {
1415 GPR_TIMER_BEGIN("add_poll_object", 0);
1416
ncteisene9cd8a82017-06-29 06:03:52 -04001417#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001418 GPR_ASSERT(item->obj_type == item_type);
1419 GPR_ASSERT(bag->obj_type == bag_type);
1420#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001421
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001422 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001423 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001424
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001425 gpr_mu_lock(&bag->mu);
1426 gpr_mu_lock(&item->mu);
1427
Craig Tiller7212c232016-07-06 13:11:09 -07001428retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001429 /*
1430 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1431 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1432 * a refcount of 2) and point item->pi and bag->pi to the new island
1433 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1434 * the other's non-NULL pi
1435   * 4) Finally, if item->pi and bag->pi are non-NULL and not equal, merge the
1436 * polling islands and update item->pi and bag->pi to point to the new
1437 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001438 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001439
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001440 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1441 * orphaned */
1442 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1443 gpr_mu_unlock(&item->mu);
1444 gpr_mu_unlock(&bag->mu);
    /* Balance the GPR_TIMER_BEGIN at the top of this function */
    GPR_TIMER_END("add_poll_object", 0);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001445    return;
1446 }
1447
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001448 if (item->pi == bag->pi) {
1449 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001450 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001451 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001452
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001453 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1454 * we need to do some extra work to make TSAN happy */
1455 if (item_type == POLL_OBJ_FD) {
1456 /* Unlock before creating a new polling island: the polling island will
1457 create a workqueue which creates a file descriptor, and holding an fd
1458 lock here can eventually cause a loop to appear to TSAN (making it
1459 unhappy). We don't think it's a real loop (there's an epoch point
1460 where that loop possibility disappears), but the advantages of
1461 keeping TSAN happy outweigh any performance advantage we might have
1462 by keeping the lock held. */
1463 gpr_mu_unlock(&item->mu);
1464 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1465 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001466
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001467 /* Need to reverify any assumptions made between the initial lock and
1468 getting to this branch: if they've changed, we need to throw away our
1469 work and figure things out again. */
1470 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001471 GRPC_POLLING_TRACE(
1472 "add_poll_object: Raced creating new polling island. pi_new: %p "
1473 "(fd: %d, %s: %p)",
1474 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1475 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001476 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001477 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001478 polling_island_remove_all_fds_locked(pi_new, true, &error);
1479
1480 /* Ref and unref so that the polling island gets deleted during unref
1481 */
1482 PI_ADD_REF(pi_new, "dance_of_destruction");
1483 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1484 goto retry;
1485 }
Craig Tiller27da6422016-07-06 13:14:46 -07001486 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001487 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001488 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001489
1490 GRPC_POLLING_TRACE(
1491 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1492 "%s: %p)",
1493 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1494 poll_obj_string(bag_type), (void *)bag);
1495 } else {
1496 GRPC_POLLING_TRACE(
1497 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1498 (void *)pi_new, poll_obj_string(item_type),
1499 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001500 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001501 } else if (item->pi == NULL) {
1502 /* GPR_ASSERT(bag->pi != NULL) */
1503    /* Make pi_new point to the latest pi */
1504 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001505
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001506 if (item_type == POLL_OBJ_FD) {
1507 grpc_fd *fd = FD_FROM_PO(item);
1508 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1509 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001510
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001511 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001512 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001513 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1514 "bag(%s): %p)",
1515 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1516 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001517 } else if (bag->pi == NULL) {
1518 /* GPR_ASSERT(item->pi != NULL) */
1519    /* Make pi_new point to the latest pi */
1520 pi_new = polling_island_lock(item->pi);
1521 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001522 GRPC_POLLING_TRACE(
1523 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1524 "bag(%s): %p)",
1525 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1526 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001527 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001528 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001529 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001530 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1531 "bag(%s): %p)",
1532 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1533 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001534 }
1535
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001536 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1537 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001538
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001539 if (item->pi != pi_new) {
1540 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1541 if (item->pi != NULL) {
1542 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001543 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001544 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001545 }
1546
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001547 if (bag->pi != pi_new) {
1548 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1549 if (bag->pi != NULL) {
1550 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001551 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001552 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001553 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001554
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001555 gpr_mu_unlock(&item->mu);
1556 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001557
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001558 GRPC_LOG_IF_ERROR("add_poll_object", error);
1559 GPR_TIMER_END("add_poll_object", 0);
1560}
Craig Tiller57726ca2016-09-12 11:59:45 -07001561
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001562static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1563 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001564 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001565 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001566}
1567
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001568/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001569 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001570 */
1571
1572static grpc_pollset_set *pollset_set_create(void) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001573 grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001574 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001575 pss->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001576#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001577 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1578#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001579 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001580}
1581
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001582static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
1583 grpc_pollset_set *pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001584 gpr_mu_destroy(&pss->po.mu);
1585
1586 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001587 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001588 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001589
1590 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001591}
1592
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001593static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1594 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001595 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001596 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001597}
1598
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001599static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1600 grpc_fd *fd) {
1601 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001602}
1603
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001604static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001605 grpc_pollset_set *pss, grpc_pollset *ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001606 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001607 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001608}
1609
1610static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001611 grpc_pollset_set *pss, grpc_pollset *ps) {
1612 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001613}
1614
1615static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
1616 grpc_pollset_set *bag,
1617 grpc_pollset_set *item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001618 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001619 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001620}
1621
1622static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
1623 grpc_pollset_set *bag,
1624 grpc_pollset_set *item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001625 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001626}
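/* Illustrative sketch of how these pollset_set hooks compose (guarded out of
   the build; 'exec_ctx', 'ps' and 'fd' are hypothetical). An fd added to a
   pollset_set ends up on the same polling island as every pollset in the set,
   so any of those pollsets can poll it. */
#if 0
  grpc_pollset_set *pss = pollset_set_create();
  pollset_set_add_pollset(&exec_ctx, pss, ps); /* merge ps's island with pss's */
  pollset_set_add_fd(&exec_ctx, pss, fd);      /* fd joins that shared island  */
  pollset_set_del_fd(&exec_ctx, pss, fd);      /* a no-op in this engine       */
  pollset_set_destroy(&exec_ctx, pss);
#endif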
1627
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001628/* Test helper functions */
1630void *grpc_fd_get_polling_island(grpc_fd *fd) {
1631 polling_island *pi;
1632
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001633 gpr_mu_lock(&fd->po.mu);
1634 pi = fd->po.pi;
1635 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001636
1637 return pi;
1638}
1639
1640void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1641 polling_island *pi;
1642
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001643 gpr_mu_lock(&ps->po.mu);
1644 pi = ps->po.pi;
1645 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001646
1647 return pi;
1648}
1649
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001650bool grpc_are_polling_islands_equal(void *p, void *q) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001651 polling_island *p1 = p;
1652 polling_island *p2 = q;
1653
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001654 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1655 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001656 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001657 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001658
1659 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001660}
1661
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001662/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001663 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001664 */
1665
1666static void shutdown_engine(void) {
1667 fd_global_shutdown();
1668 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001669 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001670}
1671
1672static const grpc_event_engine_vtable vtable = {
1673 .pollset_size = sizeof(grpc_pollset),
1674
1675 .fd_create = fd_create,
1676 .fd_wrapped_fd = fd_wrapped_fd,
1677 .fd_orphan = fd_orphan,
1678 .fd_shutdown = fd_shutdown,
Sree Kuchibhotla24b6eae2016-06-21 18:01:14 -07001679 .fd_is_shutdown = fd_is_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001680 .fd_notify_on_read = fd_notify_on_read,
1681 .fd_notify_on_write = fd_notify_on_write,
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001682 .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001683
1684 .pollset_init = pollset_init,
1685 .pollset_shutdown = pollset_shutdown,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001686 .pollset_destroy = pollset_destroy,
1687 .pollset_work = pollset_work,
1688 .pollset_kick = pollset_kick,
1689 .pollset_add_fd = pollset_add_fd,
1690
1691 .pollset_set_create = pollset_set_create,
1692 .pollset_set_destroy = pollset_set_destroy,
1693 .pollset_set_add_pollset = pollset_set_add_pollset,
1694 .pollset_set_del_pollset = pollset_set_del_pollset,
1695 .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
1696 .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
1697 .pollset_set_add_fd = pollset_set_add_fd,
1698 .pollset_set_del_fd = pollset_set_del_fd,
1699
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001700 .shutdown_engine = shutdown_engine,
1701};
1702
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001703/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1704 * Create a dummy epoll_fd to make sure epoll support is available */
1705static bool is_epoll_available() {
1706 int fd = epoll_create1(EPOLL_CLOEXEC);
1707 if (fd < 0) {
1708 gpr_log(
1709 GPR_ERROR,
1710 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1711 fd);
1712 return false;
1713 }
1714 close(fd);
1715 return true;
1716}
1717
Craig Tillerf8382b82017-04-27 15:09:48 -07001718const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1719 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001720  /* If use of signals is disabled, we cannot use the epoll engine */
1721 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
1722 return NULL;
1723 }
1724
Ken Paysoncd7d0472016-10-11 12:24:20 -07001725 if (!grpc_has_wakeup_fd()) {
Ken Paysonbc544be2016-10-06 19:23:47 -07001726 return NULL;
1727 }
1728
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001729 if (!is_epoll_available()) {
1730 return NULL;
1731 }
1732
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001733 if (!is_grpc_wakeup_signal_initialized) {
Craig Tillerbc0ab082017-05-05 10:42:44 -07001734 /* TODO(ctiller): when other epoll engines are ready, remove the true || to
1735     * force this to be explicitly chosen if needed */
Craig Tiller924353a2017-05-05 17:36:31 +00001736 if (true || explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001737 grpc_use_signal(SIGRTMIN + 6);
1738 } else {
1739 return NULL;
1740 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001741 }
1742
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001743 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001744
1745 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
1746 return NULL;
1747 }
1748
1749 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1750 polling_island_global_init())) {
1751 return NULL;
1752 }
1753
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001754 return &vtable;
1755}
1756
murgatroid99623dd4f2016-08-08 17:31:27 -07001757#else /* defined(GRPC_LINUX_EPOLL) */
1758#if defined(GRPC_POSIX_SOCKET)
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001759#include "src/core/lib/iomgr/ev_posix.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001760/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001761 * NULL */
Craig Tillerf8382b82017-04-27 15:09:48 -07001762const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1763 bool explicit_request) {
1764 return NULL;
1765}
murgatroid99623dd4f2016-08-08 17:31:27 -07001766#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001767
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001768void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001769#endif /* !defined(GRPC_LINUX_EPOLL) */