/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/core/lib/iomgr/port.h"

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)

#define GRPC_POLLING_TRACE(...)             \
  if (GRPC_TRACER_ON(grpc_polling_trace)) { \
    gpr_log(GPR_INFO, __VA_ARGS__);         \
  }
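
/* Illustrative usage note (not part of the original source): the macro above
   is used like gpr_log later in this file, e.g.

     GRPC_POLLING_TRACE("pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
                        (void *)worker, (long int)worker->pt_id);

   and produces output only when the grpc_polling_trace tracer is enabled
   (commonly switched on at runtime via gRPC's tracer mechanism). */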

/* Uncomment the following to enable extra checks on poll_object operations */
/* #define PO_DEBUG */

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;

/* Implements the function defined in grpc_posix.h. This function might be
 * called even before grpc_init() to set a different signal to use. If
 * signum == -1, then the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}
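
/* Illustrative usage sketch (not part of the original source; the specific
   signal chosen below is an assumption, not a recommendation): an application
   that wants this signal-based engine to use a particular real-time signal
   would call grpc_use_signal() before initializing gRPC, e.g.

     grpc_use_signal(SIGRTMIN + 2);  // hypothetical choice of signal
     grpc_init();

   Calling grpc_use_signal(-1) instead disables signals, in which case this
   polling engine will not be used. */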

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifdef PO_DEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island *pi;
} poll_obj;

const char *poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd *)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
     bit 0    : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  gpr_atm read_closure;
  gpr_atm write_closure;

  struct grpc_fd *freelist_next;
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};
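
/* Illustrative note (not part of the original source): with the refst format
   described above, an active fd whose refcount is 2 stores refst == 5
   (binary 101: bit 0 set because the fd is still active, bits 1-n holding 2).
   Taking or dropping a reference always changes refst by 2, so the
   active/orphaned bit is never disturbed by the ref/unref helpers below. */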

/* Reference counting for fds */
// #define GRPC_FD_REF_COUNT_DEBUG
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd *fd);
static void fd_unref(grpc_fd *fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed, which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this island merged into.
   * merged_to is only set once in a polling_island's lifetime (and only if
   * the island is merged with another island). Because of this, we can use
   * the gpr_atm type here so that we can do atomic access on this field and
   * reduce lock contention on the 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e. not 0), all the remaining
   * fields (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd **fds;
} polling_island;
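
/* Illustrative note (not part of the original source): repeated merges leave
   polling islands chained through 'merged_to', e.g.

     pi_A->merged_to == (gpr_atm)pi_B
     pi_B->merged_to == (gpr_atm)pi_C
     pi_C->merged_to == (gpr_atm)NULL   <-- pi_C is the "latest" island

   Only the island at the end of the chain still has valid fields (see the
   comment on 'merged_to' above); polling_island_maybe_get_latest() and
   polling_island_lock() below walk this chain to reach it. */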

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down? */
  bool finish_shutdown_called; /* Has finish_shutdown_locked() been called? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

static bool append_error(grpc_error **composite, grpc_error *error,
                         const char *desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}
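
/* Illustrative usage sketch (not part of the original source;
   do_first_step()/do_second_step() are hypothetical helpers returning a
   grpc_error *): append_error() folds individual failures into one composite
   error while reporting whether each step succeeded, e.g.

     grpc_error *error = GRPC_ERROR_NONE;
     const char *err_desc = "example_operation";
     if (append_error(&error, do_first_step(), err_desc) &&
         append_error(&error, do_second_step(), err_desc)) {
       // both steps returned GRPC_ERROR_NONE
     }
     // 'error' is GRPC_ERROR_NONE or a composite holding every failure

   This is the pattern the epoll/polling-island helpers below follow. */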

/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wake up all
   the threads currently polling the smaller polling island (so that they can
   start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e. the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island *g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work around this race, we establish a happens-before relation between
   the code just before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island *pi);
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifndef NDEBUG
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    long old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
            (void *)pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    long old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
            (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif

static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If the ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If the fd is already closed, then it would have automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
460
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700461/* Might return NULL in case of an error */
Craig Tillerb39307d2016-06-30 15:39:13 -0700462static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
463 grpc_fd *initial_fd,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700464 grpc_error **error) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700465 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700466 const char *err_desc = "polling_island_create";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700467
Craig Tillerb39307d2016-06-30 15:39:13 -0700468 *error = GRPC_ERROR_NONE;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700469
Craig Tillerb39307d2016-06-30 15:39:13 -0700470 pi = gpr_malloc(sizeof(*pi));
471 gpr_mu_init(&pi->mu);
472 pi->fd_cnt = 0;
473 pi->fd_capacity = 0;
474 pi->fds = NULL;
475 pi->epoll_fd = -1;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700476
Craig Tiller15007612016-07-06 09:36:16 -0700477 gpr_atm_rel_store(&pi->ref_count, 0);
Craig Tillerd8a3c042016-09-09 12:42:37 -0700478 gpr_atm_rel_store(&pi->poller_count, 0);
Sree Kuchibhotla0224dcc2016-06-22 18:04:00 -0700479 gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700480
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700481 pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700482
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700483 if (pi->epoll_fd < 0) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700484 append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
485 goto done;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700486 }
487
Craig Tillerb39307d2016-06-30 15:39:13 -0700488 if (initial_fd != NULL) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700489 polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
Craig Tillerb39307d2016-06-30 15:39:13 -0700490 }
491
Craig Tillerb39307d2016-06-30 15:39:13 -0700492done:
493 if (*error != GRPC_ERROR_NONE) {
Craig Tiller0a06cd72016-07-14 13:21:24 -0700494 polling_island_delete(exec_ctx, pi);
Craig Tillerb39307d2016-06-30 15:39:13 -0700495 pi = NULL;
496 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700497 return pi;
498}
499
Craig Tillerb39307d2016-06-30 15:39:13 -0700500static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700501 GPR_ASSERT(pi->fd_cnt == 0);
502
Craig Tiller0a06cd72016-07-14 13:21:24 -0700503 if (pi->epoll_fd >= 0) {
504 close(pi->epoll_fd);
505 }
Craig Tillerb39307d2016-06-30 15:39:13 -0700506 gpr_mu_destroy(&pi->mu);
507 gpr_free(pi->fds);
508 gpr_free(pi);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700509}
510
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700511/* Attempts to gets the last polling island in the linked list (liked by the
512 * 'merged_to' field). Since this does not lock the polling island, there are no
513 * guarantees that the island returned is the last island */
514static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
515 polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
516 while (next != NULL) {
517 pi = next;
518 next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
519 }
520
521 return pi;
522}

/* Gets the lock on the *latest* polling island i.e. the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
      polling_island *pi_latest = polling_island_lock(pi);
      ...
      ... critical section ..
      ...
      gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e. without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. We're done */
        break;
      }

      /* pi->merged_to is not NULL i.e. pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p);
       polling_island_lock(*q);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking the polling_island with the lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e. move all the fds from p (the one with fewer fds) to q.
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false' */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wake up all the pollers (if any) on p so that they pick up this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (note that no merge would have happened
     if p == q, which is ok) */
  return q;
}

static grpc_error *polling_island_global_init() {
  grpc_error *error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wake up 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd *fd_freelist = NULL;
static gpr_mu fd_freelist_mu;

#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}

#ifndef NDEBUG
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd *fd, int n) {
#endif
  gpr_atm old;
  old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifdef PO_DEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifndef NDEBUG
  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
#endif
  gpr_free(fd_name);
  return new_fd;
}

static int fd_wrapped_fd(grpc_fd *fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  bool is_fd_closed = false;
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
    is_fd_closed = true;
  }

  fd->orphaned = true;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to the freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e. the last island in the
       linked list pointed to by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref the stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}

static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}

static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}

static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;

static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}

static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }

/* Global state management */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}

static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}

static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}

/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001001
1002static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1003 worker->prev->next = worker->next;
1004 worker->next->prev = worker->prev;
1005}
1006
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001007static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1008 if (pollset_has_workers(p)) {
1009 grpc_pollset_worker *w = p->root_worker.next;
1010 remove_worker(p, w);
1011 return w;
1012 } else {
1013 return NULL;
1014 }
1015}
1016
1017static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1018 worker->next = &p->root_worker;
1019 worker->prev = worker->next->prev;
1020 worker->prev->next = worker->next->prev = worker;
1021}
1022
1023static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1024 worker->prev = &p->root_worker;
1025 worker->next = worker->prev->next;
1026 worker->prev->next = worker->next->prev = worker;
1027}
1028
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001029/* p->mu must be held before calling this function */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001030static grpc_error *pollset_kick(grpc_pollset *p,
1031 grpc_pollset_worker *specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001032 GPR_TIMER_BEGIN("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001033 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001034 const char *err_desc = "Kick Failure";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001035 grpc_pollset_worker *worker = specific_worker;
1036 if (worker != NULL) {
1037 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001038 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001039 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001040 for (worker = p->root_worker.next; worker != &p->root_worker;
1041 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001042 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001043 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001044 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001045 }
Craig Tillera218a062016-06-26 09:58:37 -07001046 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001047 } else {
1048 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001049 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001050 } else {
1051 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001052 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001053 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001054 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001055 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001056 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1057 /* Since worker == NULL, it means that we can kick "any" worker on this
1058 pollset 'p'. If 'p' happens to be the same pollset this thread is
1059 currently polling (i.e in pollset_work() function), then there is no need
1060 to kick any other worker since the current thread can just absorb the
1061 kick. This is the reason why we enter this case only when
1062 g_current_thread_pollset is != p */
1063
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001064 GPR_TIMER_MARK("kick_anonymous", 0);
1065 worker = pop_front_worker(p);
1066 if (worker != NULL) {
1067 GPR_TIMER_MARK("finally_kick", 0);
1068 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001069 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001070 } else {
1071 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001072 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001073 }
1074 }
1075
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001076 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001077 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1078 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001079}
1080
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001081static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001082 gpr_mu_init(&pollset->po.mu);
1083 *mu = &pollset->po.mu;
1084 pollset->po.pi = NULL;
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001085#ifdef PO_DEBUG
1086 pollset->po.obj_type = POLL_OBJ_POLLSET;
1087#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001088
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001089 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001090 pollset->kicked_without_pollers = false;
1091
1092 pollset->shutting_down = false;
1093 pollset->finish_shutdown_called = false;
1094 pollset->shutdown_done = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001095}
1096
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001097/* Convert a timespec to milliseconds:
1098 - Very small or negative poll times are clamped to zero to do a non-blocking
1099 poll (which becomes spin polling)
1100 - Other small values are rounded up to one millisecond
1101 - Longer than a millisecond polls are rounded up to the next nearest
1102 millisecond to avoid spinning
1103 - Infinite timeouts are converted to -1 */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001104static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
1105 gpr_timespec now) {
1106 gpr_timespec timeout;
1107 static const int64_t max_spin_polling_us = 10;
1108 if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
1109 return -1;
1110 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001111
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001112 if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
1113 max_spin_polling_us,
1114 GPR_TIMESPAN))) <= 0) {
1115 return 0;
1116 }
1117 timeout = gpr_time_sub(deadline, now);
Craig Tiller799e7e82017-03-27 12:42:34 -07001118 int millis = gpr_time_to_millis(gpr_time_add(
1119 timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
1120 return millis >= 1 ? millis : 1;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001121}
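
/* A few illustrative conversions (assuming 'now' is the current time):
     deadline == gpr_inf_future(..)  -> -1 (wait indefinitely)
     deadline <= now + 10us          ->  0 (non-blocking / spin poll)
     deadline == now + 0.2ms         ->  1 (rounded up to one millisecond)
     deadline == now + 2.3ms         ->  3 (rounded up to the next millisecond) */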

static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_pollset *notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure);

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifier is set as the
     read_notifier_pollset (They would both point to the same polling island
     anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}

static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
}

static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
                                           grpc_pollset *ps, char *reason) {
  if (ps->po.pi != NULL) {
    PI_UNREF(exec_ctx, ps->po.pi, reason);
  }
  ps->po.pi = NULL;
}

static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
                                   grpc_pollset *pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
  GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
}

/* pollset->po.mu lock must be held by the caller before calling this */
static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                             grpc_closure *closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}

/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}

#define GRPC_EPOLL_MAX_EVENTS 100
/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset *pollset,
                                    grpc_pollset_worker *worker, int timeout_ms,
                                    sigset_t *sig_mask, grpc_error **error) {
  struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
  int epoll_fd = -1;
  int ep_rv;
  polling_island *pi = NULL;
  char *err_msg;
  const char *err_desc = "pollset_work_and_unlock";
  GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);

  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
     latest polling island pointed to by pollset->po.pi

     Since epoll_fd is immutable, we can read it without obtaining the polling
     island lock. There is however a possibility that the polling island (from
     which we got the epoll_fd) got merged with another island while we are
     in this function. This is still okay because in such a case, we will wake
     up right away from epoll_wait() and pick up the latest polling_island the
     next time this function (i.e pollset_work_and_unlock()) is called */

  if (pollset->po.pi == NULL) {
    pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
    if (pollset->po.pi == NULL) {
      GPR_TIMER_END("pollset_work_and_unlock", 0);
      return; /* Fatal error. We cannot continue */
    }

    PI_ADD_REF(pollset->po.pi, "ps");
    GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
                       (void *)pollset, (void *)pollset->po.pi);
  }

  pi = polling_island_maybe_get_latest(pollset->po.pi);
  epoll_fd = pi->epoll_fd;

  /* Update pollset->po.pi since the island pointed to by pollset->po.pi may be
     older than the one pointed to by pi */
  if (pollset->po.pi != pi) {
    /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
       polling island to be deleted */
    PI_ADD_REF(pi, "ps");
    PI_UNREF(exec_ctx, pollset->po.pi, "ps");
    pollset->po.pi = pi;
  }

  /* Add an extra ref so that the island does not get destroyed (which means
     the epoll_fd won't be closed) while we are doing an epoll_wait() on the
     epoll_fd */
  PI_ADD_REF(pi, "ps_work");
  gpr_mu_unlock(&pollset->po.mu);

  gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
  g_current_thread_polling_island = pi;

  GRPC_SCHEDULING_START_BLOCKING_REGION;
  ep_rv =
      epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
  GRPC_SCHEDULING_END_BLOCKING_REGION;
  if (ep_rv < 0) {
    if (errno != EINTR) {
      gpr_asprintf(&err_msg,
                   "epoll_wait() epoll fd: %d failed with error: %d (%s)",
                   epoll_fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    } else {
      /* We were interrupted. Save an iteration by doing a zero-timeout
         epoll_wait to see if there are any other events of interest */
      GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
                         (void *)pollset, (void *)worker);
      ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
    }
  }

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more details */
  gpr_atm_acq_load(&g_epoll_sync);
#endif /* defined(GRPC_TSAN) */

  for (int i = 0; i < ep_rv; ++i) {
    void *data_ptr = ep_ev[i].data.ptr;
    if (data_ptr == &polling_island_wakeup_fd) {
      GRPC_POLLING_TRACE(
          "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
          "%d) got merged",
          (void *)pollset, (void *)worker, epoll_fd);
      /* This means that our polling island is merged with a different
         island. We do not have to do anything here since the subsequent call
         to the function pollset_work_and_unlock() will pick up the correct
         epoll_fd */
    } else {
      grpc_fd *fd = data_ptr;
      /* Treat EPOLLERR/EPOLLHUP as making the fd both readable and writable so
         that any pending closures get scheduled and observe the error through
         the normal read/write paths */
      int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
      int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
      int write_ev = ep_ev[i].events & EPOLLOUT;
      if (read_ev || cancel) {
        fd_become_readable(exec_ctx, fd, pollset);
      }
      if (write_ev || cancel) {
        fd_become_writable(exec_ctx, fd);
      }
    }
  }

  g_current_thread_polling_island = NULL;
  gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);

  GPR_ASSERT(pi != NULL);

  /* Before leaving, release the extra ref we added to the polling island. It
     is important to use "pi" here (i.e our old copy of pollset->po.pi
     that we got before releasing the polling island lock). This is because
     the pollset->po.pi pointer might get updated in other parts of the
     code when there is an island merge while we are doing epoll_wait() above */
  PI_UNREF(exec_ctx, pi, "ps_work");

  GPR_TIMER_END("pollset_work_and_unlock", 0);
}

/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns */
static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                                grpc_pollset_worker **worker_hdl,
                                gpr_timespec now, gpr_timespec deadline) {
  GPR_TIMER_BEGIN("pollset_work", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);

  sigset_t new_mask;

  grpc_pollset_worker worker;
  worker.next = worker.prev = NULL;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask:       The new thread mask which blocks 'grpc_wakeup_signal'.
                         This is the mask used at all times *except during
                         epoll_wait()*
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
                         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
                            &g_orig_sigmask, &error);
    grpc_exec_ctx_flush(exec_ctx);

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);

    gpr_mu_unlock(&pollset->po.mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = NULL;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GPR_TIMER_END("pollset_work", 0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
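
/* Illustrative calling pattern, as a sketch only (real callers such as the
   completion queue live outside this file and go through the public
   grpc_pollset_* wrappers declared in pollset.h, which dispatch to the vtable
   below):

     gpr_mu *mu;
     grpc_pollset *ps = gpr_zalloc(grpc_pollset_size());
     grpc_pollset_init(ps, &mu);
     ...
     gpr_mu_lock(mu);
     GRPC_LOG_IF_ERROR("pollset_work",
                       grpc_pollset_work(&exec_ctx, ps, NULL /* worker_hdl */,
                                         gpr_now(GPR_CLOCK_MONOTONIC),
                                         deadline));
     gpr_mu_unlock(mu);

   The mutex handed out by pollset_init (i.e pollset->po.mu) must be held when
   calling pollset_work and may be dropped and re-acquired internally, as
   described in the comment above pollset_work() */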

static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
                            poll_obj_type bag_type, poll_obj *item,
                            poll_obj_type item_type) {
  GPR_TIMER_BEGIN("add_poll_object", 0);

#ifdef PO_DEBUG
  GPR_ASSERT(item->obj_type == item_type);
  GPR_ASSERT(bag->obj_type == bag_type);
#endif

  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *pi_new = NULL;

  gpr_mu_lock(&bag->mu);
  gpr_mu_lock(&item->mu);

retry:
  /*
   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
   * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
   *    a refcount of 2) and point item->pi and bag->pi to the new island
   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
   *    the other's non-NULL pi
   * 4) Finally if item->pi and bag->pi are non-NULL and not-equal, merge the
   *    polling islands and update item->pi and bag->pi to point to the new
   *    island
   */

  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
   * orphaned */
  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
    gpr_mu_unlock(&item->mu);
    gpr_mu_unlock(&bag->mu);
    return;
  }

  if (item->pi == bag->pi) {
    pi_new = item->pi;
    if (pi_new == NULL) {
      /* GPR_ASSERT(item->pi == bag->pi == NULL) */

      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
       * we need to do some extra work to make TSAN happy */
      if (item_type == POLL_OBJ_FD) {
        /* Unlock before creating a new polling island: the polling island will
           create a workqueue which creates a file descriptor, and holding an fd
           lock here can eventually cause a loop to appear to TSAN (making it
           unhappy). We don't think it's a real loop (there's an epoch point
           where that loop possibility disappears), but the advantages of
           keeping TSAN happy outweigh any performance advantage we might have
           by keeping the lock held. */
        gpr_mu_unlock(&item->mu);
        pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
        gpr_mu_lock(&item->mu);

        /* Need to reverify any assumptions made between the initial lock and
           getting to this branch: if they've changed, we need to throw away our
           work and figure things out again. */
        if (item->pi != NULL) {
          GRPC_POLLING_TRACE(
              "add_poll_object: Raced creating new polling island. pi_new: %p "
              "(fd: %d, %s: %p)",
              (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
              (void *)bag);
          /* No need to lock 'pi_new' here since this is a new polling island
             and no one has a reference to it yet */
          polling_island_remove_all_fds_locked(pi_new, true, &error);

          /* Ref and unref so that the polling island gets deleted during unref
           */
          PI_ADD_REF(pi_new, "dance_of_destruction");
          PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
          goto retry;
        }
      } else {
        pi_new = polling_island_create(exec_ctx, NULL, &error);
      }

      GRPC_POLLING_TRACE(
          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
          "%s: %p)",
          (void *)pi_new, poll_obj_string(item_type), (void *)item,
          poll_obj_string(bag_type), (void *)bag);
    } else {
      GRPC_POLLING_TRACE(
          "add_poll_object: Same polling island. pi: %p (%s, %s)",
          (void *)pi_new, poll_obj_string(item_type),
          poll_obj_string(bag_type));
    }
  } else if (item->pi == NULL) {
    /* GPR_ASSERT(bag->pi != NULL) */
    /* Make pi_new point to the latest pi */
    pi_new = polling_island_lock(bag->pi);

    if (item_type == POLL_OBJ_FD) {
      grpc_fd *fd = FD_FROM_PO(item);
      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
    }

    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  } else if (bag->pi == NULL) {
    /* GPR_ASSERT(item->pi != NULL) */
    /* Make pi_new point to the latest pi */
    pi_new = polling_island_lock(item->pi);
    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  } else {
    pi_new = polling_island_merge(item->pi, bag->pi, &error);
    GRPC_POLLING_TRACE(
        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  }

  /* At this point, pi_new is the polling island that both item->pi and bag->pi
     MUST be pointing to */

  if (item->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(item_type));
    if (item->pi != NULL) {
      PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
    }
    item->pi = pi_new;
  }

  if (bag->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
    if (bag->pi != NULL) {
      PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
    }
    bag->pi = pi_new;
  }

  gpr_mu_unlock(&item->mu);
  gpr_mu_unlock(&bag->mu);

  GRPC_LOG_IF_ERROR("add_poll_object", error);
  GPR_TIMER_END("add_poll_object", 0);
}

static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                           grpc_fd *fd) {
  add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
                  POLL_OBJ_FD);
}

/*******************************************************************************
 * Pollset-set Definitions
 */

static grpc_pollset_set *pollset_set_create(void) {
  grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
  gpr_mu_init(&pss->po.mu);
  pss->po.pi = NULL;
#ifdef PO_DEBUG
  pss->po.obj_type = POLL_OBJ_POLLSET_SET;
#endif
  return pss;
}

static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
                                grpc_pollset_set *pss) {
  gpr_mu_destroy(&pss->po.mu);

  if (pss->po.pi != NULL) {
    PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
  }

  gpr_free(pss);
}

static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
                  POLL_OBJ_FD);
}

static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  /* Nothing to do */
}

static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
                  POLL_OBJ_POLLSET);
}

static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  /* Nothing to do */
}

static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}

static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  /* Nothing to do */
}
/* Test helper functions */
void *grpc_fd_get_polling_island(grpc_fd *fd) {
  polling_island *pi;

  gpr_mu_lock(&fd->po.mu);
  pi = fd->po.pi;
  gpr_mu_unlock(&fd->po.mu);

  return pi;
}

void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
  polling_island *pi;

  gpr_mu_lock(&ps->po.mu);
  pi = ps->po.pi;
  gpr_mu_unlock(&ps->po.mu);

  return pi;
}

bool grpc_are_polling_islands_equal(void *p, void *q) {
  polling_island *p1 = p;
  polling_island *p2 = q;

  /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
     latest polling islands in their respective linked lists */
  polling_island_lock_pair(&p1, &p2);
  polling_island_unlock_pair(p1, p2);

  return p1 == p2;
}
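
/* Illustrative use of the helpers above (a sketch of what a test might assert;
   the actual tests live elsewhere in the tree): once an fd has been added to a
   pollset, both end up on the same polling island, possibly via a merge chain:

     pollset_add_fd(exec_ctx, ps, fd);
     GPR_ASSERT(grpc_are_polling_islands_equal(
         grpc_fd_get_polling_island(fd), grpc_pollset_get_polling_island(ps)));
 */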

/*******************************************************************************
 * Event engine binding
 */

static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}

static const grpc_event_engine_vtable vtable = {
    .pollset_size = sizeof(grpc_pollset),

    .fd_create = fd_create,
    .fd_wrapped_fd = fd_wrapped_fd,
    .fd_orphan = fd_orphan,
    .fd_shutdown = fd_shutdown,
    .fd_is_shutdown = fd_is_shutdown,
    .fd_notify_on_read = fd_notify_on_read,
    .fd_notify_on_write = fd_notify_on_write,
    .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,

    .pollset_init = pollset_init,
    .pollset_shutdown = pollset_shutdown,
    .pollset_destroy = pollset_destroy,
    .pollset_work = pollset_work,
    .pollset_kick = pollset_kick,
    .pollset_add_fd = pollset_add_fd,

    .pollset_set_create = pollset_set_create,
    .pollset_set_destroy = pollset_set_destroy,
    .pollset_set_add_pollset = pollset_set_add_pollset,
    .pollset_set_del_pollset = pollset_set_del_pollset,
    .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
    .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
    .pollset_set_add_fd = pollset_set_add_fd,
    .pollset_set_del_fd = pollset_set_del_fd,

    .shutdown_engine = shutdown_engine,
};

/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
 * Create a dummy epoll_fd to make sure epoll support is available */
static bool is_epoll_available() {
  int fd = epoll_create1(EPOLL_CLOEXEC);
  if (fd < 0) {
    gpr_log(
        GPR_ERROR,
        "epoll_create1 failed with error: %d. Not using epoll polling engine",
        fd);
    return false;
  }
  close(fd);
  return true;
}

const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  /* If use of signals is disabled, we cannot use the epoll engine */
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    return NULL;
  }

  if (!grpc_has_wakeup_fd()) {
    return NULL;
  }

  if (!is_epoll_available()) {
    return NULL;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    /* TODO(ctiller): when other epoll engines are ready, remove the true || to
     * force this to be explicitly chosen if needed */
    if (true || explicit_request) {
      grpc_use_signal(SIGRTMIN + 6);
    } else {
      return NULL;
    }
  }

  fd_global_init();

  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return NULL;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return NULL;
  }

  return &vtable;
}

#else /* defined(GRPC_LINUX_EPOLL) */
#if defined(GRPC_POSIX_SOCKET)
#include "src/core/lib/iomgr/ev_posix.h"
/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
 * NULL */
const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  return NULL;
}
#endif /* defined(GRPC_POSIX_SOCKET) */

void grpc_use_signal(int signum) {}
#endif /* !defined(GRPC_LINUX_EPOLL) */