blob: 90f730e66a39bcfa9e17dc560de648568c61adc0 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
murgatroid9954070892016-08-08 17:01:18 -070019#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070020
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070021/* This polling engine is only relevant on linux kernels supporting epoll() */
murgatroid99623dd4f2016-08-08 17:31:27 -070022#ifdef GRPC_LINUX_EPOLL
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070023
Craig Tiller4509c472017-04-27 19:05:13 +000024#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070025
26#include <assert.h>
27#include <errno.h>
28#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070029#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070030#include <signal.h>
31#include <string.h>
32#include <sys/epoll.h>
33#include <sys/socket.h>
34#include <unistd.h>
35
36#include <grpc/support/alloc.h>
37#include <grpc/support/log.h>
38#include <grpc/support/string_util.h>
39#include <grpc/support/tls.h>
40#include <grpc/support/useful.h>
41
42#include "src/core/lib/iomgr/ev_posix.h"
43#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070044#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070045#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070046#include "src/core/lib/iomgr/wakeup_fd_posix.h"
47#include "src/core/lib/profiling/timers.h"
48#include "src/core/lib/support/block_annotate.h"
49
Craig Tillere24b24d2017-04-06 16:05:45 -070050#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
51
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070052#define GRPC_POLLING_TRACE(...) \
Craig Tillerbc0ab082017-05-05 10:42:44 -070053 if (GRPC_TRACER_ON(grpc_polling_trace)) { \
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070054 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070055 }
56
Sree Kuchibhotla82d73412017-02-09 18:27:45 -080057/* Uncomment the following to enable extra checks on poll_object operations */
Sree Kuchibhotlae6f516e2016-12-08 12:20:23 -080058/* #define PO_DEBUG */
59
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070060static int grpc_wakeup_signal = -1;
61static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070062
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070063/* Implements the function defined in grpc_posix.h. This function might be
64 * called before even calling grpc_init() to set either a different signal to
65 * use. If signum == -1, then the use of signals is disabled */
66void grpc_use_signal(int signum) {
67 grpc_wakeup_signal = signum;
68 is_grpc_wakeup_signal_initialized = true;
69
70 if (grpc_wakeup_signal < 0) {
71 gpr_log(GPR_INFO,
72 "Use of signals is disabled. Epoll engine will not be used");
73 } else {
74 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
75 grpc_wakeup_signal);
76 }
77}
78
79struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070080
/* Discriminates the three kinds of pollable objects that embed a poll_obj. */
typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

/* Common header embedded (as the first member) in grpc_fd, grpc_pollset and
   grpc_pollset_set so that shared code can operate on any of them. */
typedef struct poll_obj {
#ifdef PO_DEBUG
  poll_obj_type obj_type; /* extra sanity-check tag; only kept in PO_DEBUG */
#endif
  gpr_mu mu;                /* protects this object and its 'pi' pointer */
  struct polling_island *pi; /* polling island this object belongs to (may be
                                NULL until first associated) */
} poll_obj;
94
95const char *poll_obj_string(poll_obj_type po_type) {
96 switch (po_type) {
97 case POLL_OBJ_FD:
98 return "fd";
99 case POLL_OBJ_POLLSET:
100 return "pollset";
101 case POLL_OBJ_POLLSET_SET:
102 return "pollset_set";
103 }
104
105 GPR_UNREACHABLE_CODE(return "UNKNOWN");
106}
107
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700108/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700109 * Fd Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700110 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800111
112#define FD_FROM_PO(po) ((grpc_fd *)(po))
113
struct grpc_fd {
  /* Common pollable-object header; MUST be the first member (FD_FROM_PO
     casts a poll_obj* straight back to grpc_fd*). */
  poll_obj po;

  int fd; /* the underlying OS file descriptor */
  /* refst format:
       bit 0    : 1=Active / 0=Orphaned
       bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     cases, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  /* Lock-free event state for read/write readiness; manipulated via the
     lockfree_event helpers (see lockfree_event.h). */
  gpr_atm read_closure;
  gpr_atm write_closure;

  /* Intrusive link used when this struct sits on the fd freelist. */
  struct grpc_fd *freelist_next;
  /* Closure scheduled once the fd has been fully orphaned/shut down. */
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object; /* registration with iomgr for leak checks */
};
141
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700142/* Reference counting for fds */
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700143// #define GRPC_FD_REF_COUNT_DEBUG
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700144#ifdef GRPC_FD_REF_COUNT_DEBUG
145static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
146static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
147 int line);
148#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
149#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
150#else
151static void fd_ref(grpc_fd *fd);
152static void fd_unref(grpc_fd *fd);
153#define GRPC_FD_REF(fd, reason) fd_ref(fd)
154#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
155#endif
156
157static void fd_global_init(void);
158static void fd_global_shutdown(void);
159
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700160/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700161 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700162 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700163
ncteisena1354852017-06-08 16:25:53 -0700164#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700165
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700166#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Craig Tillerb39307d2016-06-30 15:39:13 -0700167#define PI_UNREF(exec_ctx, p, r) \
168 pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700169
ncteisena1354852017-06-08 16:25:53 -0700170#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700171
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700172#define PI_ADD_REF(p, r) pi_add_ref((p))
Craig Tillerb39307d2016-06-30 15:39:13 -0700173#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700174
ncteisena1354852017-06-08 16:25:53 -0700175#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700176
/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu; /* protects the fd list and merge operations on this island */
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;      /* number of live entries in 'fds' */
  size_t fd_capacity; /* allocated capacity of 'fds' */
  grpc_fd **fds;      /* owned refs (taken via GRPC_FD_REF) to member fds */
} polling_island;
208
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700209/*******************************************************************************
210 * Pollset Declarations
211 */
/* One worker == one thread currently blocked in pollset_work(). Workers are
   kept on an intrusive doubly-linked list rooted at grpc_pollset.root_worker */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  /* Common pollable-object header; MUST be the first member (shared helpers
     cast between poll_obj* and grpc_pollset*). */
  poll_obj po;

  grpc_pollset_worker root_worker; /* sentinel head of the worker list */
  bool kicked_without_pollers;     /* a kick arrived while no worker polled */

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
/* A pollset_set is just the shared poll_obj header: its identity is the
   polling island it is (transitively) attached to. */
struct grpc_pollset_set {
  poll_obj po;
};
239
240/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700241 * Common helpers
242 */
243
Craig Tillerf975f742016-07-01 14:56:27 -0700244static bool append_error(grpc_error **composite, grpc_error *error,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700245 const char *desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700246 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700247 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700248 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700249 }
250 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700251 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700252}
253
254/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700255 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700256 */
257
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700258/* The wakeup fd that is used to wake up all threads in a Polling island. This
259 is useful in the polling island merge operation where we need to wakeup all
260 the threads currently polling the smaller polling island (so that they can
261 start polling the new/merged polling island)
262
263 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
264 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
265static grpc_wakeup_fd polling_island_wakeup_fd;
266
Craig Tiller2e620132016-10-10 15:27:44 -0700267/* The polling island being polled right now.
268 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700269static __thread polling_island *g_current_thread_polling_island;
270
Craig Tillerb39307d2016-06-30 15:39:13 -0700271/* Forward declaration */
Craig Tiller2b49ea92016-07-01 13:21:27 -0700272static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700273
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700274#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700275/* Currently TSAN may incorrectly flag data races between epoll_ctl and
276 epoll_wait for any grpc_fd structs that are added to the epoll set via
277 epoll_ctl and are returned (within a very short window) via epoll_wait().
278
279 To work-around this race, we establish a happens-before relation between
280 the code just-before epoll_ctl() and the code after epoll_wait() by using
281 this atomic */
282gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700283#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700284
Craig Tillerb39307d2016-06-30 15:39:13 -0700285static void pi_add_ref(polling_island *pi);
286static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700287
#ifndef NDEBUG
/* Trace flag gating the polling-island ref-count logging below (the name
   refers to 'workqueue' for historical reasons — a polling_island doubles as
   a grpc_workqueue). */
grpc_tracer_flag grpc_trace_workqueue_refcount = GRPC_TRACER_INITIALIZER(false);

/* Debug wrapper around pi_add_ref(): optionally logs the transition together
   with the reason and call site. NOTE: the logged old/new values are read
   with a separate atomic load and are therefore only approximate under
   concurrent ref traffic. */
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_trace_workqueue_refcount)) {
    long old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
            (void *)pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

/* Debug wrapper around pi_unref(); see pi_add_ref_dbg() for caveats. */
static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_trace_workqueue_refcount)) {
    long old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
            (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif
310
/* Takes a ref on the polling island. A plain (no-barrier) increment suffices
   here: per the ref_count contract on struct polling_island, the caller must
   already hold a ref, so this can never race with the final release. */
static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700314
/* Drops a ref on the polling island, destroying it when the count hits zero. */
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    /* Load 'merged_to' BEFORE deleting pi, since the delete frees pi. */
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}
332
/* Adds 'fd_count' fds to pi's epoll set and fd list, growing the list as
   needed; failures (other than EEXIST, which is benign) are accumulated into
   '*error' and the offending fd is skipped.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    /* Edge-triggered registration for both readability and writability. */
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      /* On any add failure (including EEXIST) do not track the fd again. */
      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      /* Grow by 1.5x (with a minimum step of 8 slots). */
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}
377
/* Registers a wakeup fd (read side only, edge-triggered) with pi's epoll set.
   Unlike regular fds, wakeup fds are NOT tracked in pi->fds.
   The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  /* EEXIST just means it was already registered — not an error. */
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}
401
/* Removes every fd from pi's epoll set and empties pi->fds; epoll_ctl
   failures (other than benign ENOENT) are accumulated into '*error' but do
   not stop the sweep.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    /* ENOENT: already gone (e.g. the fd was closed) — nothing to report. */
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}
429
/* Removes a single fd from pi's epoll set (unless the fd is already closed,
   in which case the kernel removed it automatically) and from pi->fds,
   dropping the list's ref on it.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically been
     removed from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  /* Unordered removal: overwrite the slot with the last element. */
  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
461
/* Allocates a new polling island with a fresh epoll fd, optionally seeding it
   with 'initial_fd'. On any failure the partially-built island is torn down
   and NULL is returned with the reason in '*error'.
   Might return NULL in case of an error */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1; /* sentinel so the error path knows not to close() */

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  /* goto-style cleanup: any accumulated error means the island is unusable. */
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}
500
/* Frees a polling island. Precondition: all fds must already have been
   removed (fd_cnt == 0); epoll_fd may be -1 if creation failed early. */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}
511
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700512/* Attempts to gets the last polling island in the linked list (liked by the
513 * 'merged_to' field). Since this does not lock the polling island, there are no
514 * guarantees that the island returned is the last island */
515static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
516 polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
517 while (next != NULL) {
518 pi = next;
519 next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
520 }
521
522 return pi;
523}
524
/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    /* Lock-free fast path: walk forward while a successor exists. */
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}
561
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    /* Step 1: chase both chains to their (currently) last nodes. */
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    /* Special case: both chains converged to the same island. */
    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Step 2: address-ordered locking prevents lock-order inversion. */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    /* Step 3: re-validate that both are still chain tails under the locks. */
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    /* A merge happened in between; drop both locks and retry. */
    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
642
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700643static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
644 if (p == q) {
645 gpr_mu_unlock(&p->mu);
646 } else {
647 gpr_mu_unlock(&p->mu);
648 gpr_mu_unlock(&q->mu);
649 }
650}
651
/* Merge two polling islands and return the surviving island (q).  The island
   with fewer fds is drained into the other; any errors encountered are
   accumulated into *error.  If p and q already resolve to the same island,
   this is a no-op (other than lock/unlock). */
static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
685
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700686static grpc_error *polling_island_global_init() {
687 grpc_error *error = GRPC_ERROR_NONE;
688
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700689 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
690 if (error == GRPC_ERROR_NONE) {
691 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
692 }
693
694 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700695}
696
/* Tear down polling-island global state: destroy the shared wakeup fd created
   by polling_island_global_init(). */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
700
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700701/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700702 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700703 */
704
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700705/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700706 * but instead so that implementations with multiple threads in (for example)
707 * epoll_wait deal with the race between pollset removal and incoming poll
708 * notifications.
709 *
710 * The problem is that the poller ultimately holds a reference to this
711 * object, so it is very difficult to know when is safe to free it, at least
712 * without some expensive synchronization.
713 *
714 * If we keep the object freelisted, in the worst case losing this race just
715 * becomes a spurious read notification on a reused fd.
716 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700717
718/* The alarm system needs to be able to wakeup 'some poller' sometimes
719 * (specifically when a new alarm needs to be triggered earlier than the next
720 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
721 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700722
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700723static grpc_fd *fd_freelist = NULL;
724static gpr_mu fd_freelist_mu;
725
/* Increment fd->refst by n.  Under GRPC_FD_REF_COUNT_DEBUG, the REF_BY /
   UNREF_BY macros also forward the call site (reason/file/line) and each
   ref is logged.  Note the #ifdef split: both the debug and non-debug
   signatures share the single GPR_ASSERT body below the #endif. */
#ifdef GRPC_FD_REF_COUNT_DEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd,
          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
          gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  /* A ref may only be taken on an fd that is still alive (refst > 0). */
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
741
/* Decrement fd->refst by n.  When the count reaches zero (old == n), the
   grpc_fd is returned to the freelist (it is never freed outright; see the
   freelist rationale above).  Mirrors the #ifdef structure of ref_by. */
#ifdef GRPC_FD_REF_COUNT_DEBUG
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  gpr_atm old;
  gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd,
          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
          gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
#else
static void unref_by(grpc_fd *fd, int n) {
  gpr_atm old;
#endif
  old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    /* Refcount must never go negative. */
    GPR_ASSERT(old > n);
  }
}
769
/* Public ref/unref for grpc_fd.  Increment refcount by two to avoid changing
   the orphan bit (the low bit of refst is reserved for orphan tracking, so
   external refs always move the count in steps of 2). */
#ifdef GRPC_FD_REF_COUNT_DEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif
785
/* One-time setup for the fd layer: initialize the freelist mutex. */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
787
/* Tear down the fd layer: drain and free every grpc_fd sitting on the
   freelist, then destroy the freelist mutex. */
static void fd_global_shutdown(void) {
  /* Lock/unlock with nothing in between: synchronizes with any thread that
     may still be pushing onto the freelist before we start draining it.
     (NOTE(review): presumably intended as a barrier — confirm.) */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
799
/* Wrap a raw file descriptor in a grpc_fd.  Reuses a grpc_fd from the
   freelist when available (see the freelist rationale above); otherwise
   allocates a fresh one.  The returned fd starts with refcount 1, no polling
   island, and no read-notifier pollset. */
static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  /* Try to pop a recycled grpc_fd off the freelist. */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifdef PO_DEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  /* Register with iomgr under a human-readable name for tracing/debugging. */
  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifdef GRPC_FD_REF_COUNT_DEBUG
  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
#endif
  gpr_free(fd_name);
  return new_fd;
}
845
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700846static int fd_wrapped_fd(grpc_fd *fd) {
847 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800848 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700849 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700850 ret_fd = fd->fd;
851 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800852 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700853
854 return ret_fd;
855}
856
/* Orphan a grpc_fd: detach it from its polling island and schedule on_done.
   If release_fd is non-NULL the raw fd is handed back to the caller through
   it; otherwise the raw fd is close()d here.  The grpc_fd struct itself is
   kept alive (via an extra ref) until the end of this function. */
static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  bool is_fd_closed = false;
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
    is_fd_closed = true;
  }

  fd->orphaned = true;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
916
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700917static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
918 grpc_fd *fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800919 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800920 return (grpc_pollset *)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700921}
922
/* True iff fd_shutdown() has taken effect (tracked on the read closure). */
static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}
926
/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  /* Only the first caller to flip the read closure to 'shutdown' performs the
     actual socket shutdown and shuts down the write closure; later calls are
     no-ops because grpc_lfev_set_shutdown returns false once already shut. */
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  /* Balance the caller's ref on 'why' (the set_shutdown calls took refs). */
  GRPC_ERROR_UNREF(why);
}
936
/* Register 'closure' to run when the fd becomes readable (or immediately if
   it already is); delegates to the lock-free event state. */
static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}
941
/* Register 'closure' to run when the fd becomes writable (or immediately if
   it already is); delegates to the lock-free event state. */
static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}
946
947/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700948 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700949 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700950GPR_TLS_DECL(g_current_thread_pollset);
951GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700952static __thread bool g_initialized_sigmask;
953static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700954
/* Handler for the wakeup signal used to kick pollers out of epoll_wait().
   Intentionally (almost) empty: the side effect of interrupting the blocking
   syscall is all that is needed. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700960
/* Install sig_handler for grpc_wakeup_signal so workers can be kicked. */
static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700962
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700963/* Global state management */
/* One-time pollset global setup: thread-local slots for the current pollset
   and worker, plus the kick signal handler.  Never fails. */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}
970
/* Tear down the thread-local state created by pollset_global_init(). */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
975
/* Kick a single worker by sending it the wakeup signal.  The CAS on
   worker->is_kicked makes kicks idempotent: only the first kicker delivers
   the signal.  Returns an OS error if pthread_kill fails. */
static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
991
/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked).  The worker list is a circular doubly-linked list anchored at
 * root_worker, so 'next points back to the root' means the list is empty. */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700997
/* Unlink 'worker' from the pollset's circular worker list (p locked). */
static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->prev->next = worker->next;
  worker->next->prev = worker->prev;
}
1002
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001003static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1004 if (pollset_has_workers(p)) {
1005 grpc_pollset_worker *w = p->root_worker.next;
1006 remove_worker(p, w);
1007 return w;
1008 } else {
1009 return NULL;
1010 }
1011}
1012
1013static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1014 worker->next = &p->root_worker;
1015 worker->prev = worker->next->prev;
1016 worker->prev->next = worker->next->prev = worker;
1017}
1018
1019static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1020 worker->prev = &p->root_worker;
1021 worker->next = worker->prev->next;
1022 worker->prev->next = worker->next->prev = worker;
1023}
1024
/* p->mu must be held before calling this function */
/* Kick one, all, or "any" worker on pollset p:
   - specific_worker != NULL: kick that worker (or every worker when it is
     GRPC_POLLSET_KICK_BROADCAST), skipping the calling thread itself.
   - specific_worker == NULL: kick the front worker and rotate it to the back,
     unless the caller is already polling this pollset.
   If no worker can absorb the kick, kicked_without_pollers is set so the next
   worker to arrive returns immediately.  Kick errors are accumulated. */
static grpc_error *pollset_kick(grpc_pollset *p,
                                grpc_pollset_worker *specific_worker) {
  GPR_TIMER_BEGIN("pollset_kick", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  const char *err_desc = "Kick Failure";
  grpc_pollset_worker *worker = specific_worker;
  if (worker != NULL) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          /* Never signal the thread that is doing the kicking. */
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
        GPR_TIMER_END("pollset_kick.broadcast", 0);
      } else {
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != NULL) {
      GPR_TIMER_MARK("finally_kick", 0);
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GPR_TIMER_END("pollset_kick", 0);
  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1076
/* Initialize a pollset: no polling island yet, an empty (self-linked)
   circular worker list, and all shutdown flags cleared.  Hands the pollset's
   mutex back to the caller through *mu. */
static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = NULL;
#ifdef PO_DEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  /* Empty circular list: the root sentinel points at itself. */
  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = NULL;
}
1092
/* Convert a timespec to milliseconds:
   - Very small or negative poll times are clamped to zero to do a non-blocking
     poll (which becomes spin polling)
   - Other small values are rounded up to one millisecond
   - Longer than a millisecond polls are rounded up to the next nearest
     millisecond to avoid spinning
   - Infinite timeouts are converted to -1 */
static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
                                           gpr_timespec now) {
  gpr_timespec timeout;
  static const int64_t max_spin_polling_us = 10;
  if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
    return -1; /* block indefinitely */
  }

  /* Deadline within the spin-polling window (<= 10us away): poll with a
     zero timeout. */
  if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
                                                   max_spin_polling_us,
                                                   GPR_TIMESPAN))) <= 0) {
    return 0;
  }
  timeout = gpr_time_sub(deadline, now);
  /* Round up to the next millisecond by adding (1ms - 1ns) before the
     conversion, so we never wake up before the deadline. */
  int millis = gpr_time_to_millis(gpr_time_add(
      timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
  return millis >= 1 ? millis : 1;
}
1118
/* Mark the fd readable (firing any pending notify_on_read closure) and record
   which pollset observed the readiness. */
static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_pollset *notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure);

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifer is set as the read_notifier_pollset
     (They would both point to the same polling island anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1131
/* Mark the fd writable, firing any pending notify_on_write closure. */
static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
}
1135
Craig Tillerb39307d2016-06-30 15:39:13 -07001136static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
1137 grpc_pollset *ps, char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001138 if (ps->po.pi != NULL) {
1139 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001140 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001141 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001142}
1143
/* Complete pollset shutdown: release the polling island and schedule the
   shutdown_done closure.  Must only run once all workers have left. */
static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
                                   grpc_pollset *pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
  GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
}
1155
/* pollset->po.mu lock must be held by the caller before calling this */
/* Begin shutting down the pollset: kick every worker out of pollset_work and,
   if no workers are present, finish immediately; otherwise the last departing
   worker finishes the shutdown.  'closure' runs when shutdown completes. */
static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                             grpc_closure *closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}
1175
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}
1183
Craig Tiller84ea3412016-09-08 14:57:56 -07001184#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001185/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
1186static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001187 grpc_pollset *pollset,
1188 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001189 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001190 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001191 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001192 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001193 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001194 char *err_msg;
1195 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001196 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1197
1198 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001199 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001200
1201 Since epoll_fd is immutable, we can read it without obtaining the polling
1202 island lock. There is however a possibility that the polling island (from
1203 which we got the epoll_fd) got merged with another island while we are
1204 in this function. This is still okay because in such a case, we will wakeup
1205 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001206 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001207
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001208 if (pollset->po.pi == NULL) {
1209 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1210 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001211 GPR_TIMER_END("pollset_work_and_unlock", 0);
1212 return; /* Fatal error. We cannot continue */
1213 }
1214
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001215 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001216 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001217 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001218 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001219
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001220 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001221 epoll_fd = pi->epoll_fd;
1222
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001223 /* Update the pollset->po.pi since the island being pointed by
1224 pollset->po.pi maybe older than the one pointed by pi) */
1225 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001226 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1227 polling island to be deleted */
1228 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001229 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1230 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001231 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001232
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001233 /* Add an extra ref so that the island does not get destroyed (which means
1234 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1235 epoll_fd */
1236 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001237 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001238
Craig Tiller61f96c12017-05-12 13:36:39 -07001239 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1240 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001241
Craig Tiller61f96c12017-05-12 13:36:39 -07001242 GRPC_SCHEDULING_START_BLOCKING_REGION;
1243 ep_rv =
1244 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
1245 GRPC_SCHEDULING_END_BLOCKING_REGION;
1246 if (ep_rv < 0) {
1247 if (errno != EINTR) {
1248 gpr_asprintf(&err_msg,
1249 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1250 epoll_fd, errno, strerror(errno));
1251 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1252 } else {
1253 /* We were interrupted. Save an interation by doing a zero timeout
1254 epoll_wait to see if there are any other events of interest */
1255 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1256 (void *)pollset, (void *)worker);
1257 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001258 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001259 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001260
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001261#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001262 /* See the definition of g_poll_sync for more details */
1263 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001264#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001265
Craig Tiller61f96c12017-05-12 13:36:39 -07001266 for (int i = 0; i < ep_rv; ++i) {
1267 void *data_ptr = ep_ev[i].data.ptr;
1268 if (data_ptr == &polling_island_wakeup_fd) {
1269 GRPC_POLLING_TRACE(
1270 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1271 "%d) got merged",
1272 (void *)pollset, (void *)worker, epoll_fd);
1273 /* This means that our polling island is merged with a different
1274 island. We do not have to do anything here since the subsequent call
1275 to the function pollset_work_and_unlock() will pick up the correct
1276 epoll_fd */
1277 } else {
1278 grpc_fd *fd = data_ptr;
1279 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1280 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1281 int write_ev = ep_ev[i].events & EPOLLOUT;
1282 if (read_ev || cancel) {
1283 fd_become_readable(exec_ctx, fd, pollset);
1284 }
1285 if (write_ev || cancel) {
1286 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001287 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001288 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001289 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001290
Craig Tiller61f96c12017-05-12 13:36:39 -07001291 g_current_thread_polling_island = NULL;
1292 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1293
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001294 GPR_ASSERT(pi != NULL);
1295
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001296 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001297 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001298 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001299 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001300 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001301 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001302
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001303 GPR_TIMER_END("pollset_work_and_unlock", 0);
1304}
1305
/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns */
static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                                grpc_pollset_worker **worker_hdl,
                                gpr_timespec now, gpr_timespec deadline) {
  GPR_TIMER_BEGIN("pollset_work", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);

  sigset_t new_mask;

  /* The worker lives on this thread's stack; it is only published via
     worker_hdl / the pollset's worker list for the duration of this call */
  grpc_pollset_worker worker;
  worker.next = worker.prev = NULL;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
         This is the mask used at all times *except during
         epoll_wait()*"
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    /* Releases pollset->po.mu while blocked in epoll_pwait() */
    pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
                            &g_orig_sigmask, &error);
    grpc_exec_ctx_flush(exec_ctx);

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);

    gpr_mu_unlock(&pollset->po.mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = NULL;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GPR_TIMER_END("pollset_work", 0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
1412
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001413static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001414 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001415 poll_obj_type item_type) {
1416 GPR_TIMER_BEGIN("add_poll_object", 0);
1417
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001418#ifdef PO_DEBUG
1419 GPR_ASSERT(item->obj_type == item_type);
1420 GPR_ASSERT(bag->obj_type == bag_type);
1421#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001422
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001423 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001424 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001425
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001426 gpr_mu_lock(&bag->mu);
1427 gpr_mu_lock(&item->mu);
1428
Craig Tiller7212c232016-07-06 13:11:09 -07001429retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001430 /*
1431 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1432 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1433 * a refcount of 2) and point item->pi and bag->pi to the new island
1434 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1435 * the other's non-NULL pi
1436 * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
1437 * polling islands and update item->pi and bag->pi to point to the new
1438 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001439 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001440
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001441 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1442 * orphaned */
1443 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1444 gpr_mu_unlock(&item->mu);
1445 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001446 return;
1447 }
1448
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001449 if (item->pi == bag->pi) {
1450 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001451 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001452 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001453
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001454 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1455 * we need to do some extra work to make TSAN happy */
1456 if (item_type == POLL_OBJ_FD) {
1457 /* Unlock before creating a new polling island: the polling island will
1458 create a workqueue which creates a file descriptor, and holding an fd
1459 lock here can eventually cause a loop to appear to TSAN (making it
1460 unhappy). We don't think it's a real loop (there's an epoch point
1461 where that loop possibility disappears), but the advantages of
1462 keeping TSAN happy outweigh any performance advantage we might have
1463 by keeping the lock held. */
1464 gpr_mu_unlock(&item->mu);
1465 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1466 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001467
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001468 /* Need to reverify any assumptions made between the initial lock and
1469 getting to this branch: if they've changed, we need to throw away our
1470 work and figure things out again. */
1471 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001472 GRPC_POLLING_TRACE(
1473 "add_poll_object: Raced creating new polling island. pi_new: %p "
1474 "(fd: %d, %s: %p)",
1475 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1476 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001477 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001478 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001479 polling_island_remove_all_fds_locked(pi_new, true, &error);
1480
1481 /* Ref and unref so that the polling island gets deleted during unref
1482 */
1483 PI_ADD_REF(pi_new, "dance_of_destruction");
1484 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1485 goto retry;
1486 }
Craig Tiller27da6422016-07-06 13:14:46 -07001487 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001488 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001489 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001490
1491 GRPC_POLLING_TRACE(
1492 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1493 "%s: %p)",
1494 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1495 poll_obj_string(bag_type), (void *)bag);
1496 } else {
1497 GRPC_POLLING_TRACE(
1498 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1499 (void *)pi_new, poll_obj_string(item_type),
1500 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001501 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001502 } else if (item->pi == NULL) {
1503 /* GPR_ASSERT(bag->pi != NULL) */
1504 /* Make pi_new point to latest pi*/
1505 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001506
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001507 if (item_type == POLL_OBJ_FD) {
1508 grpc_fd *fd = FD_FROM_PO(item);
1509 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1510 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001511
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001512 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001513 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001514 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1515 "bag(%s): %p)",
1516 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1517 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001518 } else if (bag->pi == NULL) {
1519 /* GPR_ASSERT(item->pi != NULL) */
1520 /* Make pi_new to point to latest pi */
1521 pi_new = polling_island_lock(item->pi);
1522 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001523 GRPC_POLLING_TRACE(
1524 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1525 "bag(%s): %p)",
1526 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1527 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001528 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001529 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001530 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001531 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1532 "bag(%s): %p)",
1533 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1534 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001535 }
1536
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001537 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1538 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001539
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001540 if (item->pi != pi_new) {
1541 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1542 if (item->pi != NULL) {
1543 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001544 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001545 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001546 }
1547
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001548 if (bag->pi != pi_new) {
1549 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1550 if (bag->pi != NULL) {
1551 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001552 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001553 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001554 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001555
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001556 gpr_mu_unlock(&item->mu);
1557 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001558
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001559 GRPC_LOG_IF_ERROR("add_poll_object", error);
1560 GPR_TIMER_END("add_poll_object", 0);
1561}
Craig Tiller57726ca2016-09-12 11:59:45 -07001562
/* Add 'fd' to 'pollset' by linking their poll objects (this may merge their
   polling islands; see add_poll_object()) */
static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                           grpc_fd *fd) {
  add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
                  POLL_OBJ_FD);
}
1568
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001569/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001570 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001571 */
1572
1573static grpc_pollset_set *pollset_set_create(void) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001574 grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001575 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001576 pss->po.pi = NULL;
1577#ifdef PO_DEBUG
1578 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1579#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001580 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001581}
1582
/* Destroy 'pss': drop its reference on its polling island (if it acquired
   one) and free the memory */
static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
                                grpc_pollset_set *pss) {
  gpr_mu_destroy(&pss->po.mu);

  if (pss->po.pi != NULL) {
    PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
  }

  gpr_free(pss);
}
1593
/* Add 'fd' to the pollset_set 'pss' (delegates to add_poll_object()) */
static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
                  POLL_OBJ_FD);
}
1599
/* Removal is a no-op in this engine: polling islands are only torn down via
   refcounting, not by unlinking individual fds from a pollset_set */
static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  /* Nothing to do */
}
1604
/* Add pollset 'ps' to the pollset_set 'pss' (delegates to add_poll_object()) */
static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
                  POLL_OBJ_POLLSET);
}
1610
/* Removal is a no-op in this engine (see pollset_set_del_fd) */
static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  /* Nothing to do */
}
1615
/* Add pollset_set 'item' to pollset_set 'bag' (delegates to
   add_poll_object()) */
static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}
1622
/* Removal is a no-op in this engine (see pollset_set_del_fd) */
static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  /* Nothing to do */
}
1628
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001629/* Test helper functions
1630 * */
1631void *grpc_fd_get_polling_island(grpc_fd *fd) {
1632 polling_island *pi;
1633
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001634 gpr_mu_lock(&fd->po.mu);
1635 pi = fd->po.pi;
1636 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001637
1638 return pi;
1639}
1640
1641void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1642 polling_island *pi;
1643
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001644 gpr_mu_lock(&ps->po.mu);
1645 pi = ps->po.pi;
1646 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001647
1648 return pi;
1649}
1650
/* Test-only helper: returns true iff 'p' and 'q' resolve to the same latest
   polling island in their respective merge chains */
bool grpc_are_polling_islands_equal(void *p, void *q) {
  polling_island *p1 = p;
  polling_island *p2 = q;

  /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
     latest polling islands in their respective linked lists */
  polling_island_lock_pair(&p1, &p2);
  polling_island_unlock_pair(p1, p2);

  return p1 == p2;
}
1662
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001663/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001664 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001665 */
1666
/* Tear down all global state owned by this polling engine */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1672
/* The epollsig engine's implementation of the event-engine interface;
   returned from grpc_init_epollsig_linux() */
static const grpc_event_engine_vtable vtable = {
    .pollset_size = sizeof(grpc_pollset),

    .fd_create = fd_create,
    .fd_wrapped_fd = fd_wrapped_fd,
    .fd_orphan = fd_orphan,
    .fd_shutdown = fd_shutdown,
    .fd_is_shutdown = fd_is_shutdown,
    .fd_notify_on_read = fd_notify_on_read,
    .fd_notify_on_write = fd_notify_on_write,
    .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,

    .pollset_init = pollset_init,
    .pollset_shutdown = pollset_shutdown,
    .pollset_destroy = pollset_destroy,
    .pollset_work = pollset_work,
    .pollset_kick = pollset_kick,
    .pollset_add_fd = pollset_add_fd,

    .pollset_set_create = pollset_set_create,
    .pollset_set_destroy = pollset_set_destroy,
    .pollset_set_add_pollset = pollset_set_add_pollset,
    .pollset_set_del_pollset = pollset_set_del_pollset,
    .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
    .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
    .pollset_set_add_fd = pollset_set_add_fd,
    .pollset_set_del_fd = pollset_set_del_fd,

    .shutdown_engine = shutdown_engine,
};
1703
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001704/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1705 * Create a dummy epoll_fd to make sure epoll support is available */
1706static bool is_epoll_available() {
1707 int fd = epoll_create1(EPOLL_CLOEXEC);
1708 if (fd < 0) {
1709 gpr_log(
1710 GPR_ERROR,
1711 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1712 fd);
1713 return false;
1714 }
1715 close(fd);
1716 return true;
1717}
1718
/* Entry point for this engine: returns its vtable if the environment supports
   it (wakeup signal usable, wakeup fds available, kernel epoll present and
   global init succeeds), otherwise NULL so the caller can fall back to
   another engine */
const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  /* If use of signals is disabled, we cannot use epoll engine*/
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    return NULL;
  }

  if (!grpc_has_wakeup_fd()) {
    return NULL;
  }

  if (!is_epoll_available()) {
    return NULL;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    /* TODO(ctiller): when other epoll engines are ready, remove the true || to
     * force this to be explicitly chosen if needed */
    if (true || explicit_request) {
      grpc_use_signal(SIGRTMIN + 6);
    } else {
      return NULL;
    }
  }

  fd_global_init();

  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return NULL;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return NULL;
  }

  return &vtable;
}
1757
murgatroid99623dd4f2016-08-08 17:31:27 -07001758#else /* defined(GRPC_LINUX_EPOLL) */
1759#if defined(GRPC_POSIX_SOCKET)
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001760#include "src/core/lib/iomgr/ev_posix.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001761/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001762 * NULL */
/* Stub for non-epoll platforms: this engine is never available */
const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  return NULL;
}
murgatroid99623dd4f2016-08-08 17:31:27 -07001767#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001768
/* No-op stub: there is no wakeup-signal machinery without epoll support */
void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001770#endif /* !defined(GRPC_LINUX_EPOLL) */