blob: 0d969dccce967bd9f879e76e845c44cd884de8a6 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
murgatroid9954070892016-08-08 17:01:18 -070019#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070020
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070021/* This polling engine is only relevant on linux kernels supporting epoll() */
murgatroid99623dd4f2016-08-08 17:31:27 -070022#ifdef GRPC_LINUX_EPOLL
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070023
Craig Tiller4509c472017-04-27 19:05:13 +000024#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070025
26#include <assert.h>
27#include <errno.h>
28#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070029#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070030#include <signal.h>
31#include <string.h>
32#include <sys/epoll.h>
33#include <sys/socket.h>
34#include <unistd.h>
35
36#include <grpc/support/alloc.h>
37#include <grpc/support/log.h>
38#include <grpc/support/string_util.h>
39#include <grpc/support/tls.h>
40#include <grpc/support/useful.h>
41
42#include "src/core/lib/iomgr/ev_posix.h"
43#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070044#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070045#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070046#include "src/core/lib/iomgr/wakeup_fd_posix.h"
47#include "src/core/lib/profiling/timers.h"
48#include "src/core/lib/support/block_annotate.h"
49
Craig Tillere24b24d2017-04-06 16:05:45 -070050#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
51
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070052#define GRPC_POLLING_TRACE(...) \
Craig Tillerbc0ab082017-05-05 10:42:44 -070053 if (GRPC_TRACER_ON(grpc_polling_trace)) { \
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070054 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070055 }
56
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070057static int grpc_wakeup_signal = -1;
58static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070059
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070060/* Implements the function defined in grpc_posix.h. This function might be
61 * called before even calling grpc_init() to set either a different signal to
62 * use. If signum == -1, then the use of signals is disabled */
63void grpc_use_signal(int signum) {
64 grpc_wakeup_signal = signum;
65 is_grpc_wakeup_signal_initialized = true;
66
67 if (grpc_wakeup_signal < 0) {
68 gpr_log(GPR_INFO,
69 "Use of signals is disabled. Epoll engine will not be used");
70 } else {
71 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
72 grpc_wakeup_signal);
73 }
74}
75
76struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070077
/* Discriminator for the kinds of objects that embed a poll_obj header. */
typedef enum {
  POLL_OBJ_FD,          /* object is a grpc_fd */
  POLL_OBJ_POLLSET,     /* object is a grpc_pollset */
  POLL_OBJ_POLLSET_SET  /* object is a grpc_pollset_set */
} poll_obj_type;
83
/* Common header embedded (as the first field) in grpc_fd, grpc_pollset and
   grpc_pollset_set, carrying the state shared by all pollable objects. */
typedef struct poll_obj {
#ifndef NDEBUG
  /* Debug-only tag recording which concrete type embeds this header. */
  poll_obj_type obj_type;
#endif
  gpr_mu mu;                /* protects 'pi' and the embedding object */
  struct polling_island *pi; /* polling island this object belongs to (may be
                                NULL until first needed) */
} poll_obj;
91
92const char *poll_obj_string(poll_obj_type po_type) {
93 switch (po_type) {
94 case POLL_OBJ_FD:
95 return "fd";
96 case POLL_OBJ_POLLSET:
97 return "pollset";
98 case POLL_OBJ_POLLSET_SET:
99 return "pollset_set";
100 }
101
102 GPR_UNREACHABLE_CODE(return "UNKNOWN");
103}
104
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700105/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700106 * Fd Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700107 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800108
109#define FD_FROM_PO(po) ((grpc_fd *)(po))
110
struct grpc_fd {
  /* Common poll-object header.  Must be the first field: FD_FROM_PO() casts a
     poll_obj* straight back to a grpc_fd*. */
  poll_obj po;

  /* The underlying OS file descriptor. */
  int fd;
  /* refst format:
     bit 0    : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     cases, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  /* Read/write readiness state.  Stored as gpr_atm; presumably managed by
     the lockfree_event helpers (see lockfree_event.h) — confirm before
     touching these directly. */
  gpr_atm read_closure;
  gpr_atm write_closure;

  /* Intrusive link used while this struct sits on the fd freelist
     (maintained by fd_global_init/fd_global_shutdown). */
  struct grpc_fd *freelist_next;
  /* Closure scheduled once the orphaning/shutdown of this fd completes. */
  grpc_closure *on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  /* Registration with iomgr for leak tracking / named debugging. */
  grpc_iomgr_object iomgr_object;
};
138
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700139/* Reference counting for fds */
ncteisend39010e2017-06-08 17:08:07 -0700140#ifndef NDEBUG
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700141static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
142static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
143 int line);
144#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
145#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
146#else
147static void fd_ref(grpc_fd *fd);
148static void fd_unref(grpc_fd *fd);
149#define GRPC_FD_REF(fd, reason) fd_ref(fd)
150#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
151#endif
152
153static void fd_global_init(void);
154static void fd_global_shutdown(void);
155
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700156/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700157 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700158 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700159
ncteisena1354852017-06-08 16:25:53 -0700160#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700161
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700162#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Craig Tillerb39307d2016-06-30 15:39:13 -0700163#define PI_UNREF(exec_ctx, p, r) \
164 pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700165
ncteisend39010e2017-06-08 17:08:07 -0700166#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700167
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700168#define PI_ADD_REF(p, r) pi_add_ref((p))
Craig Tillerb39307d2016-06-30 15:39:13 -0700169#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700170
ncteisena1354852017-06-08 16:25:53 -0700171#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700172
/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;      /* number of fds currently tracked */
  size_t fd_capacity; /* allocated capacity of the 'fds' array */
  grpc_fd **fds;      /* owned refs (see polling_island_add_fds_locked) */
} polling_island;
204
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700205/*******************************************************************************
206 * Pollset Declarations
207 */
/* One thread currently blocked in (or about to enter) pollset_work().
   Workers form a doubly-linked ring rooted at grpc_pollset.root_worker. */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};
217
struct grpc_pollset {
  /* Common poll-object header (mutex + owning polling island). Must be the
     first field so a grpc_pollset* can be treated as a poll_obj*. */
  poll_obj po;

  grpc_pollset_worker root_worker; /* sentinel node of the worker ring */
  /* A kick arrived while no worker was polling; remembered so the next
     worker to arrive returns immediately instead of blocking. */
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};
228
229/*******************************************************************************
230 * Pollset-set Declarations
231 */
/* A pollset_set is just a poll-object header: its only state is the polling
   island (and mutex) shared with the fds/pollsets added to it. */
struct grpc_pollset_set {
  poll_obj po;
};
235
236/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700237 * Common helpers
238 */
239
Craig Tillerf975f742016-07-01 14:56:27 -0700240static bool append_error(grpc_error **composite, grpc_error *error,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700241 const char *desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700242 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700243 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700244 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700245 }
246 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700247 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700248}
249
250/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700251 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700252 */
253
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700254/* The wakeup fd that is used to wake up all threads in a Polling island. This
255 is useful in the polling island merge operation where we need to wakeup all
256 the threads currently polling the smaller polling island (so that they can
257 start polling the new/merged polling island)
258
259 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
260 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
261static grpc_wakeup_fd polling_island_wakeup_fd;
262
Craig Tiller2e620132016-10-10 15:27:44 -0700263/* The polling island being polled right now.
264 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700265static __thread polling_island *g_current_thread_polling_island;
266
Craig Tillerb39307d2016-06-30 15:39:13 -0700267/* Forward declaration */
Craig Tiller2b49ea92016-07-01 13:21:27 -0700268static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700269
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700270#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700271/* Currently TSAN may incorrectly flag data races between epoll_ctl and
272 epoll_wait for any grpc_fd structs that are added to the epoll set via
273 epoll_ctl and are returned (within a very short window) via epoll_wait().
274
275 To work-around this race, we establish a happens-before relation between
276 the code just-before epoll_ctl() and the code after epoll_wait() by using
277 this atomic */
278gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700279#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700280
Craig Tillerb39307d2016-06-30 15:39:13 -0700281static void pi_add_ref(polling_island *pi);
282static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700283
ncteisena1354852017-06-08 16:25:53 -0700284#ifndef NDEBUG
/* Debug wrapper around pi_add_ref(): when polling tracing is enabled, logs
   the ref transition together with the reason and call site.  Note the
   ref_count read and the increment are separate operations, so the logged
   old/new values are approximate under contention (trace-only data). */
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}
295
/* Debug wrapper around pi_unref(): when polling tracing is enabled, logs the
   ref transition together with the reason and call site.  As in
   pi_add_ref_dbg, the logged counts are read separately from the actual
   decrement and are therefore approximate under contention. */
static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
                       " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
306#endif
307
/* Takes a ref on 'pi'.  No barrier needed: acquiring a ref never needs to
   synchronize with anything (the caller must already hold a valid ref). */
static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700311
/* Drops a ref on 'pi'; deletes it when the count reaches zero. */
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    /* Read 'merged_to' before the delete frees 'pi'. */
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}
329
/* Adds 'fd_count' fds to the epoll set of 'pi' and to its 'fds' array,
   optionally taking a ref on each fd.  Failures (other than EEXIST, which is
   silently skipped) are appended to '*error' and the offending fd is not
   tracked.  The caller is expected to hold pi->mu lock before calling this
   function. */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    /* Edge-triggered, interested in both readability and writability. */
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      /* EEXIST means the fd is already in this epoll set: not an error, but
         also not a reason to track (or re-ref) it a second time. */
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    /* Grow the tracking array geometrically (at least +8 slots). */
    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}
374
/* Adds a wakeup fd (edge-triggered, read side only) to the epoll set of 'pi'.
   Unlike regular fds, wakeup fds are not tracked in pi->fds.  EEXIST is
   ignored; other failures are appended to '*error'.  The caller is expected
   to hold pi->mu before calling this. */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}
398
/* Removes every tracked fd from the epoll set of 'pi' and resets fd_cnt to
   zero, optionally dropping the per-fd refs taken on add.  ENOENT (fd already
   gone from the set) is ignored; other failures are appended to '*error'.
   The caller is expected to hold pi->mu lock before calling this function. */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}
426
/* Removes a single fd from the epoll set (unless the kernel already removed
   it because the fd was closed) and from pi->fds, dropping the ref taken on
   add.  The caller is expected to hold pi->mu lock before calling this
   function. */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically been
     removed from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  /* Unordered removal: swap the last element into the vacated slot. */
  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
458
/* Allocates and initializes a polling_island with a fresh epoll fd and an
   initial ref count of zero, optionally adding 'initial_fd' to it.
   Might return NULL in case of an error (with the failure recorded in
   '*error'); any partially-constructed island is cleaned up before return. */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  /* Set to -1 first so the cleanup path can tell whether epoll_create1
     succeeded. */
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    /* No lock needed yet: 'pi' is not visible to any other thread. */
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}
497
Craig Tillerb39307d2016-06-30 15:39:13 -0700498static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700499 GPR_ASSERT(pi->fd_cnt == 0);
500
Craig Tiller0a06cd72016-07-14 13:21:24 -0700501 if (pi->epoll_fd >= 0) {
502 close(pi->epoll_fd);
503 }
Craig Tillerb39307d2016-06-30 15:39:13 -0700504 gpr_mu_destroy(&pi->mu);
505 gpr_free(pi->fds);
506 gpr_free(pi);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700507}
508
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700509/* Attempts to gets the last polling island in the linked list (liked by the
510 * 'merged_to' field). Since this does not lock the polling island, there are no
511 * guarantees that the island returned is the last island */
512static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
513 polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
514 while (next != NULL) {
515 pi = next;
516 next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
517 }
518
519 return pi;
520}
521
/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we
         check this by holding the pi->mu lock, we cannot be sure (i.e without
         the pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is infact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}
558
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling
   islands).

   This function is needed because calling the following block of code to
   obtain locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Address-ordered locking prevents deadlock against a concurrent caller
       locking the same pair in the opposite argument order. */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    /* At least one island was merged while we were acquiring the locks:
       drop both locks and retry from the (new) list tails. */
    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
639
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700640static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
641 if (p == q) {
642 gpr_mu_unlock(&p->mu);
643 } else {
644 gpr_mu_unlock(&p->mu);
645 gpr_mu_unlock(&q->mu);
646 }
647}
648
/* Merges polling islands p and q and returns the island that survives the
   merge (q after a possible swap). Errors from the underlying fd operations
   are accumulated into *error. */
static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
682
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700683static grpc_error *polling_island_global_init() {
684 grpc_error *error = GRPC_ERROR_NONE;
685
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700686 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
687 if (error == GRPC_ERROR_NONE) {
688 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
689 }
690
691 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700692}
693
/* Counterpart of polling_island_global_init(): destroys the shared wakeup
   fd. */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
697
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700698/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700699 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700700 */
701
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700702/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700703 * but instead so that implementations with multiple threads in (for example)
704 * epoll_wait deal with the race between pollset removal and incoming poll
705 * notifications.
706 *
707 * The problem is that the poller ultimately holds a reference to this
708 * object, so it is very difficult to know when is safe to free it, at least
709 * without some expensive synchronization.
710 *
711 * If we keep the object freelisted, in the worst case losing this race just
712 * becomes a spurious read notification on a reused fd.
713 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700714
715/* The alarm system needs to be able to wakeup 'some poller' sometimes
716 * (specifically when a new alarm needs to be triggered earlier than the next
717 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
718 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700719
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700720static grpc_fd *fd_freelist = NULL;
721static gpr_mu fd_freelist_mu;
722
#ifndef NDEBUG
/* Debug builds: REF_BY/UNREF_BY also record a reason and call site, and
   ref_by()/unref_by() optionally trace each refcount transition. */
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
/* Adds n to fd's refcount. The fd must already be referenced (refst > 0). */
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
/* Release builds: the reason/location arguments are compiled out. */
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  /* The previous count must be positive: ref_by on a dead fd is a bug. */
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
741
#ifndef NDEBUG
/* Subtracts n from fd's refcount; when the count drops to zero the fd is
   unregistered, its events destroyed, and the struct is pushed onto the
   global freelist for reuse (see the freelist rationale above). */
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd *fd, int n) {
#endif
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* We dropped the last reference: recycle the struct. */
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    /* Refcount must never go negative. */
    GPR_ASSERT(old > n);
  }
}
770
/* Increment refcount by two to avoid changing the orphan bit
   (the low bit of refst doubles as the orphan flag, so public refs are
   counted in units of 2 — presumably; confirm against fd_orphan's
   UNREF_BY(fd, 2, ...)). */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif
786
/* Initializes the mutex that guards the global grpc_fd freelist. */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
788
789static void fd_global_shutdown(void) {
790 gpr_mu_lock(&fd_freelist_mu);
791 gpr_mu_unlock(&fd_freelist_mu);
792 while (fd_freelist != NULL) {
793 grpc_fd *fd = fd_freelist;
794 fd_freelist = fd_freelist->freelist_next;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800795 gpr_mu_destroy(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700796 gpr_free(fd);
797 }
798 gpr_mu_destroy(&fd_freelist_mu);
799}
800
/* Wraps the OS file descriptor 'fd' in a grpc_fd, reusing a struct from the
   global freelist when possible. The returned fd starts with refcount 1, is
   not orphaned, and belongs to no polling island. 'name' is only used for
   the iomgr object registration. */
static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  /* Try to pop a recycled struct off the freelist. */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  /* Register with iomgr under a human-readable name for leak tracking. */
  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}
843
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700844static int fd_wrapped_fd(grpc_fd *fd) {
845 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800846 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700847 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700848 ret_fd = fd->fd;
849 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800850 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700851
852 return ret_fd;
853}
854
/* Orphans 'fd': detaches it from its polling island, either hands back the
   raw descriptor via *release_fd or close()s it, marks the fd orphaned, and
   schedules 'on_done'. The grpc_fd struct itself stays alive until its
   refcount drains (see unref_by). */
static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, false /* is_fd_closed */,
                                    &error);
    gpr_mu_unlock(&pi_latest->mu);

    /* Defer the PI_UNREF until after fd->po.mu is released (see below). */
    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
913
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700914static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
915 grpc_fd *fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800916 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800917 return (grpc_pollset *)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700918}
919
/* True once fd_shutdown() has shut down the fd's read closure. */
static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}
923
/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  /* Only the call that wins the read-closure shutdown (grpc_lfev_set_shutdown
     returning true — presumably the first call; confirm in lockfree_event)
     shuts down the socket and the write closure, keeping the sequence
     idempotent. */
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  /* Balance the caller's ref on 'why'. */
  GRPC_ERROR_UNREF(why);
}
933
/* Registers 'closure' to run when the fd becomes readable (see
   fd_become_readable). */
static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}
938
/* Registers 'closure' to run when the fd becomes writable (see
   fd_become_writable). */
static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}
943
944/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700945 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700946 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700947GPR_TLS_DECL(g_current_thread_pollset);
948GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700949static __thread bool g_initialized_sigmask;
950static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700951
/* Handler for the wakeup signal delivered via pthread_kill() in
   pollset_worker_kick(); intentionally (almost) a no-op — the delivery
   itself is what interrupts a blocked poller. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700957
/* Installs sig_handler for the wakeup signal used to kick pollers. */
static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700959
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700960/* Global state management */
/* One-time pollset setup: creates the thread-local slots used to detect
   self-kicks and installs the wakeup-signal handler. */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}
967
/* Counterpart of pollset_global_init(): tears down the thread-local slots. */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
972
/* Wakes 'worker' by sending it the wakeup signal; at most one signal is sent
   per kick thanks to the is_kicked CAS. Returns an error if pthread_kill
   fails. */
static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
988
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700989/* Return 1 if the pollset has active threads in pollset_work (pollset must
990 * be locked) */
991static int pollset_has_workers(grpc_pollset *p) {
992 return p->root_worker.next != &p->root_worker;
993}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700994
995static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
996 worker->prev->next = worker->next;
997 worker->next->prev = worker->prev;
998}
999
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001000static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1001 if (pollset_has_workers(p)) {
1002 grpc_pollset_worker *w = p->root_worker.next;
1003 remove_worker(p, w);
1004 return w;
1005 } else {
1006 return NULL;
1007 }
1008}
1009
1010static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1011 worker->next = &p->root_worker;
1012 worker->prev = worker->next->prev;
1013 worker->prev->next = worker->next->prev = worker;
1014}
1015
1016static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1017 worker->prev = &p->root_worker;
1018 worker->next = worker->prev->next;
1019 worker->prev->next = worker->next->prev = worker;
1020}
1021
/* p->mu must be held before calling this function */
/* Wakes worker(s) on pollset p:
   - specific_worker == GRPC_POLLSET_KICK_BROADCAST: kick every worker except
     the calling thread's own
   - specific_worker != NULL: kick exactly that worker (unless it is self)
   - specific_worker == NULL: kick "any" worker, or record the kick for the
     next worker to absorb if none is polling. */
static grpc_error *pollset_kick(grpc_pollset *p,
                                grpc_pollset_worker *specific_worker) {
  GPR_TIMER_BEGIN("pollset_kick", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  const char *err_desc = "Kick Failure";
  grpc_pollset_worker *worker = specific_worker;
  if (worker != NULL) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          /* Never signal ourselves: this thread is already awake. */
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
        GPR_TIMER_END("pollset_kick.broadcast", 0);
      } else {
        /* No pollers: remember the kick so the next worker returns at once. */
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != NULL) {
      GPR_TIMER_MARK("finally_kick", 0);
      /* Rotate the kicked worker to the back for rough fairness. */
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GPR_TIMER_END("pollset_kick", 0);
  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1073
/* Initializes 'pollset' with no polling island and an empty (self-linked)
   worker list, and hands the caller the pollset mutex via *mu. */
static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = NULL;
#ifndef NDEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  /* Empty circular list: the sentinel points at itself. */
  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = NULL;
}
1089
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001090/* Convert a timespec to milliseconds:
1091 - Very small or negative poll times are clamped to zero to do a non-blocking
1092 poll (which becomes spin polling)
1093 - Other small values are rounded up to one millisecond
1094 - Longer than a millisecond polls are rounded up to the next nearest
1095 millisecond to avoid spinning
1096 - Infinite timeouts are converted to -1 */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001097static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
1098 gpr_timespec now) {
1099 gpr_timespec timeout;
1100 static const int64_t max_spin_polling_us = 10;
1101 if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
1102 return -1;
1103 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001104
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001105 if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
1106 max_spin_polling_us,
1107 GPR_TIMESPAN))) <= 0) {
1108 return 0;
1109 }
1110 timeout = gpr_time_sub(deadline, now);
Craig Tiller799e7e82017-03-27 12:42:34 -07001111 int millis = gpr_time_to_millis(gpr_time_add(
1112 timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
1113 return millis >= 1 ? millis : 1;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001114}
1115
/* Marks 'fd' readable (running any notify_on_read closure) and records which
   pollset observed the event. */
static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_pollset *notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure);

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifer is set as the read_notifier_pollset
     (They would both point to the same polling island anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1128
/* Marks 'fd' writable, running any closure registered via
   fd_notify_on_write(). */
static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
}
1132
Craig Tillerb39307d2016-06-30 15:39:13 -07001133static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
1134 grpc_pollset *ps, char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001135 if (ps->po.pi != NULL) {
1136 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001137 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001138 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001139}
1140
/* Completes pollset shutdown: releases the polling island and schedules the
   shutdown_done closure. Must be called with the pollset lock held and no
   workers remaining. */
static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
                                   grpc_pollset *pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
  GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
}
1152
/* pollset->po.mu lock must be held by the caller before calling this */
/* Begins pollset shutdown: marks the pollset shutting down, kicks all
   workers so they drain, and — if no workers are active — finishes the
   shutdown immediately. Otherwise the last worker finishes it from
   pollset_work(). */
static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                             grpc_closure *closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}
1172
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}
1180
Craig Tiller84ea3412016-09-08 14:57:56 -07001181#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001182/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
1183static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001184 grpc_pollset *pollset,
1185 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001186 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001187 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001188 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001189 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001190 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001191 char *err_msg;
1192 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001193 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1194
1195 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001196 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001197
1198 Since epoll_fd is immutable, we can read it without obtaining the polling
1199 island lock. There is however a possibility that the polling island (from
1200 which we got the epoll_fd) got merged with another island while we are
1201 in this function. This is still okay because in such a case, we will wakeup
1202 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001203 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001204
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001205 if (pollset->po.pi == NULL) {
1206 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1207 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001208 GPR_TIMER_END("pollset_work_and_unlock", 0);
1209 return; /* Fatal error. We cannot continue */
1210 }
1211
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001212 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001213 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001214 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001215 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001216
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001217 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001218 epoll_fd = pi->epoll_fd;
1219
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001220 /* Update the pollset->po.pi since the island being pointed by
1221 pollset->po.pi maybe older than the one pointed by pi) */
1222 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001223 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1224 polling island to be deleted */
1225 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001226 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1227 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001228 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001229
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001230 /* Add an extra ref so that the island does not get destroyed (which means
1231 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1232 epoll_fd */
1233 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001234 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001235
Craig Tiller61f96c12017-05-12 13:36:39 -07001236 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1237 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001238
Craig Tiller61f96c12017-05-12 13:36:39 -07001239 GRPC_SCHEDULING_START_BLOCKING_REGION;
1240 ep_rv =
1241 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
1242 GRPC_SCHEDULING_END_BLOCKING_REGION;
1243 if (ep_rv < 0) {
1244 if (errno != EINTR) {
1245 gpr_asprintf(&err_msg,
1246 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1247 epoll_fd, errno, strerror(errno));
1248 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1249 } else {
1250 /* We were interrupted. Save an interation by doing a zero timeout
1251 epoll_wait to see if there are any other events of interest */
1252 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1253 (void *)pollset, (void *)worker);
1254 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001255 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001256 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001257
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001258#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001259 /* See the definition of g_poll_sync for more details */
1260 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001261#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001262
Craig Tiller61f96c12017-05-12 13:36:39 -07001263 for (int i = 0; i < ep_rv; ++i) {
1264 void *data_ptr = ep_ev[i].data.ptr;
1265 if (data_ptr == &polling_island_wakeup_fd) {
1266 GRPC_POLLING_TRACE(
1267 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1268 "%d) got merged",
1269 (void *)pollset, (void *)worker, epoll_fd);
1270 /* This means that our polling island is merged with a different
1271 island. We do not have to do anything here since the subsequent call
1272 to the function pollset_work_and_unlock() will pick up the correct
1273 epoll_fd */
1274 } else {
1275 grpc_fd *fd = data_ptr;
1276 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1277 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1278 int write_ev = ep_ev[i].events & EPOLLOUT;
1279 if (read_ev || cancel) {
1280 fd_become_readable(exec_ctx, fd, pollset);
1281 }
1282 if (write_ev || cancel) {
1283 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001284 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001285 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001286 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001287
Craig Tiller61f96c12017-05-12 13:36:39 -07001288 g_current_thread_polling_island = NULL;
1289 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1290
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001291 GPR_ASSERT(pi != NULL);
1292
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001293 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001294 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001295 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001296 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001297 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001298 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001299
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001300 GPR_TIMER_END("pollset_work_and_unlock", 0);
1301}
1302
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001303/* pollset->po.mu lock must be held by the caller before calling this.
1304 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001305 during the course of its execution but it will always re-acquire the lock and
1306 ensure that it is held by the time the function returns */
static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                                grpc_pollset_worker **worker_hdl,
                                gpr_timespec now, gpr_timespec deadline) {
  GPR_TIMER_BEGIN("pollset_work", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);

  sigset_t new_mask;

  /* The worker lives on this thread's stack; it is linked into the pollset's
     worker list only for the duration of this call */
  grpc_pollset_worker worker;
  worker.next = worker.prev = NULL;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  /* Expose the worker to the caller; presumably used to kick this specific
     worker (see pollset_kick) — TODO(review): confirm against callers */
  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask:       The new thread mask which blocks 'grpc_wakeup_signal'.
                         This is the mask used at all times *except during
                         epoll_wait()*"
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
                         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    /* pollset_work_and_unlock() drops pollset->po.mu before blocking in
       epoll_pwait(); we re-acquire it below */
    pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
                            &g_orig_sigmask, &error);
    grpc_exec_ctx_flush(exec_ctx);

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);

    /* Temporarily drop the lock so queued closures (including the shutdown
       closure) can run, then re-acquire to satisfy the locking contract */
    gpr_mu_unlock(&pollset->po.mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->po.mu);
  }

  /* The stack-allocated worker is about to go out of scope; make sure no one
     can reach it through worker_hdl */
  if (worker_hdl) *worker_hdl = NULL;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GPR_TIMER_END("pollset_work", 0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
1409
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001410static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001411 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001412 poll_obj_type item_type) {
1413 GPR_TIMER_BEGIN("add_poll_object", 0);
1414
ncteisene9cd8a82017-06-29 06:03:52 -04001415#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001416 GPR_ASSERT(item->obj_type == item_type);
1417 GPR_ASSERT(bag->obj_type == bag_type);
1418#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001419
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001420 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001421 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001422
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001423 gpr_mu_lock(&bag->mu);
1424 gpr_mu_lock(&item->mu);
1425
Craig Tiller7212c232016-07-06 13:11:09 -07001426retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001427 /*
1428 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1429 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1430 * a refcount of 2) and point item->pi and bag->pi to the new island
1431 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1432 * the other's non-NULL pi
1433 * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
1434 * polling islands and update item->pi and bag->pi to point to the new
1435 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001436 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001437
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001438 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1439 * orphaned */
1440 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1441 gpr_mu_unlock(&item->mu);
1442 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001443 return;
1444 }
1445
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001446 if (item->pi == bag->pi) {
1447 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001448 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001449 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001450
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001451 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1452 * we need to do some extra work to make TSAN happy */
1453 if (item_type == POLL_OBJ_FD) {
1454 /* Unlock before creating a new polling island: the polling island will
1455 create a workqueue which creates a file descriptor, and holding an fd
1456 lock here can eventually cause a loop to appear to TSAN (making it
1457 unhappy). We don't think it's a real loop (there's an epoch point
1458 where that loop possibility disappears), but the advantages of
1459 keeping TSAN happy outweigh any performance advantage we might have
1460 by keeping the lock held. */
1461 gpr_mu_unlock(&item->mu);
1462 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1463 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001464
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001465 /* Need to reverify any assumptions made between the initial lock and
1466 getting to this branch: if they've changed, we need to throw away our
1467 work and figure things out again. */
1468 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001469 GRPC_POLLING_TRACE(
1470 "add_poll_object: Raced creating new polling island. pi_new: %p "
1471 "(fd: %d, %s: %p)",
1472 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1473 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001474 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001475 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001476 polling_island_remove_all_fds_locked(pi_new, true, &error);
1477
1478 /* Ref and unref so that the polling island gets deleted during unref
1479 */
1480 PI_ADD_REF(pi_new, "dance_of_destruction");
1481 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1482 goto retry;
1483 }
Craig Tiller27da6422016-07-06 13:14:46 -07001484 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001485 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001486 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001487
1488 GRPC_POLLING_TRACE(
1489 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1490 "%s: %p)",
1491 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1492 poll_obj_string(bag_type), (void *)bag);
1493 } else {
1494 GRPC_POLLING_TRACE(
1495 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1496 (void *)pi_new, poll_obj_string(item_type),
1497 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001498 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001499 } else if (item->pi == NULL) {
1500 /* GPR_ASSERT(bag->pi != NULL) */
1501 /* Make pi_new point to latest pi*/
1502 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001503
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001504 if (item_type == POLL_OBJ_FD) {
1505 grpc_fd *fd = FD_FROM_PO(item);
1506 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1507 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001508
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001509 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001510 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001511 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1512 "bag(%s): %p)",
1513 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1514 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001515 } else if (bag->pi == NULL) {
1516 /* GPR_ASSERT(item->pi != NULL) */
1517 /* Make pi_new to point to latest pi */
1518 pi_new = polling_island_lock(item->pi);
1519 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001520 GRPC_POLLING_TRACE(
1521 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1522 "bag(%s): %p)",
1523 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1524 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001525 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001526 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001527 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001528 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1529 "bag(%s): %p)",
1530 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1531 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001532 }
1533
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001534 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1535 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001536
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001537 if (item->pi != pi_new) {
1538 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1539 if (item->pi != NULL) {
1540 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001541 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001542 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001543 }
1544
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001545 if (bag->pi != pi_new) {
1546 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1547 if (bag->pi != NULL) {
1548 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001549 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001550 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001551 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001552
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001553 gpr_mu_unlock(&item->mu);
1554 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001555
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001556 GRPC_LOG_IF_ERROR("add_poll_object", error);
1557 GPR_TIMER_END("add_poll_object", 0);
1558}
Craig Tiller57726ca2016-09-12 11:59:45 -07001559
/* Adds 'fd' to 'pollset' by linking their polling islands (creating or
   merging islands as needed — see add_poll_object()) */
static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                           grpc_fd *fd) {
  add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
                  POLL_OBJ_FD);
}
1565
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001566/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001567 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001568 */
1569
1570static grpc_pollset_set *pollset_set_create(void) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001571 grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001572 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001573 pss->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001574#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001575 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1576#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001577 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001578}
1579
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001580static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
1581 grpc_pollset_set *pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001582 gpr_mu_destroy(&pss->po.mu);
1583
1584 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001585 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001586 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001587
1588 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001589}
1590
/* Adds 'fd' to the pollset_set by linking their polling islands */
static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
                  POLL_OBJ_FD);
}
1596
/* Intentionally a no-op: once polling islands are linked they are never
   unlinked here; orphaned fds are handled by fd_orphan/add_poll_object */
static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  /* Nothing to do */
}
1601
/* Adds pollset 'ps' to pollset_set 'pss' by linking their polling islands */
static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
                  POLL_OBJ_POLLSET);
}
1607
/* Intentionally a no-op: island links established by the corresponding 'add'
   are not torn down on removal in this engine */
static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  /* Nothing to do */
}
1612
/* Nests pollset_set 'item' inside pollset_set 'bag' by linking their polling
   islands */
static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}
1619
/* Intentionally a no-op: island links established by the corresponding 'add'
   are not torn down on removal in this engine */
static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  /* Nothing to do */
}
1625
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001626/* Test helper functions
1627 * */
1628void *grpc_fd_get_polling_island(grpc_fd *fd) {
1629 polling_island *pi;
1630
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001631 gpr_mu_lock(&fd->po.mu);
1632 pi = fd->po.pi;
1633 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001634
1635 return pi;
1636}
1637
1638void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1639 polling_island *pi;
1640
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001641 gpr_mu_lock(&ps->po.mu);
1642 pi = ps->po.pi;
1643 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001644
1645 return pi;
1646}
1647
/* Test-only helper: returns true if the two opaque island pointers (as
   returned by the getters above) resolve to the same *latest* polling island
   after following any merge chains */
bool grpc_are_polling_islands_equal(void *p, void *q) {
  polling_island *p1 = p;
  polling_island *p2 = q;

  /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
     latest polling islands in their respective linked lists */
  polling_island_lock_pair(&p1, &p2);
  polling_island_unlock_pair(p1, p2);

  return p1 == p2;
}
1659
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001660/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001661 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001662 */
1663
/* Engine-wide teardown: releases the fd, pollset and polling-island global
   state set up in grpc_init_epollsig_linux() */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1669
/* Dispatch table plugging this engine's implementations into the ev_posix
   event-engine abstraction; returned by grpc_init_epollsig_linux() */
static const grpc_event_engine_vtable vtable = {
    .pollset_size = sizeof(grpc_pollset),

    .fd_create = fd_create,
    .fd_wrapped_fd = fd_wrapped_fd,
    .fd_orphan = fd_orphan,
    .fd_shutdown = fd_shutdown,
    .fd_is_shutdown = fd_is_shutdown,
    .fd_notify_on_read = fd_notify_on_read,
    .fd_notify_on_write = fd_notify_on_write,
    .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,

    .pollset_init = pollset_init,
    .pollset_shutdown = pollset_shutdown,
    .pollset_destroy = pollset_destroy,
    .pollset_work = pollset_work,
    .pollset_kick = pollset_kick,
    .pollset_add_fd = pollset_add_fd,

    .pollset_set_create = pollset_set_create,
    .pollset_set_destroy = pollset_set_destroy,
    .pollset_set_add_pollset = pollset_set_add_pollset,
    .pollset_set_del_pollset = pollset_set_del_pollset,
    .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
    .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
    .pollset_set_add_fd = pollset_set_add_fd,
    .pollset_set_del_fd = pollset_set_del_fd,

    .shutdown_engine = shutdown_engine,
};
1700
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001701/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1702 * Create a dummy epoll_fd to make sure epoll support is available */
1703static bool is_epoll_available() {
1704 int fd = epoll_create1(EPOLL_CLOEXEC);
1705 if (fd < 0) {
1706 gpr_log(
1707 GPR_ERROR,
1708 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1709 fd);
1710 return false;
1711 }
1712 close(fd);
1713 return true;
1714}
1715
/* Engine entry point: returns this engine's vtable if the environment can
   support it (wakeup-fds work, epoll actually exists, and a wakeup signal is
   available), otherwise NULL so the caller can fall back to another engine.
   'explicit_request' indicates the user asked for this engine by name. */
const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  /* If use of signals is disabled, we cannot use epoll engine*/
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    return NULL;
  }

  if (!grpc_has_wakeup_fd()) {
    return NULL;
  }

  if (!is_epoll_available()) {
    return NULL;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    /* TODO(ctiller): when other epoll engines are ready, remove the true || to
     * force this to be explitly chosen if needed */
    if (true || explicit_request) {
      grpc_use_signal(SIGRTMIN + 6);
    } else {
      return NULL;
    }
  }

  fd_global_init();

  /* NOTE(review): if either global init below fails, fd_global_init() (and,
     in the second case, pollset_global_init()) is not rolled back — confirm
     this partial initialization is acceptable to callers */
  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return NULL;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return NULL;
  }

  return &vtable;
}
1754
murgatroid99623dd4f2016-08-08 17:31:27 -07001755#else /* defined(GRPC_LINUX_EPOLL) */
1756#if defined(GRPC_POSIX_SOCKET)
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001757#include "src/core/lib/iomgr/ev_posix.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001758/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001759 * NULL */
const grpc_event_engine_vtable *grpc_init_epollsig_linux(
    bool explicit_request) {
  return NULL; /* epoll is unavailable on this platform; caller falls back */
}
murgatroid99623dd4f2016-08-08 17:31:27 -07001764#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001765
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001766void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001767#endif /* !defined(GRPC_LINUX_EPOLL) */