/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
18
/* Platform feature detection must come first: it defines GRPC_LINUX_EPOLL */
#include "src/core/lib/iomgr/port.h"

#include <grpc/grpc_posix.h>

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

/* System headers */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

/* gRPC support-library headers */
#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

/* gRPC core headers */
#include "src/core/lib/debug/stats.h"
#include "src/core/lib/iomgr/block_annotate.h"
#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070053
/* Sentinel pollset_worker pointer: presumably requests that every worker be
   kicked rather than one specific worker -- confirm against kick logic */
#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)

/* Logs at GPR_INFO when polling tracing is enabled.
   Wrapped in do { } while (0) so the macro behaves as a single statement:
   the previous bare-`if` expansion was unsafe inside an unbraced if/else
   (dangling-else hazard) and tolerated a missing semicolon. */
#define GRPC_POLLING_TRACE(...)               \
  do {                                        \
    if (GRPC_TRACER_ON(grpc_polling_trace)) { \
      gpr_log(GPR_INFO, __VA_ARGS__);         \
    }                                         \
  } while (0)
60
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070061static int grpc_wakeup_signal = -1;
62static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070063
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070064/* Implements the function defined in grpc_posix.h. This function might be
65 * called before even calling grpc_init() to set either a different signal to
66 * use. If signum == -1, then the use of signals is disabled */
67void grpc_use_signal(int signum) {
68 grpc_wakeup_signal = signum;
69 is_grpc_wakeup_signal_initialized = true;
70
71 if (grpc_wakeup_signal < 0) {
72 gpr_log(GPR_INFO,
73 "Use of signals is disabled. Epoll engine will not be used");
74 } else {
75 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
76 grpc_wakeup_signal);
77 }
78}
79
/* Forward declaration; full definition appears in the polling island section */
struct polling_island;

/* Discriminates which concrete object a poll_obj header belongs to */
typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

/* Common header embedded (as the first member) in grpc_fd, grpc_pollset and
   grpc_pollset_set, so shared code can manipulate any of them uniformly */
typedef struct poll_obj {
#ifndef NDEBUG
  /* Debug-only tag used to sanity-check casts between poll_obj subtypes */
  poll_obj_type obj_type;
#endif
  gpr_mu mu;                /* protects this object */
  struct polling_island *pi; /* polling island this object belongs to (owned ref) */
} poll_obj;
95
96const char *poll_obj_string(poll_obj_type po_type) {
97 switch (po_type) {
98 case POLL_OBJ_FD:
99 return "fd";
100 case POLL_OBJ_POLLSET:
101 return "pollset";
102 case POLL_OBJ_POLLSET_SET:
103 return "pollset_set";
104 }
105
106 GPR_UNREACHABLE_CODE(return "UNKNOWN");
107}
108
/*******************************************************************************
 * Fd Declarations
 */

/* A grpc_fd begins with a poll_obj, so a poll_obj* can be cast back to it */
#define FD_FROM_PO(po) ((grpc_fd *)(po))

struct grpc_fd {
  poll_obj po; /* common header; MUST be the first field (see FD_FROM_PO) */

  int fd; /* the underlying OS file descriptor */
  /* refst format:
     bit 0 : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     cases, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  /* Readiness-event state words; atomics presumably managed by the
     lockfree_event helpers (lockfree_event.h is included) -- confirm */
  gpr_atm read_closure;
  gpr_atm write_closure;

  struct grpc_fd *freelist_next; /* link for the fd freelist */
  grpc_closure *on_done_closure; /* invoked when the orphan completes */

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object; /* registration with iomgr for leak tracking */
};
142
/* Reference counting for fds. In debug builds each ref/unref records the
   reason and call site; release builds compile down to bare counting. */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd *fd);
static void fd_unref(grpc_fd *fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

/* One-time setup/teardown for the fd freelist machinery */
static void fd_global_init(void);
static void fd_global_shutdown(void);
159
/*******************************************************************************
 * Polling island Declarations
 */

/* Polling-island refcounting; debug builds log reason and call site */
#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set (dynamic array of borrowed/owned
     grpc_fd refs; see add/remove helpers below for ref handling) */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd **fds;
} polling_island;
208
/*******************************************************************************
 * Pollset Declarations
 */

/* One polling thread registered with a pollset; nodes form a doubly-linked
   list rooted at grpc_pollset.root_worker */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker *next;
  struct grpc_pollset_worker *prev;
};

struct grpc_pollset {
  poll_obj po; /* common header; MUST be the first field */

  grpc_pollset_worker root_worker; /* sentinel node of the worker list */
  bool kicked_without_pollers;     /* a kick arrived while no worker was polling */

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure *shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
/* A pollset_set is nothing more than the shared poll_obj header */
struct grpc_pollset_set {
  poll_obj po;
};
239
240/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700241 * Common helpers
242 */
243
Craig Tillerf975f742016-07-01 14:56:27 -0700244static bool append_error(grpc_error **composite, grpc_error *error,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700245 const char *desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700246 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700247 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700248 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700249 }
250 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700251 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700252}
253
254/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700255 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700256 */
257
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700258/* The wakeup fd that is used to wake up all threads in a Polling island. This
259 is useful in the polling island merge operation where we need to wakeup all
260 the threads currently polling the smaller polling island (so that they can
261 start polling the new/merged polling island)
262
263 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
264 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
265static grpc_wakeup_fd polling_island_wakeup_fd;
266
Craig Tiller2e620132016-10-10 15:27:44 -0700267/* The polling island being polled right now.
268 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700269static __thread polling_island *g_current_thread_polling_island;
270
Craig Tillerb39307d2016-06-30 15:39:13 -0700271/* Forward declaration */
Craig Tiller2b49ea92016-07-01 13:21:27 -0700272static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700273
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700274#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700275/* Currently TSAN may incorrectly flag data races between epoll_ctl and
276 epoll_wait for any grpc_fd structs that are added to the epoll set via
277 epoll_ctl and are returned (within a very short window) via epoll_wait().
278
279 To work-around this race, we establish a happens-before relation between
280 the code just-before epoll_ctl() and the code after epoll_wait() by using
281 this atomic */
282gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700283#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700284
static void pi_add_ref(polling_island *pi);
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);

#ifndef NDEBUG
/* Debug wrapper around pi_add_ref(): when polling tracing is on, logs the
   (approximate) old/new refcounts plus the reason and call site. The count
   is loaded separately from the increment, so the logged value may be stale
   under concurrent ref traffic -- it is diagnostic only. */
static void pi_add_ref_dbg(polling_island *pi, const char *reason,
                           const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

/* Debug wrapper around pi_unref(); same approximate-count caveat as above */
static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
                         const char *reason, const char *file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif
311
/* Takes a reference on the polling island (no ordering required on acquire) */
static void pi_add_ref(polling_island *pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700315
/* Drops a reference on the polling island, destroying it when the count hits
   zero and releasing the ref it held on any island it was merged into. */
static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}
333
/* Adds 'fd_count' fds to the island's epoll set and its fds[] array,
   optionally taking a ref on each fd. Failures (other than EEXIST) are
   accumulated into '*error'; the fd that failed is skipped, not tracked.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error **error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char *err_msg;
  const char *err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    /* Edge-triggered, interested in both read and write readiness */
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      /* EEXIST simply means the fd is already in this epoll set; not an error,
         but also no reason to track or ref it a second time */
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    /* Grow fds[] geometrically (at least +8) when full */
    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds =
          (grpc_fd **)gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}
379
/* Registers a wakeup fd (edge-triggered, read-only interest) with the
   island's epoll set; the event's data.ptr is the wakeup fd itself so the
   poll loop can distinguish it from grpc_fd entries.
   The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island *pi,
                                                grpc_wakeup_fd *wakeup_fd,
                                                grpc_error **error) {
  struct epoll_event ev;
  int err;
  char *err_msg;
  const char *err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  /* EEXIST (already registered) is benign; anything else is reported */
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}
403
/* Removes every fd from the island's epoll set and empties fds[], optionally
   dropping the ref held on each fd. ENOENT from epoll_ctl is tolerated (the
   fd may already have been removed, e.g. by being closed).
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island *pi,
                                                 bool remove_fd_refs,
                                                 grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}
431
/* Removes a single fd from the island's epoll set (unless the fd was already
   closed, in which case the kernel removed it automatically) and drops it
   from fds[] along with the island's ref on it.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
                                            bool is_fd_closed,
                                            grpc_error **error) {
  int err;
  size_t i;
  char *err_msg;
  const char *err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  /* Swap-remove from fds[] (order is not significant) and drop the ref */
  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
463
/* Allocates a new polling island with a fresh epoll set, optionally seeding
   it with 'initial_fd'. On failure the partially-built island is destroyed
   and NULL is returned, with details accumulated in '*error'.
   Might return NULL in case of an error */
static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
                                             grpc_fd *initial_fd,
                                             grpc_error **error) {
  polling_island *pi = NULL;
  const char *err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = (polling_island *)gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1; /* sentinel so the error path knows there is no fd yet */

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    /* Takes a ref on initial_fd; any epoll_ctl failure lands in *error */
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}
502
/* Destroys an island whose fds[] has already been emptied (asserted).
   Tolerates a never-opened epoll fd (epoll_fd < 0) from a failed create. */
static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}
513
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700514/* Attempts to gets the last polling island in the linked list (liked by the
515 * 'merged_to' field). Since this does not lock the polling island, there are no
516 * guarantees that the island returned is the last island */
517static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
518 polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
519 while (next != NULL) {
520 pi = next;
521 next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
522 }
523
524 return pi;
525}
526
/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
   polling_island *pi_latest = polling_island_lock(pi);
   ...
   ... critical section ..
   ...
   gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island *polling_island_lock(polling_island *pi) {
  polling_island *next = NULL;

  while (true) {
    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is infact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    /* Keep walking towards the tail of the merge chain */
    pi = next;
  }

  return pi;
}
563
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
   {
     polling_island_lock(*p, true);
     polling_island_lock(*q, true);
   }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island **p, polling_island **q) {
  polling_island *pi_1 = *p;
  polling_island *pi_2 = *q;
  polling_island *next_1 = NULL;
  polling_island *next_2 = NULL;

  /* The algorithm is simple:
      - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
        keep updating pi_1 and pi_2)
      - Then obtain locks on the islands by following a lock order rule of
        locking polling_island with lower address first
        Special case: Before obtaining the locks, check if pi_1 and pi_2 are
        pointing to the same island. If that is the case, we can just call
        polling_island_lock()
      - After obtaining both the locks, double check that the polling islands
        are still the last polling islands in their respective linked lists
        (this is because there might have been polling island merges before
        we got the lock)
      - If the polling islands are the last islands, we are done. If not,
        release the locks and continue the process from the first step */
  while (true) {
    /* Walk each chain to its current tail */
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    }

    /* Both chains converged on the same island: single-lock path */
    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Address-ordered locking prevents deadlock with a concurrent caller
       locking the same pair in the opposite order */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    /* Re-validate that both are still tails; a merge may have raced us */
    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    /* At least one island was merged under us; drop both locks and retry */
    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
644
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700645static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
646 if (p == q) {
647 gpr_mu_unlock(&p->mu);
648 } else {
649 gpr_mu_unlock(&p->mu);
650 gpr_mu_unlock(&q->mu);
651 }
652}
653
/* Merges polling island p into polling island q and returns the surviving
   island (q). The smaller island's fds are moved into the larger one, and a
   merged_to forwarding link is installed from the emptied island. Fd
   refcounts are unchanged: refs previously held by p logically transfer to
   q. */
static polling_island *polling_island_merge(polling_island *p,
                                            polling_island *q,
                                            grpc_error **error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island *, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
687
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700688static grpc_error *polling_island_global_init() {
689 grpc_error *error = GRPC_ERROR_NONE;
690
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700691 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
692 if (error == GRPC_ERROR_NONE) {
693 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
694 }
695
696 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700697}
698
/* Tears down the global wakeup fd created by polling_island_global_init(). */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
702
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700703/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700704 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700705 */
706
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700707/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700708 * but instead so that implementations with multiple threads in (for example)
709 * epoll_wait deal with the race between pollset removal and incoming poll
710 * notifications.
711 *
712 * The problem is that the poller ultimately holds a reference to this
713 * object, so it is very difficult to know when is safe to free it, at least
714 * without some expensive synchronization.
715 *
716 * If we keep the object freelisted, in the worst case losing this race just
717 * becomes a spurious read notification on a reused fd.
718 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700719
720/* The alarm system needs to be able to wakeup 'some poller' sometimes
721 * (specifically when a new alarm needs to be triggered earlier than the next
722 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
723 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700724
/* Global freelist of grpc_fd structs and its guarding mutex. Structs are
   recycled rather than freed (see the rationale comment above): reuse turns
   the poller-removal race into a harmless spurious wakeup. */
static grpc_fd *fd_freelist = NULL;
static gpr_mu fd_freelist_mu;
727
#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
/* Increments fd->refst by n. Debug builds can trace the transition together
   with the reason and call site. The assert guarantees we never revive an fd
   whose refcount has already reached zero. */
static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd *fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
746
#ifndef NDEBUG
/* Decrements fd->refst by n; when the count drops to zero the struct is
   returned to the global freelist (not free()d -- see the freelist rationale
   above). Debug builds can trace the transition. */
static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd *fd, int n) {
#endif
  /* Full barrier: the thread dropping the last ref must observe all prior
     writes to the fd before recycling it */
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Last reference dropped: recycle the struct onto the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}
775
/* Public ref/unref wrappers. Increment refcount by two to avoid changing the
   orphan bit (the low bit of fd->refst). */
#ifndef NDEBUG
static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
#endif
791
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700792static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
793
static void fd_global_shutdown(void) {
  /* Lock/unlock acts as a barrier: wait for any thread currently touching
     the freelist to finish before draining it */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
805
/* Allocates (or recycles from the freelist) a grpc_fd wrapping the given OS
   file descriptor. 'name' is used only for iomgr object registration and
   tracing. */
static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  /* Prefer recycling a struct from the freelist */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  /* refst = 1 (alive, not orphaned); release store publishes the
     initialization done above */
  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}
848
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700849static int fd_wrapped_fd(grpc_fd *fd) {
850 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800851 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700852 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700853 ret_fd = fd->fd;
854 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800855 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700856
857 return ret_fd;
858}
859
/* Orphans the fd: removes it from its polling island, either hands the raw
   descriptor back to the caller via 'release_fd' or close()s it, and
   schedules 'on_done' once cleanup is complete. */
static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      bool already_closed, const char *reason) {
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    /* Defer the island unref until after fd->po.mu is released (see below) */
    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char *msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
917
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700918static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
919 grpc_fd *fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800920 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800921 return (grpc_pollset *)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700922}
923
/* Returns true once fd_shutdown() has been called on this fd (the read
   closure carries the shutdown state). */
static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}
927
/* Might be called multiple times: only the caller that successfully flips
   the read closure into the 'shutdown' state actually shuts down the socket
   and propagates the shutdown to the write closure. */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}
937
/* Registers 'closure' to be scheduled when the fd becomes readable. */
static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read");
}
942
/* Registers 'closure' to be scheduled when the fd becomes writable. */
static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write");
}
947
948/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700949 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700950 */
/* Per-thread identity of the pollset/worker this thread is currently polling
   (used by pollset_kick to avoid kicking the calling thread), plus saved
   signal-mask state used by the signal-based kick mechanism. */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700955
/* Handler for the wakeup signal used to kick pollers. Intentionally (almost)
   a no-op: the useful side effect is interrupting a blocked syscall. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700961
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -0700962static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700963
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700964/* Global state management */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700965static grpc_error *pollset_global_init(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700966 gpr_tls_init(&g_current_thread_pollset);
967 gpr_tls_init(&g_current_thread_worker);
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700968 poller_kick_init();
Craig Tillerc3571792017-05-02 12:33:38 -0700969 return GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700970}
971
/* Tears down the thread-local keys created in pollset_global_init(). */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
976
/* Kicks a single worker out of its poll by delivering the wakeup signal to
   its thread. The is_kicked CAS makes the kick idempotent: only the first
   kicker actually sends the signal. */
static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
992
/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked). The worker list is circular with root_worker as sentinel, so a
 * non-empty list is detected by the root not pointing at itself. */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700998
999static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1000 worker->prev->next = worker->next;
1001 worker->next->prev = worker->prev;
1002}
1003
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001004static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
1005 if (pollset_has_workers(p)) {
1006 grpc_pollset_worker *w = p->root_worker.next;
1007 remove_worker(p, w);
1008 return w;
1009 } else {
1010 return NULL;
1011 }
1012}
1013
1014static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1015 worker->next = &p->root_worker;
1016 worker->prev = worker->next->prev;
1017 worker->prev->next = worker->next->prev = worker;
1018}
1019
1020static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
1021 worker->prev = &p->root_worker;
1022 worker->next = worker->prev->next;
1023 worker->prev->next = worker->next->prev = worker;
1024}
1025
/* Wakes up worker(s) polling on 'p'. specific_worker may be: a particular
   worker, GRPC_POLLSET_KICK_BROADCAST (kick everyone), or NULL (kick any one
   worker). p->mu must be held before calling this function */
static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p,
                                grpc_pollset_worker *specific_worker) {
  GPR_TIMER_BEGIN("pollset_kick", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
  const char *err_desc = "Kick Failure";
  grpc_pollset_worker *worker = specific_worker;
  if (worker != NULL) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
        /* Broadcast: kick every worker except the calling thread's own */
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
        GPR_TIMER_END("pollset_kick.broadcast", 0);
      } else {
        /* Nobody to kick: remember the kick so the next worker to arrive
           returns immediately */
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      /* Don't kick ourselves */
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != NULL) {
      GPR_TIMER_MARK("finally_kick", 0);
      /* Rotate the kicked worker to the back to spread kicks fairly */
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GPR_TIMER_END("pollset_kick", 0);
  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1078
/* Initializes a pollset: no polling island yet, empty worker list, not
   shutting down. Hands the pollset mutex back to the caller through *mu. */
static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = NULL;
#ifndef NDEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  /* Empty circular worker list: the sentinel points to itself */
  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = NULL;
}
1094
Craig Tiller20397792017-07-18 11:35:27 -07001095static int poll_deadline_to_millis_timeout(grpc_exec_ctx *exec_ctx,
1096 grpc_millis millis) {
1097 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
1098 grpc_millis delta = millis - grpc_exec_ctx_now(exec_ctx);
1099 if (delta > INT_MAX)
1100 return INT_MAX;
1101 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001102 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001103 else
1104 return (int)delta;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001105}
1106
/* Marks the fd readable (scheduling any pending notify_on_read closure) and
   records which pollset observed the readability event. */
static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_pollset *notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read");

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifer is set as the read_notifier_pollset
     (They would both point to the same polling island anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1119
/* Marks the fd writable, scheduling any pending notify_on_write closure. */
static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write");
}
1123
Craig Tillerb39307d2016-06-30 15:39:13 -07001124static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
Yash Tibrewal533d1182017-09-18 10:48:22 -07001125 grpc_pollset *ps,
1126 const char *reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001127 if (ps->po.pi != NULL) {
1128 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001129 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001130 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001131}
1132
1133static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
1134 grpc_pollset *pollset) {
1135 /* The pollset cannot have any workers if we are at this stage */
1136 GPR_ASSERT(!pollset_has_workers(pollset));
1137
1138 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001139
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001140 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001141 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
ncteisen969b46e2017-06-08 14:57:11 -07001142 GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001143}
1144
/* Begins pollset shutdown; 'closure' runs once shutdown completes.
   pollset->po.mu lock must be held by the caller before calling this */
static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                             grpc_closure *closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  /* Wake every worker so they notice shutting_down */
  pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}
1164
1165/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1166 * than destroying the mutexes, there is nothing special that needs to be done
1167 * here */
Craig Tillerf8401102017-04-17 09:47:28 -07001168static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001169 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001170 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001171}
1172
Craig Tiller84ea3412016-09-08 14:57:56 -07001173#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001174/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
1175static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001176 grpc_pollset *pollset,
1177 grpc_pollset_worker *worker, int timeout_ms,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001178 sigset_t *sig_mask, grpc_error **error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001179 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001180 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001181 int ep_rv;
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001182 polling_island *pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001183 char *err_msg;
1184 const char *err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001185 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1186
1187 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001188 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001189
1190 Since epoll_fd is immutable, we can read it without obtaining the polling
1191 island lock. There is however a possibility that the polling island (from
1192 which we got the epoll_fd) got merged with another island while we are
1193 in this function. This is still okay because in such a case, we will wakeup
1194 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001195 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001196
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001197 if (pollset->po.pi == NULL) {
1198 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1199 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001200 GPR_TIMER_END("pollset_work_and_unlock", 0);
1201 return; /* Fatal error. We cannot continue */
1202 }
1203
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001204 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001205 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001206 (void *)pollset, (void *)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001207 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001208
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001209 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001210 epoll_fd = pi->epoll_fd;
1211
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001212 /* Update the pollset->po.pi since the island being pointed by
1213 pollset->po.pi maybe older than the one pointed by pi) */
1214 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001215 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1216 polling island to be deleted */
1217 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001218 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1219 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001220 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001221
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001222 /* Add an extra ref so that the island does not get destroyed (which means
1223 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1224 epoll_fd */
1225 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001226 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001227
Craig Tiller61f96c12017-05-12 13:36:39 -07001228 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1229 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001230
Craig Tiller61f96c12017-05-12 13:36:39 -07001231 GRPC_SCHEDULING_START_BLOCKING_REGION;
Craig Tillerb4bb1cd2017-07-20 14:18:17 -07001232 GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001233 ep_rv =
1234 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Craig Tiller781e91a2017-07-17 16:21:00 -07001235 GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001236 if (ep_rv < 0) {
1237 if (errno != EINTR) {
1238 gpr_asprintf(&err_msg,
1239 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1240 epoll_fd, errno, strerror(errno));
1241 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1242 } else {
1243 /* We were interrupted. Save an interation by doing a zero timeout
1244 epoll_wait to see if there are any other events of interest */
1245 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
1246 (void *)pollset, (void *)worker);
1247 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001248 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001249 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001250
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001251#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001252 /* See the definition of g_poll_sync for more details */
1253 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001254#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001255
Craig Tiller61f96c12017-05-12 13:36:39 -07001256 for (int i = 0; i < ep_rv; ++i) {
1257 void *data_ptr = ep_ev[i].data.ptr;
1258 if (data_ptr == &polling_island_wakeup_fd) {
1259 GRPC_POLLING_TRACE(
1260 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1261 "%d) got merged",
1262 (void *)pollset, (void *)worker, epoll_fd);
1263 /* This means that our polling island is merged with a different
1264 island. We do not have to do anything here since the subsequent call
1265 to the function pollset_work_and_unlock() will pick up the correct
1266 epoll_fd */
1267 } else {
Yash Tibrewalca3c1c02017-09-07 22:47:16 -07001268 grpc_fd *fd = (grpc_fd *)data_ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001269 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1270 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1271 int write_ev = ep_ev[i].events & EPOLLOUT;
1272 if (read_ev || cancel) {
1273 fd_become_readable(exec_ctx, fd, pollset);
1274 }
1275 if (write_ev || cancel) {
1276 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001277 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001278 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001279 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001280
Craig Tiller61f96c12017-05-12 13:36:39 -07001281 g_current_thread_polling_island = NULL;
1282 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1283
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001284 GPR_ASSERT(pi != NULL);
1285
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001286 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001287 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001288 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001289 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001290 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001291 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001292
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001293 GPR_TIMER_END("pollset_work_and_unlock", 0);
1294}
1295
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001296/* pollset->po.mu lock must be held by the caller before calling this.
1297 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001298 during the course of its execution but it will always re-acquire the lock and
1299 ensure that it is held by the time the function returns */
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001300static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1301 grpc_pollset_worker **worker_hdl,
Craig Tiller20397792017-07-18 11:35:27 -07001302 grpc_millis deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001303 GPR_TIMER_BEGIN("pollset_work", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001304 grpc_error *error = GRPC_ERROR_NONE;
Craig Tiller20397792017-07-18 11:35:27 -07001305 int timeout_ms = poll_deadline_to_millis_timeout(exec_ctx, deadline);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001306
1307 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001308
1309 grpc_pollset_worker worker;
1310 worker.next = worker.prev = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001311 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001312 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001313
Craig Tiller557c88c2017-04-05 17:20:18 -07001314 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001315
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001316 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1317 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001318
1319 if (pollset->kicked_without_pollers) {
1320 /* If the pollset was kicked without pollers, pretend that the current
1321 worker got the kick and skip polling. A kick indicates that there is some
1322 work that needs attention like an event on the completion queue or an
1323 alarm */
1324 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1325 pollset->kicked_without_pollers = 0;
1326 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001327 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001328 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1329 worker that there is some pending work that needs immediate attention
1330 (like an event on the completion queue, or a polling island merge that
1331 results in a new epoll-fd to wait on) and that the worker should not
1332 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001333
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001334 A worker can be kicked anytime from the point it is added to the pollset
1335 via push_front_worker() (or push_back_worker()) to the point it is
1336 removed via remove_worker().
1337 If the worker is kicked before/during it calls epoll_pwait(), it should
1338 immediately exit from epoll_wait(). If the worker is kicked after it
1339 returns from epoll_wait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001340
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001341 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001342 times *except* when it is in epoll_pwait(). This way, the worker never
1343 misses acting on a kick */
1344
Craig Tiller19196992016-06-27 18:45:56 -07001345 if (!g_initialized_sigmask) {
1346 sigemptyset(&new_mask);
1347 sigaddset(&new_mask, grpc_wakeup_signal);
1348 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1349 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1350 g_initialized_sigmask = true;
1351 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1352 This is the mask used at all times *except during
1353 epoll_wait()*"
1354 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001355 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001356
Craig Tiller19196992016-06-27 18:45:56 -07001357 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001358 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001359 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001360
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001361 push_front_worker(pollset, &worker); /* Add worker to pollset */
1362
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001363 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1364 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001365 grpc_exec_ctx_flush(exec_ctx);
1366
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001367 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001368
1369 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1370 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001371 remove_worker(pollset, &worker);
1372 }
1373
1374 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1375 false at this point) and the pollset is shutting down, we may have to
1376 finish the shutdown process by calling finish_shutdown_locked().
1377 See pollset_shutdown() for more details.
1378
1379 Note: Continuing to access pollset here is safe; it is the caller's
1380 responsibility to not destroy a pollset when it has outstanding calls to
1381 pollset_work() */
1382 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1383 !pollset->finish_shutdown_called) {
1384 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1385 finish_shutdown_locked(exec_ctx, pollset);
1386
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001387 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001388 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001389 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001390 }
1391
Craig Tiller557c88c2017-04-05 17:20:18 -07001392 if (worker_hdl) *worker_hdl = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001393
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001394 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1395 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001396
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001397 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001398
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001399 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1400 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001401}
1402
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001403static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001404 poll_obj_type bag_type, poll_obj *item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001405 poll_obj_type item_type) {
1406 GPR_TIMER_BEGIN("add_poll_object", 0);
1407
ncteisene9cd8a82017-06-29 06:03:52 -04001408#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001409 GPR_ASSERT(item->obj_type == item_type);
1410 GPR_ASSERT(bag->obj_type == bag_type);
1411#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001412
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001413 grpc_error *error = GRPC_ERROR_NONE;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001414 polling_island *pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001415
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001416 gpr_mu_lock(&bag->mu);
1417 gpr_mu_lock(&item->mu);
1418
Craig Tiller7212c232016-07-06 13:11:09 -07001419retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001420 /*
1421 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1422 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1423 * a refcount of 2) and point item->pi and bag->pi to the new island
1424 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1425 * the other's non-NULL pi
1426 * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
1427 * polling islands and update item->pi and bag->pi to point to the new
1428 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001429 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001430
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001431 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1432 * orphaned */
1433 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1434 gpr_mu_unlock(&item->mu);
1435 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001436 return;
1437 }
1438
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001439 if (item->pi == bag->pi) {
1440 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001441 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001442 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001443
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001444 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1445 * we need to do some extra work to make TSAN happy */
1446 if (item_type == POLL_OBJ_FD) {
1447 /* Unlock before creating a new polling island: the polling island will
1448 create a workqueue which creates a file descriptor, and holding an fd
1449 lock here can eventually cause a loop to appear to TSAN (making it
1450 unhappy). We don't think it's a real loop (there's an epoch point
1451 where that loop possibility disappears), but the advantages of
1452 keeping TSAN happy outweigh any performance advantage we might have
1453 by keeping the lock held. */
1454 gpr_mu_unlock(&item->mu);
1455 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1456 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001457
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001458 /* Need to reverify any assumptions made between the initial lock and
1459 getting to this branch: if they've changed, we need to throw away our
1460 work and figure things out again. */
1461 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001462 GRPC_POLLING_TRACE(
1463 "add_poll_object: Raced creating new polling island. pi_new: %p "
1464 "(fd: %d, %s: %p)",
1465 (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1466 (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001467 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001468 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001469 polling_island_remove_all_fds_locked(pi_new, true, &error);
1470
1471 /* Ref and unref so that the polling island gets deleted during unref
1472 */
1473 PI_ADD_REF(pi_new, "dance_of_destruction");
1474 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1475 goto retry;
1476 }
Craig Tiller27da6422016-07-06 13:14:46 -07001477 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001478 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001479 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001480
1481 GRPC_POLLING_TRACE(
1482 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1483 "%s: %p)",
1484 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1485 poll_obj_string(bag_type), (void *)bag);
1486 } else {
1487 GRPC_POLLING_TRACE(
1488 "add_poll_object: Same polling island. pi: %p (%s, %s)",
1489 (void *)pi_new, poll_obj_string(item_type),
1490 poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001491 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001492 } else if (item->pi == NULL) {
1493 /* GPR_ASSERT(bag->pi != NULL) */
1494 /* Make pi_new point to latest pi*/
1495 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001496
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001497 if (item_type == POLL_OBJ_FD) {
1498 grpc_fd *fd = FD_FROM_PO(item);
1499 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1500 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001501
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001502 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001503 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001504 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1505 "bag(%s): %p)",
1506 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1507 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001508 } else if (bag->pi == NULL) {
1509 /* GPR_ASSERT(item->pi != NULL) */
1510 /* Make pi_new to point to latest pi */
1511 pi_new = polling_island_lock(item->pi);
1512 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001513 GRPC_POLLING_TRACE(
1514 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1515 "bag(%s): %p)",
1516 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1517 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001518 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001519 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001520 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001521 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1522 "bag(%s): %p)",
1523 (void *)pi_new, poll_obj_string(item_type), (void *)item,
1524 poll_obj_string(bag_type), (void *)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001525 }
1526
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001527 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1528 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001529
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001530 if (item->pi != pi_new) {
1531 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1532 if (item->pi != NULL) {
1533 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001534 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001535 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001536 }
1537
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001538 if (bag->pi != pi_new) {
1539 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1540 if (bag->pi != NULL) {
1541 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001542 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001543 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001544 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001545
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001546 gpr_mu_unlock(&item->mu);
1547 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001548
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001549 GRPC_LOG_IF_ERROR("add_poll_object", error);
1550 GPR_TIMER_END("add_poll_object", 0);
1551}
Craig Tiller57726ca2016-09-12 11:59:45 -07001552
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001553static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
1554 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001555 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001556 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001557}
1558
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001559/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001560 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001561 */
1562
1563static grpc_pollset_set *pollset_set_create(void) {
Yash Tibrewalca3c1c02017-09-07 22:47:16 -07001564 grpc_pollset_set *pss = (grpc_pollset_set *)gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001565 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001566 pss->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001567#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001568 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1569#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001570 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001571}
1572
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001573static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
1574 grpc_pollset_set *pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001575 gpr_mu_destroy(&pss->po.mu);
1576
1577 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001578 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001579 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001580
1581 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001582}
1583
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001584static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1585 grpc_fd *fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001586 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001587 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001588}
1589
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001590static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
1591 grpc_fd *fd) {
1592 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001593}
1594
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001595static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001596 grpc_pollset_set *pss, grpc_pollset *ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001597 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001598 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001599}
1600
/* No-op in this engine: polling islands only grow/merge; membership is not
   revoked on deletion. */
static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  /* Nothing to do */
}
1605
1606static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
1607 grpc_pollset_set *bag,
1608 grpc_pollset_set *item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001609 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001610 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001611}
1612
/* No-op in this engine: see pollset_set_del_pollset. */
static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  /* Nothing to do */
}
1618
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001619/* Test helper functions
1620 * */
1621void *grpc_fd_get_polling_island(grpc_fd *fd) {
1622 polling_island *pi;
1623
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001624 gpr_mu_lock(&fd->po.mu);
1625 pi = fd->po.pi;
1626 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001627
1628 return pi;
1629}
1630
1631void *grpc_pollset_get_polling_island(grpc_pollset *ps) {
1632 polling_island *pi;
1633
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001634 gpr_mu_lock(&ps->po.mu);
1635 pi = ps->po.pi;
1636 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001637
1638 return pi;
1639}
1640
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001641bool grpc_are_polling_islands_equal(void *p, void *q) {
Yash Tibrewalca3c1c02017-09-07 22:47:16 -07001642 polling_island *p1 = (polling_island *)p;
1643 polling_island *p2 = (polling_island *)q;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001644
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001645 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1646 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001647 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001648 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001649
1650 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001651}
1652
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001653/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001654 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001655 */
1656
1657static void shutdown_engine(void) {
1658 fd_global_shutdown();
1659 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001660 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001661}
1662
/* Wires this engine's implementation into the iomgr event-engine interface.
   Entries are positional: the order must match the field order of
   grpc_event_engine_vtable (declared outside this file). */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset),

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    shutdown_engine,
};
1693
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001694/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1695 * Create a dummy epoll_fd to make sure epoll support is available */
1696static bool is_epoll_available() {
1697 int fd = epoll_create1(EPOLL_CLOEXEC);
1698 if (fd < 0) {
1699 gpr_log(
1700 GPR_ERROR,
1701 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1702 fd);
1703 return false;
1704 }
1705 close(fd);
1706 return true;
1707}
1708
Craig Tillerf8382b82017-04-27 15:09:48 -07001709const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1710 bool explicit_request) {
yang-g30101b02017-11-06 14:35:30 -08001711 const char *error_msg = NULL;
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001712 /* If use of signals is disabled, we cannot use epoll engine*/
1713 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
yang-g30101b02017-11-06 14:35:30 -08001714 gpr_log(GPR_ERROR, "Skipping epollsig because use of signals is disabled.");
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001715 return NULL;
1716 }
1717
Ken Paysoncd7d0472016-10-11 12:24:20 -07001718 if (!grpc_has_wakeup_fd()) {
yang-g30101b02017-11-06 14:35:30 -08001719 gpr_log(GPR_ERROR, "Skipping epollsig because of no wakeup fd.");
Ken Paysonbc544be2016-10-06 19:23:47 -07001720 return NULL;
1721 }
1722
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001723 if (!is_epoll_available()) {
yang-g30101b02017-11-06 14:35:30 -08001724 gpr_log(GPR_ERROR, "Skipping epollsig because epoll is unavailable.");
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001725 return NULL;
1726 }
1727
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001728 if (!is_grpc_wakeup_signal_initialized) {
Sree Kuchibhotla0fda8802017-08-30 20:34:51 -07001729 if (explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001730 grpc_use_signal(SIGRTMIN + 6);
1731 } else {
yang-g30101b02017-11-06 14:35:30 -08001732 gpr_log(GPR_ERROR,
1733 "Skipping epollsig because uninitialized wakeup signal.");
Craig Tillerf8382b82017-04-27 15:09:48 -07001734 return NULL;
1735 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001736 }
1737
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001738 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001739
1740 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
1741 return NULL;
1742 }
1743
1744 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1745 polling_island_global_init())) {
1746 return NULL;
1747 }
1748
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001749 return &vtable;
1750}
1751
murgatroid99623dd4f2016-08-08 17:31:27 -07001752#else /* defined(GRPC_LINUX_EPOLL) */
1753#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001754#include "src/core/lib/iomgr/ev_epollsig_linux.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001755/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001756 * NULL */
Craig Tillerf8382b82017-04-27 15:09:48 -07001757const grpc_event_engine_vtable *grpc_init_epollsig_linux(
1758 bool explicit_request) {
yang-g30101b02017-11-06 14:35:30 -08001759 gpr_log(GPR_ERROR,
1760 "Skipping epollsig becuase GRPC_LINUX_EPOLL is not defined.");
Craig Tillerf8382b82017-04-27 15:09:48 -07001761 return NULL;
1762}
murgatroid99623dd4f2016-08-08 17:31:27 -07001763#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001764
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001765void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001766#endif /* !defined(GRPC_LINUX_EPOLL) */