/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/core/lib/iomgr/port.h"

#include <grpc/grpc_posix.h>
#include <grpc/support/log.h>

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/string_util.h>
#include <grpc/support/tls.h>
#include <grpc/support/useful.h>

#include "src/core/lib/debug/stats.h"
#include "src/core/lib/iomgr/block_annotate.h"
#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/manual_constructor.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)

#define GRPC_POLLING_TRACE(...)               \
  if (GRPC_TRACER_ON(grpc_polling_trace)) {   \
    gpr_log(GPR_INFO, __VA_ARGS__);           \
  }

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;

/* Implements the function defined in grpc_posix.h. This function might be
 * called before even calling grpc_init() to set a different signal to use.
 * If signum == -1, then the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}
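
/* Example (hypothetical) usage: an application that prefers a specific
   real-time signal for this engine could call, before grpc_init():
     grpc_use_signal(SIGRTMIN + 2);
   Calling grpc_use_signal(-1) disables signal use entirely, in which case
   this polling engine will not be selected. */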

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifndef NDEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island* pi;
} poll_obj;

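/* Returns a human-readable name for the given poll object type (used in trace
   and error messages). */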
const char* poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd*)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
       bit 0    : 1=Active / 0=Orphaned
       bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;

  struct grpc_fd* freelist_next;
  grpc_closure* on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};

/* Reference counting for fds */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd* fd);
static void fd_unref(grpc_fd* fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(exec_ctx, p, r) \
  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd** fds;
} polling_island;

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker* next;
  struct grpc_pollset_worker* prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure* shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

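/* Appends 'error' as a child of '*composite' (creating the composite error
   from 'desc' on first use). Returns true iff 'error' was GRPC_ERROR_NONE. */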
static bool append_error(grpc_error** composite, grpc_error* error,
                         const char* desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}

/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wake up
   all the threads currently polling the smaller polling island (so that they
   can start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island* g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work-around this race, we establish a happens-before relation between
   the code just-before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island* pi);
static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi);

#ifndef NDEBUG
static void pi_add_ref_dbg(polling_island* pi, const char* reason,
                           const char* file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG,
            "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

static void pi_unref_dbg(grpc_exec_ctx* exec_ctx, polling_island* pi,
                         const char* reason, const char* file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG,
            "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
#endif

static void pi_add_ref(polling_island* pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != nullptr) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error** error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char* err_msg;
  const char* err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds =
          (grpc_fd**)gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island* pi,
                                                grpc_wakeup_fd* wakeup_fd,
                                                grpc_error** error) {
  struct epoll_event ev;
  int err;
  char* err_msg;
  const char* err_desc = "polling_island_add_wakeup_fd";

  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island* pi,
                                                 bool remove_fd_refs,
                                                 grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, nullptr);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
                                            bool is_fd_closed,
                                            grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, nullptr);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}

/* Might return NULL in case of an error */
static polling_island* polling_island_create(grpc_exec_ctx* exec_ctx,
                                             grpc_fd* initial_fd,
                                             grpc_error** error) {
  polling_island* pi = nullptr;
  const char* err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = (polling_island*)gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = nullptr;
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != nullptr) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = nullptr;
  }
  return pi;
}

static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}

/* Attempts to get the last polling island in the linked list (linked by the
 * 'merged_to' field). Since this does not lock the polling island, there are
 * no guarantees that the island returned is the last island */
static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
  polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
  while (next != nullptr) {
    pi = next;
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
  }

  return pi;
}

/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island* polling_island_lock(polling_island* pi) {
  polling_island* next = nullptr;

  while (true) {
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    if (next == nullptr) {
      /* Looks like 'pi' is the last node in the linked list but unless we
         check this by holding the pi->mu lock, we cannot be sure (i.e without
         the pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
      if (next == nullptr) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed to
   by *p and *q (and also updates *p and *q to point to the latest polling
   islands)

   This function is needed because calling the following block of code to
   obtain locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island** p, polling_island** q) {
  polling_island* pi_1 = *p;
  polling_island* pi_2 = *q;
  polling_island* next_1 = nullptr;
  polling_island* next_2 = nullptr;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != nullptr) {
      pi_1 = next_1;
      next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != nullptr) {
      pi_2 = next_2;
      next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == nullptr && next_2 == nullptr) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

static polling_island* polling_island_merge(polling_island* p,
                                            polling_island* q,
                                            grpc_error** error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island*, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wake up all the pollers (if any) on p so that they pick up this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}

static grpc_error* polling_island_global_init() {
  grpc_error* error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wake up 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd* fd_freelist = nullptr;
static gpr_mu fd_freelist_mu;

#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd* fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}

#ifndef NDEBUG
static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd* fd, int n) {
#endif
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    fd->read_closure->DestroyEvent();
    fd->write_closure->DestroyEvent();

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != nullptr) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

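/* Creates a grpc_fd wrapping 'fd', reusing a struct from the freelist when one
   is available. 'name' is only used to label the fd for iomgr tracking. */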
static grpc_fd* fd_create(int fd, const char* name) {
  grpc_fd* new_fd = nullptr;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != nullptr) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == nullptr) {
    new_fd = (grpc_fd*)gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
    new_fd->read_closure.Init();
    new_fd->write_closure.Init();
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = nullptr;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  new_fd->read_closure->InitEvent();
  new_fd->write_closure->InitEvent();
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = nullptr;
  new_fd->on_done_closure = nullptr;

  gpr_mu_unlock(&new_fd->po.mu);

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}

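/* Returns the underlying OS file descriptor, or -1 if the grpc_fd has already
   been orphaned. */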
static int fd_wrapped_fd(grpc_fd* fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

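/* Orphans the fd: removes it from its polling island, then either hands the
   underlying descriptor back via *release_fd or closes it, and finally
   schedules on_done. */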
static void fd_orphan(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                      grpc_closure* on_done, int* release_fd,
                      bool already_closed, const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* unref_pi = nullptr;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != nullptr) {
    polling_island* pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = nullptr;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != nullptr) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != nullptr) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char* msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}

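/* Returns the pollset that last noticed this fd as readable (may be NULL). */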
static grpc_pollset* fd_get_read_notifier_pollset(grpc_exec_ctx* exec_ctx,
                                                  grpc_fd* fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset*)notifier;
}

static bool fd_is_shutdown(grpc_fd* fd) {
  return fd->read_closure->IsShutdown();
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx* exec_ctx, grpc_fd* fd, grpc_error* why) {
  if (fd->read_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    fd->write_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                              grpc_closure* closure) {
  fd->read_closure->NotifyOn(exec_ctx, closure);
}

static void fd_notify_on_write(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                               grpc_closure* closure) {
  fd->write_closure->NotifyOn(exec_ctx, closure);
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;

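/* Handler for the wakeup signal. Kicking a worker only needs the signal
   delivery to interrupt the worker's blocking poll call, so the handler itself
   does nothing beyond optional debug logging. */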
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700960static void sig_handler(int sig_num) {
Sree Kuchibhotlad627c102016-06-06 15:49:32 -0700961#ifdef GRPC_EPOLL_DEBUG
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700962 gpr_log(GPR_INFO, "Received signal %d", sig_num);
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700963#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700964}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700965
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -0700966static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700967
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700968/* Global state management */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700969static grpc_error* pollset_global_init(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700970 gpr_tls_init(&g_current_thread_pollset);
971 gpr_tls_init(&g_current_thread_worker);
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700972 poller_kick_init();
Craig Tillerc3571792017-05-02 12:33:38 -0700973 return GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700974}
975
976static void pollset_global_shutdown(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700977 gpr_tls_destroy(&g_current_thread_pollset);
978 gpr_tls_destroy(&g_current_thread_worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700979}
980
Craig Tillerbaa14a92017-11-03 09:09:36 -0700981static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
982 grpc_error* err = GRPC_ERROR_NONE;
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700983
984 /* Kick the worker only if it was not already kicked */
985 if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
986 GRPC_POLLING_TRACE(
987 "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
Craig Tillerbaa14a92017-11-03 09:09:36 -0700988 (void*)worker, (long int)worker->pt_id);
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700989 int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
990 if (err_num != 0) {
991 err = GRPC_OS_ERROR(err_num, "pthread_kill");
992 }
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700993 }
994 return err;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700995}
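/* Illustrative sketch (the demo_* names are hypothetical; this is not engine
   code): a kick is just an atomic flag plus a targeted signal. The CAS above
   guarantees at most one pthread_kill() per kick cycle, and pthread_kill()
   delivers grpc_wakeup_signal to the one thread that is (or is about to be)
   blocked in epoll_pwait(), which then returns with EINTR. */
typedef struct {
  pthread_t tid;     /* thread currently polling on behalf of this worker */
  gpr_atm is_kicked; /* 0 = not kicked, 1 = a kick is pending/absorbed */
} demo_worker;

static int demo_kick(demo_worker* w) {
  if (gpr_atm_no_barrier_cas(&w->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    /* First kicker this cycle interrupts the poller; returns 0 or an errno. */
    return pthread_kill(w->tid, grpc_wakeup_signal);
  }
  return 0; /* someone already kicked; the pending signal covers us as well */
}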
996
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700997/* Return 1 if the pollset has active threads in pollset_work (pollset must
998 * be locked) */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700999static int pollset_has_workers(grpc_pollset* p) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001000 return p->root_worker.next != &p->root_worker;
1001}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001002
Craig Tillerbaa14a92017-11-03 09:09:36 -07001003static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001004 worker->prev->next = worker->next;
1005 worker->next->prev = worker->prev;
1006}
1007
Craig Tillerbaa14a92017-11-03 09:09:36 -07001008static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001009 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001010 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001011 remove_worker(p, w);
1012 return w;
1013 } else {
Craig Tiller4782d922017-11-10 09:53:21 -08001014 return nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001015 }
1016}
1017
Craig Tillerbaa14a92017-11-03 09:09:36 -07001018static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001019 worker->next = &p->root_worker;
1020 worker->prev = worker->next->prev;
1021 worker->prev->next = worker->next->prev = worker;
1022}
1023
Craig Tillerbaa14a92017-11-03 09:09:36 -07001024static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001025 worker->prev = &p->root_worker;
1026 worker->next = worker->prev->next;
1027 worker->prev->next = worker->next->prev = worker;
1028}
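/* Illustrative walkthrough (hypothetical, not engine code) of the sentinel
   list maintained above: root_worker is a dummy node that links to itself
   when the pollset is empty, so linking and unlinking never need NULL checks.
   Assumes 'p' has already been set up by pollset_init() further below. */
static void demo_worker_list(grpc_pollset* p, grpc_pollset_worker* w) {
  GPR_ASSERT(!pollset_has_workers(p)); /* empty: root.next == &root */
  push_front_worker(p, w);             /* root <-> w <-> root */
  GPR_ASSERT(pollset_has_workers(p));
  grpc_pollset_worker* popped = pop_front_worker(p); /* unlinks and returns w */
  GPR_ASSERT(popped == w);
  GPR_ASSERT(!pollset_has_workers(p)); /* back to root.next == &root */
}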
1029
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001030/* p->mu must be held before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001031static grpc_error* pollset_kick(grpc_exec_ctx* exec_ctx, grpc_pollset* p,
1032 grpc_pollset_worker* specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001033 GPR_TIMER_BEGIN("pollset_kick", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001034 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller0ff222a2017-09-01 09:41:43 -07001035 GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001036 const char* err_desc = "Kick Failure";
1037 grpc_pollset_worker* worker = specific_worker;
Craig Tiller4782d922017-11-10 09:53:21 -08001038 if (worker != nullptr) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001039 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001040 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001041 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001042 for (worker = p->root_worker.next; worker != &p->root_worker;
1043 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001044 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001045 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001046 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001047 }
Craig Tillera218a062016-06-26 09:58:37 -07001048 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001049 } else {
1050 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001051 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001052 } else {
1053 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001054 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001055 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001056 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001057 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001058 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1059 /* Since worker == NULL, it means that we can kick "any" worker on this
1060 pollset 'p'. If 'p' happens to be the same pollset this thread is
1061 currently polling (i.e in pollset_work() function), then there is no need
1062 to kick any other worker since the current thread can just absorb the
1063 kick. This is the reason why we enter this case only when
1064 g_current_thread_pollset is != p */
1065
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001066 GPR_TIMER_MARK("kick_anonymous", 0);
1067 worker = pop_front_worker(p);
Craig Tiller4782d922017-11-10 09:53:21 -08001068 if (worker != nullptr) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001069 GPR_TIMER_MARK("finally_kick", 0);
1070 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001071 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001072 } else {
1073 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001074 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001075 }
1076 }
1077
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001078 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001079 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1080 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001081}
1082
Craig Tillerbaa14a92017-11-03 09:09:36 -07001083static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001084 gpr_mu_init(&pollset->po.mu);
1085 *mu = &pollset->po.mu;
Craig Tiller4782d922017-11-10 09:53:21 -08001086 pollset->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001087#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001088 pollset->po.obj_type = POLL_OBJ_POLLSET;
1089#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001090
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001091 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001092 pollset->kicked_without_pollers = false;
1093
1094 pollset->shutting_down = false;
1095 pollset->finish_shutdown_called = false;
Craig Tiller4782d922017-11-10 09:53:21 -08001096 pollset->shutdown_done = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001097}
1098
Craig Tillerbaa14a92017-11-03 09:09:36 -07001099static int poll_deadline_to_millis_timeout(grpc_exec_ctx* exec_ctx,
Craig Tiller20397792017-07-18 11:35:27 -07001100 grpc_millis millis) {
1101 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
1102 grpc_millis delta = millis - grpc_exec_ctx_now(exec_ctx);
1103 if (delta > INT_MAX)
1104 return INT_MAX;
1105 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001106 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001107 else
1108 return (int)delta;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001109}
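/* Illustrative sketch (hypothetical helper, not engine code): the conversion
   above clamps in both directions - a deadline already in the past maps to 0
   (epoll returns immediately), a far-future deadline is capped at INT_MAX,
   and the sentinel GRPC_MILLIS_INF_FUTURE maps to -1, which epoll treats as
   "block indefinitely". */
static void demo_timeout_clamping(grpc_exec_ctx* exec_ctx) {
  grpc_millis now = grpc_exec_ctx_now(exec_ctx);
  GPR_ASSERT(poll_deadline_to_millis_timeout(exec_ctx, now - 1) == 0);
  GPR_ASSERT(poll_deadline_to_millis_timeout(exec_ctx, now + 5000) <= 5000);
  GPR_ASSERT(poll_deadline_to_millis_timeout(exec_ctx, GRPC_MILLIS_INF_FUTURE) ==
             -1);
}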
1110
Craig Tillerbaa14a92017-11-03 09:09:36 -07001111static void fd_become_readable(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
1112 grpc_pollset* notifier) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -08001113 fd->read_closure->SetReady(exec_ctx);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001114
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001115 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001116 different 'notifier's when an fd becomes readable and it is in two epoll
1117 sets (This can happen briefly during polling island merges). In such cases
1118     it does not really matter which notifier is set as the read_notifier_pollset
1119 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001120 /* Use release store to match with acquire load in fd_get_read_notifier */
1121 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001122}
1123
Craig Tillerbaa14a92017-11-03 09:09:36 -07001124static void fd_become_writable(grpc_exec_ctx* exec_ctx, grpc_fd* fd) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -08001125 fd->write_closure->SetReady(exec_ctx);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001126}
1127
Craig Tillerbaa14a92017-11-03 09:09:36 -07001128static void pollset_release_polling_island(grpc_exec_ctx* exec_ctx,
1129 grpc_pollset* ps,
1130 const char* reason) {
Craig Tiller4782d922017-11-10 09:53:21 -08001131 if (ps->po.pi != nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001132 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001133 }
Craig Tiller4782d922017-11-10 09:53:21 -08001134 ps->po.pi = nullptr;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001135}
1136
Craig Tillerbaa14a92017-11-03 09:09:36 -07001137static void finish_shutdown_locked(grpc_exec_ctx* exec_ctx,
1138 grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001139 /* The pollset cannot have any workers if we are at this stage */
1140 GPR_ASSERT(!pollset_has_workers(pollset));
1141
1142 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001143
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001144 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001145 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
ncteisen969b46e2017-06-08 14:57:11 -07001146 GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001147}
1148
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001149/* pollset->po.mu lock must be held by the caller before calling this */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001150static void pollset_shutdown(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1151 grpc_closure* closure) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001152 GPR_TIMER_BEGIN("pollset_shutdown", 0);
1153 GPR_ASSERT(!pollset->shutting_down);
1154 pollset->shutting_down = true;
1155 pollset->shutdown_done = closure;
Craig Tiller0ff222a2017-09-01 09:41:43 -07001156 pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001157
1158 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1159 because it would release the underlying polling island. In such a case, we
1160 let the last worker call finish_shutdown_locked() from pollset_work() */
1161 if (!pollset_has_workers(pollset)) {
1162 GPR_ASSERT(!pollset->finish_shutdown_called);
1163 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
1164 finish_shutdown_locked(exec_ctx, pollset);
1165 }
1166 GPR_TIMER_END("pollset_shutdown", 0);
1167}
1168
1169/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1170 * than destroying the mutexes, there is nothing special that needs to be done
1171 * here */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001172static void pollset_destroy(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001173 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001174 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001175}
1176
Craig Tiller84ea3412016-09-08 14:57:56 -07001177#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001178/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001179static void pollset_work_and_unlock(grpc_exec_ctx* exec_ctx,
1180 grpc_pollset* pollset,
1181 grpc_pollset_worker* worker, int timeout_ms,
1182 sigset_t* sig_mask, grpc_error** error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001183 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001184 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001185 int ep_rv;
Craig Tiller4782d922017-11-10 09:53:21 -08001186 polling_island* pi = nullptr;
Craig Tillerbaa14a92017-11-03 09:09:36 -07001187 char* err_msg;
1188 const char* err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001189 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1190
1191  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001192     latest polling island pointed to by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001193
1194 Since epoll_fd is immutable, we can read it without obtaining the polling
1195 island lock. There is however a possibility that the polling island (from
1196 which we got the epoll_fd) got merged with another island while we are
1197     in this function. This is still okay because in such a case, we will wake
1198     up right away from epoll_wait() and pick up the latest polling_island the
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001199     next time this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001200
Craig Tiller4782d922017-11-10 09:53:21 -08001201 if (pollset->po.pi == nullptr) {
1202 pollset->po.pi = polling_island_create(exec_ctx, nullptr, error);
1203 if (pollset->po.pi == nullptr) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001204 GPR_TIMER_END("pollset_work_and_unlock", 0);
1205 return; /* Fatal error. We cannot continue */
1206 }
1207
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001208 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001209 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001210 (void*)pollset, (void*)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001211 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001212
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001213 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001214 epoll_fd = pi->epoll_fd;
1215
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001216  /* Update pollset->po.pi since the island it currently points to may be
1217     older than the one pointed to by pi */
1218 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001219 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1220 polling island to be deleted */
1221 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001222 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1223 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001224 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001225
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001226 /* Add an extra ref so that the island does not get destroyed (which means
1227 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1228 epoll_fd */
1229 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001230 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001231
Craig Tiller61f96c12017-05-12 13:36:39 -07001232 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1233 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001234
Craig Tiller61f96c12017-05-12 13:36:39 -07001235 GRPC_SCHEDULING_START_BLOCKING_REGION;
Craig Tillerb4bb1cd2017-07-20 14:18:17 -07001236 GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001237 ep_rv =
1238 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Craig Tiller781e91a2017-07-17 16:21:00 -07001239 GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001240 if (ep_rv < 0) {
1241 if (errno != EINTR) {
1242 gpr_asprintf(&err_msg,
1243 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1244 epoll_fd, errno, strerror(errno));
1245 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1246 } else {
1247      /* We were interrupted. Save an iteration by doing a zero timeout
1248 epoll_wait to see if there are any other events of interest */
1249 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001250 (void*)pollset, (void*)worker);
Craig Tiller61f96c12017-05-12 13:36:39 -07001251 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001252 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001253 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001254
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001255#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001256 /* See the definition of g_poll_sync for more details */
1257 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001258#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001259
Craig Tiller61f96c12017-05-12 13:36:39 -07001260 for (int i = 0; i < ep_rv; ++i) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001261 void* data_ptr = ep_ev[i].data.ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001262 if (data_ptr == &polling_island_wakeup_fd) {
1263 GRPC_POLLING_TRACE(
1264 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1265 "%d) got merged",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001266 (void*)pollset, (void*)worker, epoll_fd);
Craig Tiller61f96c12017-05-12 13:36:39 -07001267 /* This means that our polling island is merged with a different
1268 island. We do not have to do anything here since the subsequent call
1269 to the function pollset_work_and_unlock() will pick up the correct
1270 epoll_fd */
1271 } else {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001272 grpc_fd* fd = (grpc_fd*)data_ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001273 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1274 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1275 int write_ev = ep_ev[i].events & EPOLLOUT;
1276 if (read_ev || cancel) {
1277 fd_become_readable(exec_ctx, fd, pollset);
1278 }
1279 if (write_ev || cancel) {
1280 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001281 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001282 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001283 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001284
Craig Tiller4782d922017-11-10 09:53:21 -08001285 g_current_thread_polling_island = nullptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001286 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1287
Craig Tiller4782d922017-11-10 09:53:21 -08001288 GPR_ASSERT(pi != nullptr);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001289
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001290 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001291 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001292 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001293     pollset->po.pi pointer might get updated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001294 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001295 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001296
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001297 GPR_TIMER_END("pollset_work_and_unlock", 0);
1298}
1299
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001300/* pollset->po.mu lock must be held by the caller before calling this.
1301 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001302 during the course of its execution but it will always re-acquire the lock and
1303 ensure that it is held by the time the function returns */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001304static grpc_error* pollset_work(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1305 grpc_pollset_worker** worker_hdl,
Craig Tiller20397792017-07-18 11:35:27 -07001306 grpc_millis deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001307 GPR_TIMER_BEGIN("pollset_work", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001308 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller20397792017-07-18 11:35:27 -07001309 int timeout_ms = poll_deadline_to_millis_timeout(exec_ctx, deadline);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001310
1311 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001312
1313 grpc_pollset_worker worker;
Craig Tiller4782d922017-11-10 09:53:21 -08001314 worker.next = worker.prev = nullptr;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001315 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001316 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001317
Craig Tiller557c88c2017-04-05 17:20:18 -07001318 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001319
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001320 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1321 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001322
1323 if (pollset->kicked_without_pollers) {
1324 /* If the pollset was kicked without pollers, pretend that the current
1325 worker got the kick and skip polling. A kick indicates that there is some
1326 work that needs attention like an event on the completion queue or an
1327 alarm */
1328 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1329 pollset->kicked_without_pollers = 0;
1330 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001331 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001332 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1333 worker that there is some pending work that needs immediate attention
1334 (like an event on the completion queue, or a polling island merge that
1335 results in a new epoll-fd to wait on) and that the worker should not
1336 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001337
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001338 A worker can be kicked anytime from the point it is added to the pollset
1339 via push_front_worker() (or push_back_worker()) to the point it is
1340 removed via remove_worker().
1341 If the worker is kicked before/during it calls epoll_pwait(), it should
1342       If the worker is kicked before or while it is in epoll_pwait(), it should
1343       return from epoll_pwait() immediately. If the worker is kicked after it
1344       returns from epoll_pwait(), then nothing really needs to be done.
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001345 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001346 times *except* when it is in epoll_pwait(). This way, the worker never
1347 misses acting on a kick */
1348
Craig Tiller19196992016-06-27 18:45:56 -07001349 if (!g_initialized_sigmask) {
1350 sigemptyset(&new_mask);
1351 sigaddset(&new_mask, grpc_wakeup_signal);
1352 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1353 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1354 g_initialized_sigmask = true;
1355 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1356                      This is the mask used at all times *except during
1357                      epoll_pwait()*
1358       g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal';
Craig Tiller510ff692016-06-27 20:31:49 -07001359                      this is the mask to use *during epoll_pwait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001360
Craig Tiller19196992016-06-27 18:45:56 -07001361 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001362 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001363 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001364
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001365 push_front_worker(pollset, &worker); /* Add worker to pollset */
1366
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001367 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1368 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001369 grpc_exec_ctx_flush(exec_ctx);
1370
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001371 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001372
1373 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1374 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001375 remove_worker(pollset, &worker);
1376 }
1377
1378 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1379 false at this point) and the pollset is shutting down, we may have to
1380 finish the shutdown process by calling finish_shutdown_locked().
1381 See pollset_shutdown() for more details.
1382
1383 Note: Continuing to access pollset here is safe; it is the caller's
1384 responsibility to not destroy a pollset when it has outstanding calls to
1385 pollset_work() */
1386 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1387 !pollset->finish_shutdown_called) {
1388 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1389 finish_shutdown_locked(exec_ctx, pollset);
1390
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001391 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001392 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001393 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001394 }
1395
Craig Tiller4782d922017-11-10 09:53:21 -08001396 if (worker_hdl) *worker_hdl = nullptr;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001397
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001398 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1399 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001400
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001401 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001402
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001403 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1404 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001405}
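/* Illustrative sketch (hypothetical demo_ helper, not engine code) of the
   signal-mask discipline described in the comments above: grpc_wakeup_signal
   stays blocked at all times except inside epoll_pwait() itself.
   epoll_pwait() installs the "signal allowed" mask, sleeps, and restores the
   blocked mask atomically, so a kick can never slip through between checking
   for work and going to sleep - it either interrupts the wait with EINTR or
   stays pending until the next wait. */
static int demo_poll_once(int epoll_fd, struct epoll_event* events,
                          int max_events, int timeout_ms) {
  sigset_t blocked, during_wait;
  sigemptyset(&blocked);
  sigaddset(&blocked, grpc_wakeup_signal);
  /* Block the wakeup signal for normal execution and remember the old mask. */
  pthread_sigmask(SIG_BLOCK, &blocked, &during_wait);
  /* The mask used *inside* the wait must let the wakeup signal through. */
  sigdelset(&during_wait, grpc_wakeup_signal);
  return epoll_pwait(epoll_fd, events, max_events, timeout_ms, &during_wait);
}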
1406
Craig Tillerbaa14a92017-11-03 09:09:36 -07001407static void add_poll_object(grpc_exec_ctx* exec_ctx, poll_obj* bag,
1408 poll_obj_type bag_type, poll_obj* item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001409 poll_obj_type item_type) {
1410 GPR_TIMER_BEGIN("add_poll_object", 0);
1411
ncteisene9cd8a82017-06-29 06:03:52 -04001412#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001413 GPR_ASSERT(item->obj_type == item_type);
1414 GPR_ASSERT(bag->obj_type == bag_type);
1415#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001416
Craig Tillerbaa14a92017-11-03 09:09:36 -07001417 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller4782d922017-11-10 09:53:21 -08001418 polling_island* pi_new = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001419
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001420 gpr_mu_lock(&bag->mu);
1421 gpr_mu_lock(&item->mu);
1422
Craig Tiller7212c232016-07-06 13:11:09 -07001423retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001424 /*
1425 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1426 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1427 * a refcount of 2) and point item->pi and bag->pi to the new island
1428 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1429 * the other's non-NULL pi
1430 * 4) Finally, if item->pi and bag->pi are non-NULL and not equal, merge the
1431 * polling islands and update item->pi and bag->pi to point to the new
1432 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001433 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001434
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001435 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1436 * orphaned */
1437 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1438 gpr_mu_unlock(&item->mu);
1439 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001440 return;
1441 }
1442
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001443 if (item->pi == bag->pi) {
1444 pi_new = item->pi;
Craig Tiller4782d922017-11-10 09:53:21 -08001445 if (pi_new == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001446 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001447
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001448 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1449 * we need to do some extra work to make TSAN happy */
1450 if (item_type == POLL_OBJ_FD) {
1451 /* Unlock before creating a new polling island: the polling island will
1452 create a workqueue which creates a file descriptor, and holding an fd
1453 lock here can eventually cause a loop to appear to TSAN (making it
1454 unhappy). We don't think it's a real loop (there's an epoch point
1455 where that loop possibility disappears), but the advantages of
1456 keeping TSAN happy outweigh any performance advantage we might have
1457 by keeping the lock held. */
1458 gpr_mu_unlock(&item->mu);
1459 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1460 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001461
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001462 /* Need to reverify any assumptions made between the initial lock and
1463 getting to this branch: if they've changed, we need to throw away our
1464 work and figure things out again. */
Craig Tiller4782d922017-11-10 09:53:21 -08001465 if (item->pi != nullptr) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001466 GRPC_POLLING_TRACE(
1467 "add_poll_object: Raced creating new polling island. pi_new: %p "
1468 "(fd: %d, %s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001469 (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1470 (void*)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001471 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001472 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001473 polling_island_remove_all_fds_locked(pi_new, true, &error);
1474
1475 /* Ref and unref so that the polling island gets deleted during unref
1476 */
1477 PI_ADD_REF(pi_new, "dance_of_destruction");
1478 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1479 goto retry;
1480 }
Craig Tiller27da6422016-07-06 13:14:46 -07001481 } else {
Craig Tiller4782d922017-11-10 09:53:21 -08001482 pi_new = polling_island_create(exec_ctx, nullptr, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001483 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001484
1485 GRPC_POLLING_TRACE(
1486 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1487 "%s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001488 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1489 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001490 } else {
1491 GRPC_POLLING_TRACE(
1492 "add_poll_object: Same polling island. pi: %p (%s, %s)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001493 (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001494 }
Craig Tiller4782d922017-11-10 09:53:21 -08001495 } else if (item->pi == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001496 /* GPR_ASSERT(bag->pi != NULL) */
1497 /* Make pi_new point to latest pi*/
1498 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001499
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001500 if (item_type == POLL_OBJ_FD) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001501 grpc_fd* fd = FD_FROM_PO(item);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001502 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1503 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001504
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001505 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001506 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001507 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1508 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001509 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1510 poll_obj_string(bag_type), (void*)bag);
Craig Tiller4782d922017-11-10 09:53:21 -08001511 } else if (bag->pi == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001512 /* GPR_ASSERT(item->pi != NULL) */
1513 /* Make pi_new to point to latest pi */
1514 pi_new = polling_island_lock(item->pi);
1515 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001516 GRPC_POLLING_TRACE(
1517 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1518 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001519 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1520 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001521 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001522 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001523 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001524 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1525 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001526 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1527 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001528 }
1529
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001530 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1531 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001532
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001533 if (item->pi != pi_new) {
1534 PI_ADD_REF(pi_new, poll_obj_string(item_type));
Craig Tiller4782d922017-11-10 09:53:21 -08001535 if (item->pi != nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001536 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001537 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001538 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001539 }
1540
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001541 if (bag->pi != pi_new) {
1542 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
Craig Tiller4782d922017-11-10 09:53:21 -08001543 if (bag->pi != nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001544 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001545 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001546 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001547 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001548
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001549 gpr_mu_unlock(&item->mu);
1550 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001551
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001552 GRPC_LOG_IF_ERROR("add_poll_object", error);
1553 GPR_TIMER_END("add_poll_object", 0);
1554}
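/* Illustrative sketch (hypothetical demo_ names, not engine code): stripped of
   locking and the retry loop, add_poll_object() above reduces to the four-way
   case analysis on the two polling-island pointers spelled out in its opening
   comment. */
typedef enum {
  DEMO_ALREADY_LINKED,  /* case 1: both point at the same non-NULL island */
  DEMO_CREATE_SHARED,   /* case 2: both NULL - create one island, refcount 2 */
  DEMO_ITEM_ADOPTS_BAG, /* case 3a: only item->pi is NULL */
  DEMO_BAG_ADOPTS_ITEM, /* case 3b: only bag->pi is NULL */
  DEMO_MERGE            /* case 4: two distinct islands - merge them */
} demo_link_action;

static demo_link_action demo_classify(polling_island* item_pi,
                                      polling_island* bag_pi) {
  if (item_pi == bag_pi) {
    return item_pi == nullptr ? DEMO_CREATE_SHARED : DEMO_ALREADY_LINKED;
  }
  if (item_pi == nullptr) return DEMO_ITEM_ADOPTS_BAG;
  if (bag_pi == nullptr) return DEMO_BAG_ADOPTS_ITEM;
  return DEMO_MERGE;
}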
Craig Tiller57726ca2016-09-12 11:59:45 -07001555
Craig Tillerbaa14a92017-11-03 09:09:36 -07001556static void pollset_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1557 grpc_fd* fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001558 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001559 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001560}
1561
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001562/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001563 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001564 */
1565
Craig Tillerbaa14a92017-11-03 09:09:36 -07001566static grpc_pollset_set* pollset_set_create(void) {
1567 grpc_pollset_set* pss = (grpc_pollset_set*)gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001568 gpr_mu_init(&pss->po.mu);
Craig Tiller4782d922017-11-10 09:53:21 -08001569 pss->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001570#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001571 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1572#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001573 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001574}
1575
Craig Tillerbaa14a92017-11-03 09:09:36 -07001576static void pollset_set_destroy(grpc_exec_ctx* exec_ctx,
1577 grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001578 gpr_mu_destroy(&pss->po.mu);
1579
Craig Tiller4782d922017-11-10 09:53:21 -08001580 if (pss->po.pi != nullptr) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001581 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001582 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001583
1584 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001585}
1586
Craig Tillerbaa14a92017-11-03 09:09:36 -07001587static void pollset_set_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1588 grpc_fd* fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001589 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001590 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001591}
1592
Craig Tillerbaa14a92017-11-03 09:09:36 -07001593static void pollset_set_del_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1594 grpc_fd* fd) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001595 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001596}
1597
Craig Tillerbaa14a92017-11-03 09:09:36 -07001598static void pollset_set_add_pollset(grpc_exec_ctx* exec_ctx,
1599 grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001600 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001601 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001602}
1603
Craig Tillerbaa14a92017-11-03 09:09:36 -07001604static void pollset_set_del_pollset(grpc_exec_ctx* exec_ctx,
1605 grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001606 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001607}
1608
Craig Tillerbaa14a92017-11-03 09:09:36 -07001609static void pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx,
1610 grpc_pollset_set* bag,
1611 grpc_pollset_set* item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001612 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001613 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001614}
1615
Craig Tillerbaa14a92017-11-03 09:09:36 -07001616static void pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx,
1617 grpc_pollset_set* bag,
1618 grpc_pollset_set* item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001619 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001620}
1621
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001622/* Test helper functions */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001624void* grpc_fd_get_polling_island(grpc_fd* fd) {
1625 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001626
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001627 gpr_mu_lock(&fd->po.mu);
1628 pi = fd->po.pi;
1629 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001630
1631 return pi;
1632}
1633
Craig Tillerbaa14a92017-11-03 09:09:36 -07001634void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1635 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001636
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001637 gpr_mu_lock(&ps->po.mu);
1638 pi = ps->po.pi;
1639 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001640
1641 return pi;
1642}
1643
Craig Tillerbaa14a92017-11-03 09:09:36 -07001644bool grpc_are_polling_islands_equal(void* p, void* q) {
1645 polling_island* p1 = (polling_island*)p;
1646 polling_island* p2 = (polling_island*)q;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001647
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001648 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1649 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001650 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001651 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001652
1653 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001654}
1655
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001656/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001657 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001658 */
1659
1660static void shutdown_engine(void) {
1661 fd_global_shutdown();
1662 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001663 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001664}
1665
1666static const grpc_event_engine_vtable vtable = {
Yash Tibrewal533d1182017-09-18 10:48:22 -07001667 sizeof(grpc_pollset),
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001668
Yash Tibrewal533d1182017-09-18 10:48:22 -07001669 fd_create,
1670 fd_wrapped_fd,
1671 fd_orphan,
1672 fd_shutdown,
1673 fd_notify_on_read,
1674 fd_notify_on_write,
1675 fd_is_shutdown,
1676 fd_get_read_notifier_pollset,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001677
Yash Tibrewal533d1182017-09-18 10:48:22 -07001678 pollset_init,
1679 pollset_shutdown,
1680 pollset_destroy,
1681 pollset_work,
1682 pollset_kick,
1683 pollset_add_fd,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001684
Yash Tibrewal533d1182017-09-18 10:48:22 -07001685 pollset_set_create,
1686 pollset_set_destroy,
1687 pollset_set_add_pollset,
1688 pollset_set_del_pollset,
1689 pollset_set_add_pollset_set,
1690 pollset_set_del_pollset_set,
1691 pollset_set_add_fd,
1692 pollset_set_del_fd,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001693
Yash Tibrewal533d1182017-09-18 10:48:22 -07001694 shutdown_engine,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001695};
1696
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001697/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1698 * Create a dummy epoll_fd to make sure epoll support is available */
1699static bool is_epoll_available() {
1700 int fd = epoll_create1(EPOLL_CLOEXEC);
1701 if (fd < 0) {
1702 gpr_log(
1703 GPR_ERROR,
1704 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1705        errno);
1706 return false;
1707 }
1708 close(fd);
1709 return true;
1710}
1711
Craig Tillerbaa14a92017-11-03 09:09:36 -07001712const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001713 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001714 /* If use of signals is disabled, we cannot use epoll engine*/
1715 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
yang-g30101b02017-11-06 14:35:30 -08001716 gpr_log(GPR_ERROR, "Skipping epollsig because use of signals is disabled.");
Craig Tiller4782d922017-11-10 09:53:21 -08001717 return nullptr;
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001718 }
1719
Ken Paysoncd7d0472016-10-11 12:24:20 -07001720 if (!grpc_has_wakeup_fd()) {
yang-g30101b02017-11-06 14:35:30 -08001721 gpr_log(GPR_ERROR, "Skipping epollsig because of no wakeup fd.");
Craig Tiller4782d922017-11-10 09:53:21 -08001722 return nullptr;
Ken Paysonbc544be2016-10-06 19:23:47 -07001723 }
1724
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001725 if (!is_epoll_available()) {
yang-g30101b02017-11-06 14:35:30 -08001726 gpr_log(GPR_ERROR, "Skipping epollsig because epoll is unavailable.");
Craig Tiller4782d922017-11-10 09:53:21 -08001727 return nullptr;
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001728 }
1729
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001730 if (!is_grpc_wakeup_signal_initialized) {
Sree Kuchibhotla0fda8802017-08-30 20:34:51 -07001731 if (explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001732 grpc_use_signal(SIGRTMIN + 6);
1733 } else {
yang-g30101b02017-11-06 14:35:30 -08001734 gpr_log(GPR_ERROR,
1735            "Skipping epollsig because the wakeup signal is uninitialized.");
Craig Tiller4782d922017-11-10 09:53:21 -08001736 return nullptr;
Craig Tillerf8382b82017-04-27 15:09:48 -07001737 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001738 }
1739
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001740 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001741
1742 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001743 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001744 }
1745
1746 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1747 polling_island_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001748 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001749 }
1750
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001751 return &vtable;
1752}
1753
murgatroid99623dd4f2016-08-08 17:31:27 -07001754#else /* defined(GRPC_LINUX_EPOLL) */
1755#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001756#include "src/core/lib/iomgr/ev_epollsig_linux.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001757/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001758 * NULL */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001759const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001760 bool explicit_request) {
yang-g30101b02017-11-06 14:35:30 -08001761 gpr_log(GPR_ERROR,
1762          "Skipping epollsig because GRPC_LINUX_EPOLL is not defined.");
Craig Tillerf8382b82017-04-27 15:09:48 -07001763 return NULL;
1764}
murgatroid99623dd4f2016-08-08 17:31:27 -07001765#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001766
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001767void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001768#endif /* !defined(GRPC_LINUX_EPOLL) */