blob: 7bdfa22b8e46ff5edceec84b768381fae16c6e68 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
Alexander Polcyndb3e8982018-02-21 16:59:24 -080019#include <grpc/support/port_platform.h>
20
murgatroid9954070892016-08-08 17:01:18 -070021#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070022
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070023#include <grpc/grpc_posix.h>
yang-gceb24752017-11-07 12:06:37 -080024#include <grpc/support/log.h>
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070025
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070026/* This polling engine is only relevant on linux kernels supporting epoll() */
Mehrdad Afsharifb669002018-01-17 15:37:56 -080027#ifdef GRPC_LINUX_EPOLL_CREATE1
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070028
Craig Tiller4509c472017-04-27 19:05:13 +000029#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070030
31#include <assert.h>
32#include <errno.h>
Craig Tiller20397792017-07-18 11:35:27 -070033#include <limits.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070034#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070035#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070036#include <signal.h>
37#include <string.h>
38#include <sys/epoll.h>
39#include <sys/socket.h>
40#include <unistd.h>
41
42#include <grpc/support/alloc.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070043#include <grpc/support/string_util.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070044
Craig Tillerb4bb1cd2017-07-20 14:18:17 -070045#include "src/core/lib/debug/stats.h"
Vijay Paib6cf1232018-01-25 21:02:26 -080046#include "src/core/lib/gpr/tls.h"
Vijay Paid4d0a302018-01-25 13:24:03 -080047#include "src/core/lib/gpr/useful.h"
Mark D. Roth4f2b0fd2018-01-19 12:12:23 -080048#include "src/core/lib/gprpp/manual_constructor.h"
Craig Tiller6b7c1fb2017-07-19 15:45:03 -070049#include "src/core/lib/iomgr/block_annotate.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070050#include "src/core/lib/iomgr/ev_posix.h"
51#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070052#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070053#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070054#include "src/core/lib/iomgr/wakeup_fd_posix.h"
55#include "src/core/lib/profiling/timers.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070056
Craig Tillerbaa14a92017-11-03 09:09:36 -070057#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)
Craig Tillere24b24d2017-04-06 16:05:45 -070058
Noah Eisenc384d812017-11-12 20:14:27 -080059#define GRPC_POLLING_TRACE(...) \
ncteisen9ffb1492017-11-10 14:00:49 -080060 if (grpc_polling_trace.enabled()) { \
Noah Eisenc384d812017-11-12 20:14:27 -080061 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070062 }
63
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070064static int grpc_wakeup_signal = -1;
65static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070066
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070067/* Implements the function defined in grpc_posix.h. This function might be
68 * called before even calling grpc_init() to set either a different signal to
69 * use. If signum == -1, then the use of signals is disabled */
70void grpc_use_signal(int signum) {
71 grpc_wakeup_signal = signum;
72 is_grpc_wakeup_signal_initialized = true;
73
74 if (grpc_wakeup_signal < 0) {
75 gpr_log(GPR_INFO,
76 "Use of signals is disabled. Epoll engine will not be used");
77 } else {
78 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
79 grpc_wakeup_signal);
80 }
81}
82
83struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070084
/* Kinds of objects that can be attached to a polling island. Used mainly for
   debug assertions and trace messages (see poll_obj_string()). */
typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

/* Common header embedded at the start of grpc_fd, grpc_pollset and
   grpc_pollset_set: a mutex plus the polling island the object currently
   belongs to. */
typedef struct poll_obj {
#ifndef NDEBUG
  /* Which kind of object this header belongs to (debug builds only). */
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island* pi;
} poll_obj;
98
Craig Tillerbaa14a92017-11-03 09:09:36 -070099const char* poll_obj_string(poll_obj_type po_type) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800100 switch (po_type) {
101 case POLL_OBJ_FD:
102 return "fd";
103 case POLL_OBJ_POLLSET:
104 return "pollset";
105 case POLL_OBJ_POLLSET_SET:
106 return "pollset_set";
107 }
108
109 GPR_UNREACHABLE_CODE(return "UNKNOWN");
110}
111
Craig Tillerbaa14a92017-11-03 09:09:36 -0700112 /*******************************************************************************
113 * Fd Declarations
114 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800115
Craig Tillerbaa14a92017-11-03 09:09:36 -0700116#define FD_FROM_PO(po) ((grpc_fd*)(po))
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800117
struct grpc_fd {
  /* Common poll-object header; must be first (structs are cast to poll_obj*,
     see FD_FROM_PO). */
  poll_obj po;

  int fd;
  /* refst format:
     bit 0 : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     cases, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  /* Lock-free notification state for readability/writability/error events. */
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> error_closure;

  /* Next entry in the global fd freelist (fds are recycled, not freed). */
  struct grpc_fd* freelist_next;
  /* Closure to invoke once the fd has been fully orphaned. */
  grpc_closure* on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;

  /* Do we need to track EPOLLERR events separately? */
  bool track_err;
};
149
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700150/* Reference counting for fds */
ncteisend39010e2017-06-08 17:08:07 -0700151#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700152static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
153static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700154 int line);
155#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
156#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
157#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700158static void fd_ref(grpc_fd* fd);
159static void fd_unref(grpc_fd* fd);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700160#define GRPC_FD_REF(fd, reason) fd_ref(fd)
161#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
162#endif
163
164static void fd_global_init(void);
165static void fd_global_shutdown(void);
166
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700167/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700168 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700169 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700170
ncteisena1354852017-06-08 16:25:53 -0700171#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700172
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700173#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800174#define PI_UNREF(p, r) pi_unref_dbg((p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700175
ncteisend39010e2017-06-08 17:08:07 -0700176#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700177
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700178#define PI_ADD_REF(p, r) pi_add_ref((p))
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800179#define PI_UNREF(p, r) pi_unref((p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700180
ncteisena1354852017-06-08 16:25:53 -0700181#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700182
/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd** fds;
} polling_island;
214
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700215/*******************************************************************************
216 * Pollset Declarations
217 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;

  /* Doubly-linked list of workers rooted at the owning pollset. */
  struct grpc_pollset_worker* next;
  struct grpc_pollset_worker* prev;
};

struct grpc_pollset {
  /* Common poll-object header; must be first. */
  poll_obj po;

  /* Sentinel node of the worker list. */
  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure* shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  /* Common poll-object header; a pollset_set carries no extra state. */
  poll_obj po;
};
245
246/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700247 * Common helpers
248 */
249
Craig Tillerbaa14a92017-11-03 09:09:36 -0700250static bool append_error(grpc_error** composite, grpc_error* error,
251 const char* desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700252 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700253 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700254 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700255 }
256 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700257 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700258}
259
260/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700261 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700262 */
263
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700264/* The wakeup fd that is used to wake up all threads in a Polling island. This
265 is useful in the polling island merge operation where we need to wakeup all
266 the threads currently polling the smaller polling island (so that they can
267 start polling the new/merged polling island)
268
269 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
270 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
271static grpc_wakeup_fd polling_island_wakeup_fd;
272
Craig Tiller2e620132016-10-10 15:27:44 -0700273/* The polling island being polled right now.
274 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700275static __thread polling_island* g_current_thread_polling_island;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700276
Craig Tillerb39307d2016-06-30 15:39:13 -0700277/* Forward declaration */
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800278static void polling_island_delete(polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700279
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700280#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700281/* Currently TSAN may incorrectly flag data races between epoll_ctl and
282 epoll_wait for any grpc_fd structs that are added to the epoll set via
283 epoll_ctl and are returned (within a very short window) via epoll_wait().
284
285 To work-around this race, we establish a happens-before relation between
286 the code just-before epoll_ctl() and the code after epoll_wait() by using
287 this atomic */
288gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700289#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700290
Craig Tillerbaa14a92017-11-03 09:09:36 -0700291static void pi_add_ref(polling_island* pi);
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800292static void pi_unref(polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700293
#ifndef NDEBUG
/* Debug wrapper for PI_ADD_REF(): when polling tracing is enabled, logs the
   ref-count transition along with the caller-supplied reason and call site,
   then takes the reference. */
static void pi_add_ref_dbg(polling_island* pi, const char* reason,
                           const char* file, int line) {
  if (grpc_polling_trace.enabled()) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_INFO,
            "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

/* Debug wrapper for PI_UNREF(): when polling tracing is enabled, logs the
   ref-count transition, then drops the reference (which may delete 'pi'). */
static void pi_unref_dbg(polling_island* pi, const char* reason,
                         const char* file, int line) {
  if (grpc_polling_trace.enabled()) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_INFO,
            "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(pi);
}
#endif
319
/* Takes a reference on the polling island (lock-free). */
static void pi_add_ref(polling_island* pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700323
/* Drops a reference on the polling island, deleting it on the last unref. */
static void pi_unref(polling_island* pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    /* Load merged_to BEFORE deleting pi, which frees the struct. */
    polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(pi);
    if (next != nullptr) {
      PI_UNREF(next, "pi_delete"); /* Recursive call */
    }
  }
}
341
/* Adds 'fd_count' fds from 'fds' to pi's epoll set and to pi->fds (growing
   the array as needed). fds already present in the epoll set (EEXIST) are
   skipped silently; other epoll_ctl failures are appended to '*error'.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error** error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char* err_msg;
  const char* err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLOUT | EPOLLET);
    /* Use the least significant bit of ev.data.ptr to store track_err to avoid
     * synchronization issues when accessing it after receiving an event */
    ev.data.ptr = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(fds[i]) |
                                          (fds[i]->track_err ? 1 : 0));
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      /* On any add failure, skip the bookkeeping below for this fd. */
      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      /* Grow geometrically (at least by 8) to amortize realloc cost. */
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = static_cast<grpc_fd**>(
          gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity));
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}
390
/* Adds the read end of 'wakeup_fd' to pi's epoll set (edge-triggered).
   EEXIST is ignored; other epoll_ctl failures are appended to '*error'.
   The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island* pi,
                                                grpc_wakeup_fd* wakeup_fd,
                                                grpc_error** error) {
  struct epoll_event ev;
  int err;
  char* err_msg;
  const char* err_desc = "polling_island_add_wakeup_fd";

  ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLET);
  /* data.ptr holds the wakeup fd itself so the poller can distinguish it
     from grpc_fd events. */
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}
414
/* Removes every fd from pi's epoll set and empties pi->fds. ENOENT (fd was
   already gone, e.g. closed) is ignored; other epoll_ctl failures are
   appended to '*error'.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island* pi,
                                                 bool remove_fd_refs,
                                                 grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, nullptr);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}
442
/* Removes a single fd from pi's epoll set and from pi->fds, dropping the
   island's reference on it.
   The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
                                            grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically been
     removed from the epoll set */
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, nullptr);
  if (err < 0 && errno != ENOENT) {
    gpr_asprintf(
        &err_msg,
        "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
        pi->epoll_fd, fd->fd, errno, strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }

  /* Swap-with-last removal: order of pi->fds is not significant. */
  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
471
/* Allocates and initializes a new polling island with a fresh epoll set,
   optionally pre-registering 'initial_fd'. On failure, '*error' is populated
   and any partially-built island is destroyed.
   Might return NULL in case of an error */
static polling_island* polling_island_create(grpc_fd* initial_fd,
                                             grpc_error** error) {
  polling_island* pi = nullptr;
  const char* err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = static_cast<polling_island*>(gpr_malloc(sizeof(*pi)));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = nullptr;
  /* -1 marks "no epoll set yet" so the cleanup path can skip close(). */
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm) nullptr);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != nullptr) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(pi);
    pi = nullptr;
  }
  return pi;
}
509
/* Destroys a polling island. All fds must already have been removed
   (fd_cnt == 0); closes the epoll fd if one was created. */
static void polling_island_delete(polling_island* pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}
520
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700521/* Attempts to gets the last polling island in the linked list (liked by the
522 * 'merged_to' field). Since this does not lock the polling island, there are no
523 * guarantees that the island returned is the last island */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700524static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
525 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Craig Tiller4782d922017-11-10 09:53:21 -0800526 while (next != nullptr) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700527 pi = next;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700528 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700529 }
530
531 return pi;
532}
533
/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island* polling_island_lock(polling_island* pi) {
  polling_island* next = nullptr;

  while (true) {
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    if (next == nullptr) {
      /* Looks like 'pi' is the last node in the linked list but unless we
         check this by holding the pi->mu lock, we cannot be sure (i.e without
         the pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
      if (next == nullptr) {
        /* pi is in fact the last node and we have the pi->mu lock. done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}
570
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island** p, polling_island** q) {
  polling_island* pi_1 = *p;
  polling_island* pi_2 = *q;
  polling_island* next_1 = nullptr;
  polling_island* next_2 = nullptr;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    /* Chase each island's 'merged_to' chain to its current tail. */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != nullptr) {
      pi_1 = next_1;
      next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != nullptr) {
      pi_2 = next_2;
      next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    }

    /* Same island: one lock suffices (and avoids locking the same mu twice) */
    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Distinct islands: lock in address order to avoid deadlock */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    /* Re-check both are still list tails; if a merge raced us, release the
       locks and retry from the top */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == nullptr && next_2 == nullptr) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
651
Craig Tillerbaa14a92017-11-03 09:09:36 -0700652static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700653 if (p == q) {
654 gpr_mu_unlock(&p->mu);
655 } else {
656 gpr_mu_unlock(&p->mu);
657 gpr_mu_unlock(&q->mu);
658 }
659}
660
/* Merges the polling islands 'p' and 'q' (following their 'merged_to' chains
   to the latest islands first) and returns the surviving island. Any errors
   encountered while moving fds are accumulated into *error. */
static polling_island* polling_island_merge(polling_island* p,
                                            polling_island* q,
                                            grpc_error** error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island*, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
694
Craig Tillerbaa14a92017-11-03 09:09:36 -0700695static grpc_error* polling_island_global_init() {
696 grpc_error* error = GRPC_ERROR_NONE;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700697
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700698 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
699 if (error == GRPC_ERROR_NONE) {
700 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
701 }
702
703 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700704}
705
/* Tears down the shared wakeup fd created by polling_island_global_init(). */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
709
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700710/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700711 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700712 */
713
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700714/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700715 * but instead so that implementations with multiple threads in (for example)
716 * epoll_wait deal with the race between pollset removal and incoming poll
717 * notifications.
718 *
719 * The problem is that the poller ultimately holds a reference to this
720 * object, so it is very difficult to know when is safe to free it, at least
721 * without some expensive synchronization.
722 *
723 * If we keep the object freelisted, in the worst case losing this race just
724 * becomes a spurious read notification on a reused fd.
725 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700726
727/* The alarm system needs to be able to wakeup 'some poller' sometimes
728 * (specifically when a new alarm needs to be triggered earlier than the next
729 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
730 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700731
Craig Tiller4782d922017-11-10 09:53:21 -0800732static grpc_fd* fd_freelist = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700733static gpr_mu fd_freelist_mu;
734
/* Adds 'n' to fd->refst. NOTE: the #ifndef/#else below deliberately open the
   same function with two different signatures and share the closing body after
   the #endif: debug builds get a traced variant (file/line/reason logging),
   release builds get the bare atomic increment. */
#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                   int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd* fd, int n) {
#endif
  /* Refcount must already be positive: ref_by must not resurrect an fd. */
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
753
/* Subtracts 'n' from fd->refst; when the count reaches zero the grpc_fd is
   recycled onto the freelist (not freed — see the freelist rationale above).
   Same shared-tail #ifdef structure as ref_by. */
#ifndef NDEBUG
static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                     int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd* fd, int n) {
#endif
  /* Full barrier: the thread dropping the last ref must see all prior
     writes to the fd before recycling it. */
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    fd->read_closure->DestroyEvent();
    fd->write_closure->DestroyEvent();
    fd->error_closure->DestroyEvent();

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}
783
/* Increment refcount by two to avoid changing the orphan bit */
/* (The low bit of refst encodes state, so external refs are counted in
   steps of 2 — see REF_BY/UNREF_BY usage elsewhere in this file.) */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
#endif
799
/* One-time initialization of the mutex guarding the fd freelist. */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
801
/* Drains and frees the fd freelist at shutdown. */
static void fd_global_shutdown(void) {
  /* Empty lock/unlock pair: acts as a barrier so any thread currently inside
     the fd_freelist_mu critical section finishes before we walk the list.
     NOTE(review): presumably no new fds are created concurrently at this
     point — the traversal below is unsynchronized. */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != nullptr) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
813
/* Wraps the raw file descriptor 'fd' in a grpc_fd, reusing a struct from the
   freelist when available. 'name' is only used for the iomgr object label;
   'track_err' is stored on the fd (consumed elsewhere). Returns a new fd with
   refcount 1 and no polling island attached. */
static grpc_fd* fd_create(int fd, const char* name, bool track_err) {
  grpc_fd* new_fd = nullptr;

  /* Try to recycle a previously-orphaned grpc_fd first. */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != nullptr) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == nullptr) {
    /* Freelist empty: allocate and do the one-time construction (mutex and
       lockfree-event storage) that survives recycling. */
    new_fd = static_cast<grpc_fd*>(gpr_malloc(sizeof(grpc_fd)));
    gpr_mu_init(&new_fd->po.mu);
    new_fd->read_closure.Init();
    new_fd->write_closure.Init();
    new_fd->error_closure.Init();
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = nullptr;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  new_fd->read_closure->InitEvent();
  new_fd->write_closure->InitEvent();
  new_fd->error_closure->InitEvent();
  new_fd->track_err = track_err;
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = nullptr;
  new_fd->on_done_closure = nullptr;

  gpr_mu_unlock(&new_fd->po.mu);

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}
861
Craig Tillerbaa14a92017-11-03 09:09:36 -0700862static int fd_wrapped_fd(grpc_fd* fd) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700863 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800864 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700865 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700866 ret_fd = fd->fd;
867 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800868 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700869
870 return ret_fd;
871}
872
/* Detaches 'fd' from its polling island and releases ownership of the raw
   descriptor. If 'release_fd' is non-NULL the raw descriptor is handed back
   through it; otherwise it is close()d. 'on_done' is scheduled (with any
   accumulated error) once the orphan bookkeeping is complete. */
static void fd_orphan(grpc_fd* fd, grpc_closure* on_done, int* release_fd,
                      const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* unref_pi = nullptr;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != nullptr) {
    polling_island* pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = nullptr;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != nullptr) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != nullptr) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char* msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
929
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800930static grpc_pollset* fd_get_read_notifier_pollset(grpc_fd* fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800931 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700932 return (grpc_pollset*)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700933}
934
/* True once fd_shutdown has marked the read closure shut down. */
static bool fd_is_shutdown(grpc_fd* fd) {
  return fd->read_closure->IsShutdown();
}
938
/* Might be called multiple times */
/* Shuts down 'fd'. Only the first successful SetShutdown on the read closure
   triggers the socket shutdown and the write/error closure shutdowns; later
   calls fall through. Takes ownership of one ref on 'why' (unreffed here;
   SetShutdown calls each receive their own ref). */
static void fd_shutdown(grpc_fd* fd, grpc_error* why) {
  if (fd->read_closure->SetShutdown(GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    fd->write_closure->SetShutdown(GRPC_ERROR_REF(why));
    fd->error_closure->SetShutdown(GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}
948
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800949static void fd_notify_on_read(grpc_fd* fd, grpc_closure* closure) {
Yash Tibrewalf0397932018-05-31 19:39:52 -0700950 if (closure != nullptr) {
951 fd->read_closure->NotifyOn(closure);
952 } else {
953 fd->read_closure->SetReady();
954 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700955}
956
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800957static void fd_notify_on_write(grpc_fd* fd, grpc_closure* closure) {
Yash Tibrewalf0397932018-05-31 19:39:52 -0700958 if (closure != nullptr) {
959 fd->write_closure->NotifyOn(closure);
960 } else {
961 fd->write_closure->SetReady();
962 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700963}
964
Yash Tibrewaladc733f2018-04-02 18:32:06 -0700965static void fd_notify_on_error(grpc_fd* fd, grpc_closure* closure) {
Yash Tibrewalf0397932018-05-31 19:39:52 -0700966 if (closure != nullptr) {
967 fd->error_closure->NotifyOn(closure);
968 } else {
969 fd->error_closure->SetReady();
970 }
Yash Tibrewaladc733f2018-04-02 18:32:06 -0700971}
972
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700973/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700974 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700975 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700976GPR_TLS_DECL(g_current_thread_pollset);
977GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700978static __thread bool g_initialized_sigmask;
979static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700980
/* Handler for the wakeup signal. Intentionally (almost) a no-op: the signal's
   only job is to interrupt a blocking epoll_wait in a target thread. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700986
/* Installs sig_handler for the wakeup signal used by pollset_worker_kick's
   pthread_kill to interrupt pollers. */
static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700988
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700989/* Global state management */
/* Initializes the thread-local slots used by the kick logic and installs the
   wakeup-signal handler. Never fails (returns GRPC_ERROR_NONE). */
static grpc_error* pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}
996
/* Destroys the thread-local slots created in pollset_global_init. */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
1001
/* Kicks a single worker by delivering the wakeup signal to its thread.
   Idempotent per kick cycle: the CAS on worker->is_kicked ensures at most one
   signal is sent until the flag is reset elsewhere. */
static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
  grpc_error* err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, static_cast<gpr_atm>(0),
                             static_cast<gpr_atm>(1))) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void*)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
1018
/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
/* The root worker is a sentinel: a self-linked list means no workers. */
static int pollset_has_workers(grpc_pollset* p) {
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001024
/* Unlinks 'worker' from p's doubly-linked worker list (p->mu must be held;
   'p' itself is unused but kept for signature symmetry with the push/pop
   helpers). */
static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
  worker->prev->next = worker->next;
  worker->next->prev = worker->prev;
}
1029
Craig Tillerbaa14a92017-11-03 09:09:36 -07001030static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001031 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001032 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001033 remove_worker(p, w);
1034 return w;
1035 } else {
Craig Tiller4782d922017-11-10 09:53:21 -08001036 return nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001037 }
1038}
1039
/* Inserts 'worker' at the tail of p's worker list, just before the root
   sentinel (p->mu must be held). */
static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
  worker->next = &p->root_worker;
  worker->prev = worker->next->prev;
  worker->prev->next = worker->next->prev = worker;
}
1045
/* Inserts 'worker' at the head of p's worker list, just after the root
   sentinel (p->mu must be held). */
static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
  worker->prev = &p->root_worker;
  worker->next = worker->prev->next;
  worker->prev->next = worker->next->prev = worker;
}
1051
/* p->mu must be held before calling this function */
/* Wakes worker(s) on pollset 'p':
   - specific_worker == GRPC_POLLSET_KICK_BROADCAST: kick every worker except
     the calling thread's own;
   - specific_worker != NULL: kick just that worker (unless it is ourselves);
   - specific_worker == NULL: kick "any" worker — rotate one from the front of
     the list to the back and kick it; if no workers, remember the kick via
     kicked_without_pollers. */
static grpc_error* pollset_kick(grpc_pollset* p,
                                grpc_pollset_worker* specific_worker) {
  GPR_TIMER_SCOPE("pollset_kick", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  GRPC_STATS_INC_POLLSET_KICK();
  const char* err_desc = "Kick Failure";
  grpc_pollset_worker* worker = specific_worker;
  if (worker != nullptr) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_SCOPE("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          /* Never signal the calling thread: it can absorb the kick itself. */
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
      } else {
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != nullptr) {
      GPR_TIMER_MARK("finally_kick", 0);
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1102
/* Initializes 'pollset' and hands its mutex back through *mu (the caller uses
   that mutex to serialize access). The worker list starts as a self-linked
   sentinel, and no polling island is attached yet. */
static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = nullptr;
#ifndef NDEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = nullptr;
}
1118
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001119static int poll_deadline_to_millis_timeout(grpc_millis millis) {
Craig Tiller20397792017-07-18 11:35:27 -07001120 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001121 grpc_millis delta = millis - grpc_core::ExecCtx::Get()->Now();
Craig Tiller20397792017-07-18 11:35:27 -07001122 if (delta > INT_MAX)
1123 return INT_MAX;
1124 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001125 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001126 else
Noah Eisenbe82e642018-02-09 09:16:55 -08001127 return static_cast<int>(delta);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001128}
1129
/* Mark 'fd' ready for reading (schedules any closure waiting on readability)
   and record 'notifier' as the pollset that observed the event. */
static void fd_become_readable(grpc_fd* fd, grpc_pollset* notifier) {
  fd->read_closure->SetReady();

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifer is set as the read_notifier_pollset
     (They would both point to the same polling island anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1141
/* Mark 'fd' ready for writing: schedules any closure waiting on writability. */
static void fd_become_writable(grpc_fd* fd) { fd->write_closure->SetReady(); }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001143
/* Signal a pending error on 'fd' (EPOLLERR seen while error tracking is on). */
static void fd_has_errors(grpc_fd* fd) { fd->error_closure->SetReady(); }
1145
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001146static void pollset_release_polling_island(grpc_pollset* ps,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001147 const char* reason) {
Craig Tiller4782d922017-11-10 09:53:21 -08001148 if (ps->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001149 PI_UNREF(ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001150 }
Craig Tiller4782d922017-11-10 09:53:21 -08001151 ps->po.pi = nullptr;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001152}
1153
/* Complete pollset shutdown: release the polling-island ref and schedule the
   user-supplied shutdown_done closure. Caller must hold pollset->po.mu and
   the pollset must have no remaining workers (asserted below). */
static void finish_shutdown_locked(grpc_pollset* pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(pollset, "ps_shutdown");
  GRPC_CLOSURE_SCHED(pollset->shutdown_done, GRPC_ERROR_NONE);
}
1164
/* pollset->po.mu lock must be held by the caller before calling this */
/* Begin orderly shutdown: kick all workers so they drain out of
   pollset_work(), then either finish shutdown immediately (no workers) or
   let the last departing worker finish it. 'closure' runs when shutdown
   completes. Calling this twice on the same pollset is a bug (asserted). */
static void pollset_shutdown(grpc_pollset* pollset, grpc_closure* closure) {
  GPR_TIMER_SCOPE("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  /* Wake every worker so each can observe shutting_down and exit */
  pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(pollset);
  }
}
1182
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_pollset* pollset) {
  /* Shutdown must have fully drained: no worker may still be attached. */
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}
1190
Craig Tiller84ea3412016-09-08 14:57:56 -07001191#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001192/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001193static void pollset_work_and_unlock(grpc_pollset* pollset,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001194 grpc_pollset_worker* worker, int timeout_ms,
1195 sigset_t* sig_mask, grpc_error** error) {
yang-gce1cfea2018-01-31 15:59:50 -08001196 GPR_TIMER_SCOPE("pollset_work_and_unlock", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001197 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001198 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001199 int ep_rv;
Craig Tiller4782d922017-11-10 09:53:21 -08001200 polling_island* pi = nullptr;
Craig Tillerbaa14a92017-11-03 09:09:36 -07001201 char* err_msg;
1202 const char* err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001203
1204 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001205 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001206
1207 Since epoll_fd is immutable, we can read it without obtaining the polling
1208 island lock. There is however a possibility that the polling island (from
1209 which we got the epoll_fd) got merged with another island while we are
1210 in this function. This is still okay because in such a case, we will wakeup
1211 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001212 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001213
Craig Tiller4782d922017-11-10 09:53:21 -08001214 if (pollset->po.pi == nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001215 pollset->po.pi = polling_island_create(nullptr, error);
Craig Tiller4782d922017-11-10 09:53:21 -08001216 if (pollset->po.pi == nullptr) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001217 return; /* Fatal error. We cannot continue */
1218 }
1219
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001220 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001221 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001222 (void*)pollset, (void*)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001223 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001224
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001225 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001226 epoll_fd = pi->epoll_fd;
1227
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001228 /* Update the pollset->po.pi since the island being pointed by
1229 pollset->po.pi maybe older than the one pointed by pi) */
1230 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001231 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1232 polling island to be deleted */
1233 PI_ADD_REF(pi, "ps");
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001234 PI_UNREF(pollset->po.pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001235 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001236 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001237
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001238 /* Add an extra ref so that the island does not get destroyed (which means
1239 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1240 epoll_fd */
1241 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001242 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001243
Craig Tiller61f96c12017-05-12 13:36:39 -07001244 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1245 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001246
Craig Tiller61f96c12017-05-12 13:36:39 -07001247 GRPC_SCHEDULING_START_BLOCKING_REGION;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001248 GRPC_STATS_INC_SYSCALL_POLL();
Craig Tiller61f96c12017-05-12 13:36:39 -07001249 ep_rv =
1250 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001251 GRPC_SCHEDULING_END_BLOCKING_REGION;
Craig Tiller61f96c12017-05-12 13:36:39 -07001252 if (ep_rv < 0) {
1253 if (errno != EINTR) {
1254 gpr_asprintf(&err_msg,
1255 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1256 epoll_fd, errno, strerror(errno));
1257 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1258 } else {
1259 /* We were interrupted. Save an interation by doing a zero timeout
1260 epoll_wait to see if there are any other events of interest */
1261 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001262 (void*)pollset, (void*)worker);
Craig Tiller61f96c12017-05-12 13:36:39 -07001263 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001264 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001265 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001266
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001267#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001268 /* See the definition of g_poll_sync for more details */
1269 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001270#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001271
Craig Tiller61f96c12017-05-12 13:36:39 -07001272 for (int i = 0; i < ep_rv; ++i) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001273 void* data_ptr = ep_ev[i].data.ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001274 if (data_ptr == &polling_island_wakeup_fd) {
1275 GRPC_POLLING_TRACE(
1276 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1277 "%d) got merged",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001278 (void*)pollset, (void*)worker, epoll_fd);
Craig Tiller61f96c12017-05-12 13:36:39 -07001279 /* This means that our polling island is merged with a different
1280 island. We do not have to do anything here since the subsequent call
1281 to the function pollset_work_and_unlock() will pick up the correct
1282 epoll_fd */
1283 } else {
Yash Tibrewaladc733f2018-04-02 18:32:06 -07001284 grpc_fd* fd = reinterpret_cast<grpc_fd*>(
1285 reinterpret_cast<intptr_t>(data_ptr) & ~static_cast<intptr_t>(1));
1286 bool track_err =
1287 reinterpret_cast<intptr_t>(data_ptr) & ~static_cast<intptr_t>(1);
1288 bool cancel = (ep_ev[i].events & EPOLLHUP) != 0;
1289 bool error = (ep_ev[i].events & EPOLLERR) != 0;
1290 bool read_ev = (ep_ev[i].events & (EPOLLIN | EPOLLPRI)) != 0;
1291 bool write_ev = (ep_ev[i].events & EPOLLOUT) != 0;
Yash Tibrewal905f0ca2018-05-31 19:47:30 -07001292 bool err_fallback = error && !track_err;
Yash Tibrewaladc733f2018-04-02 18:32:06 -07001293
1294 if (error && !err_fallback) {
1295 fd_has_errors(fd);
1296 }
1297 if (read_ev || cancel || err_fallback) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001298 fd_become_readable(fd, pollset);
Craig Tiller61f96c12017-05-12 13:36:39 -07001299 }
Yash Tibrewaladc733f2018-04-02 18:32:06 -07001300 if (write_ev || cancel || err_fallback) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001301 fd_become_writable(fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001302 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001303 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001304 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001305
Craig Tiller4782d922017-11-10 09:53:21 -08001306 g_current_thread_polling_island = nullptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001307 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1308
Craig Tiller4782d922017-11-10 09:53:21 -08001309 GPR_ASSERT(pi != nullptr);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001310
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001311 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001312 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001313 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001314 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001315 code when there is an island merge while we are doing epoll_wait() above */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001316 PI_UNREF(pi, "ps_work");
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001317}
1318
/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns.

   Runs the calling thread as one 'worker' of the pollset: it blocks in
   epoll_pwait() (via pollset_work_and_unlock) until the deadline expires,
   events arrive, or the worker is kicked. If 'worker_hdl' is non-NULL it is
   pointed at the on-stack worker for the duration of the call and reset to
   NULL before returning. Returns (and logs) any aggregated polling error. */
static grpc_error* pollset_work(grpc_pollset* pollset,
                                grpc_pollset_worker** worker_hdl,
                                grpc_millis deadline) {
  GPR_TIMER_SCOPE("pollset_work", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  /* Convert the absolute deadline to a relative epoll timeout up-front. */
  int timeout_ms = poll_deadline_to_millis_timeout(deadline);

  sigset_t new_mask;

  /* The worker lives on this stack frame; it is only ever linked into the
     pollset's worker list for the duration of this call. */
  grpc_pollset_worker worker;
  worker.next = worker.prev = nullptr;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    /* Lazily set up the per-thread signal masks, once per thread. */
    if (!g_initialized_sigmask) {
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask:       The new thread mask which blocks 'grpc_wakeup_signal'.
                         This is the mask used at all times *except during
                         epoll_wait()*"
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
                         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    pollset_work_and_unlock(pollset, &worker, timeout_ms, &g_orig_sigmask,
                            &error);
    grpc_core::ExecCtx::Get()->Flush();

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(pollset);

    /* Drop the lock while flushing so the shutdown_done closure can run. */
    gpr_mu_unlock(&pollset->po.mu);
    grpc_core::ExecCtx::Get()->Flush();
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = nullptr;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
1423
/* Link poll object 'item' (fd / pollset / pollset_set) into container 'bag'
   (pollset / pollset_set) by making both point at a common polling island,
   creating or merging islands as needed. Takes bag->mu then item->mu; may
   temporarily drop item->mu (see TSAN note below) and retry. */
static void add_poll_object(poll_obj* bag, poll_obj_type bag_type,
                            poll_obj* item, poll_obj_type item_type) {
  GPR_TIMER_SCOPE("add_poll_object", 0);

#ifndef NDEBUG
  GPR_ASSERT(item->obj_type == item_type);
  GPR_ASSERT(bag->obj_type == bag_type);
#endif

  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* pi_new = nullptr;

  gpr_mu_lock(&bag->mu);
  gpr_mu_lock(&item->mu);

retry:
  /*
   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
   * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
   *    a refcount of 2) and point item->pi and bag->pi to the new island
   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
   *    the other's non-NULL pi
   * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
   *    polling islands and update item->pi and bag->pi to point to the new
   *    island
   */

  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
   * orphaned */
  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
    gpr_mu_unlock(&item->mu);
    gpr_mu_unlock(&bag->mu);
    return;
  }

  if (item->pi == bag->pi) {
    pi_new = item->pi;
    if (pi_new == nullptr) {
      /* GPR_ASSERT(item->pi == bag->pi == NULL) */

      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
       * we need to do some extra work to make TSAN happy */
      if (item_type == POLL_OBJ_FD) {
        /* Unlock before creating a new polling island: the polling island will
           create a workqueue which creates a file descriptor, and holding an fd
           lock here can eventually cause a loop to appear to TSAN (making it
           unhappy). We don't think it's a real loop (there's an epoch point
           where that loop possibility disappears), but the advantages of
           keeping TSAN happy outweigh any performance advantage we might have
           by keeping the lock held. */
        gpr_mu_unlock(&item->mu);
        pi_new = polling_island_create(FD_FROM_PO(item), &error);
        gpr_mu_lock(&item->mu);

        /* Need to reverify any assumptions made between the initial lock and
           getting to this branch: if they've changed, we need to throw away our
           work and figure things out again. */
        if (item->pi != nullptr) {
          GRPC_POLLING_TRACE(
              "add_poll_object: Raced creating new polling island. pi_new: %p "
              "(fd: %d, %s: %p)",
              (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
              (void*)bag);
          /* No need to lock 'pi_new' here since this is a new polling island
             and no one has a reference to it yet */
          polling_island_remove_all_fds_locked(pi_new, true, &error);

          /* Ref and unref so that the polling island gets deleted during unref
           */
          PI_ADD_REF(pi_new, "dance_of_destruction");
          PI_UNREF(pi_new, "dance_of_destruction");
          goto retry;
        }
      } else {
        pi_new = polling_island_create(nullptr, &error);
      }

      GRPC_POLLING_TRACE(
          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
          "%s: %p)",
          (void*)pi_new, poll_obj_string(item_type), (void*)item,
          poll_obj_string(bag_type), (void*)bag);
    } else {
      GRPC_POLLING_TRACE(
          "add_poll_object: Same polling island. pi: %p (%s, %s)",
          (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
    }
  } else if (item->pi == nullptr) {
    /* GPR_ASSERT(bag->pi != NULL) */
    /* Make pi_new point to latest pi*/
    pi_new = polling_island_lock(bag->pi);

    if (item_type == POLL_OBJ_FD) {
      grpc_fd* fd = FD_FROM_PO(item);
      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
    }

    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else if (bag->pi == nullptr) {
    /* GPR_ASSERT(item->pi != NULL) */
    /* Make pi_new to point to latest pi */
    pi_new = polling_island_lock(item->pi);
    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else {
    pi_new = polling_island_merge(item->pi, bag->pi, &error);
    GRPC_POLLING_TRACE(
        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  }

  /* At this point, pi_new is the polling island that both item->pi and bag->pi
     MUST be pointing to */

  /* Re-point item/bag at pi_new, transferring refs carefully (always add the
     new ref before dropping the old one, so the island cannot die mid-swap) */
  if (item->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(item_type));
    if (item->pi != nullptr) {
      PI_UNREF(item->pi, poll_obj_string(item_type));
    }
    item->pi = pi_new;
  }

  if (bag->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
    if (bag->pi != nullptr) {
      PI_UNREF(bag->pi, poll_obj_string(bag_type));
    }
    bag->pi = pi_new;
  }

  gpr_mu_unlock(&item->mu);
  gpr_mu_unlock(&bag->mu);

  GRPC_LOG_IF_ERROR("add_poll_object", error);
}
Craig Tiller57726ca2016-09-12 11:59:45 -07001570
/* Add 'fd' to 'pollset' by joining their poll objects into a common polling
   island (see add_poll_object). */
static void pollset_add_fd(grpc_pollset* pollset, grpc_fd* fd) {
  add_poll_object(&pollset->po, POLL_OBJ_POLLSET, &fd->po, POLL_OBJ_FD);
}
1574
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001575/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001576 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001577 */
1578
Craig Tillerbaa14a92017-11-03 09:09:36 -07001579static grpc_pollset_set* pollset_set_create(void) {
Noah Eisen4d20a662018-02-09 09:34:04 -08001580 grpc_pollset_set* pss =
1581 static_cast<grpc_pollset_set*>(gpr_malloc(sizeof(*pss)));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001582 gpr_mu_init(&pss->po.mu);
Craig Tiller4782d922017-11-10 09:53:21 -08001583 pss->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001584#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001585 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1586#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001587 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001588}
1589
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001590static void pollset_set_destroy(grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001591 gpr_mu_destroy(&pss->po.mu);
1592
Craig Tiller4782d922017-11-10 09:53:21 -08001593 if (pss->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001594 PI_UNREF(pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001595 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001596
1597 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001598}
1599
/* Adds 'fd' to the pollset_set by merging their poll objects. */
static void pollset_set_add_fd(grpc_pollset_set* pss, grpc_fd* fd) {
  add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &fd->po, POLL_OBJ_FD);
}
1603
/* Deliberate no-op: this engine only ever merges poll objects and never
   splits them.  NOTE(review): presumably the fd's membership effectively
   ends when the fd is orphaned -- confirm against fd_orphan(). */
static void pollset_set_del_fd(grpc_pollset_set* pss, grpc_fd* fd) {
  /* Nothing to do */
}
1607
/* Adds pollset 'ps' to the pollset_set by merging their poll objects. */
static void pollset_set_add_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
  add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &ps->po, POLL_OBJ_POLLSET);
}
1611
/* Deliberate no-op: poll objects are only ever merged in this engine, so
   there is no island split to perform on removal. */
static void pollset_set_del_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
  /* Nothing to do */
}
1615
/* Adds pollset_set 'item' to pollset_set 'bag' by merging their poll
   objects. */
static void pollset_set_add_pollset_set(grpc_pollset_set* bag,
                                        grpc_pollset_set* item) {
  add_poll_object(&bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}
1621
/* Deliberate no-op: see pollset_set_del_pollset -- merges are never
   undone in this engine. */
static void pollset_set_del_pollset_set(grpc_pollset_set* bag,
                                        grpc_pollset_set* item) {
  /* Nothing to do */
}
1626
/* Test helper functions */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001629void* grpc_fd_get_polling_island(grpc_fd* fd) {
1630 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001631
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001632 gpr_mu_lock(&fd->po.mu);
1633 pi = fd->po.pi;
1634 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001635
1636 return pi;
1637}
1638
Craig Tillerbaa14a92017-11-03 09:09:36 -07001639void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1640 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001641
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001642 gpr_mu_lock(&ps->po.mu);
1643 pi = ps->po.pi;
1644 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001645
1646 return pi;
1647}
1648
Craig Tillerbaa14a92017-11-03 09:09:36 -07001649bool grpc_are_polling_islands_equal(void* p, void* q) {
Noah Eisenbe82e642018-02-09 09:16:55 -08001650 polling_island* p1 = static_cast<polling_island*>(p);
1651 polling_island* p2 = static_cast<polling_island*>(q);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001652
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001653 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1654 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001655 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001656 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001657
1658 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001659}
1660
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001661/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001662 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001663 */
1664
/* Tears down the engine's global state; mirrors the init sequence in
   grpc_init_epollsig_linux (fd, pollset, polling-island globals). */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1670
/* Wires this engine's implementations into the iomgr event-engine
   interface.  Entries are positional: their order must match the field
   order of grpc_event_engine_vtable. */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset),
    true, /* NOTE(review): boolean capability flag -- appears to be "can
             track fd errors" given fd_notify_on_error below; confirm
             against the grpc_event_engine_vtable declaration. */

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_notify_on_error,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    shutdown_engine,
};
1703
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001704/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1705 * Create a dummy epoll_fd to make sure epoll support is available */
1706static bool is_epoll_available() {
1707 int fd = epoll_create1(EPOLL_CLOEXEC);
1708 if (fd < 0) {
1709 gpr_log(
1710 GPR_ERROR,
1711 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1712 fd);
1713 return false;
1714 }
1715 close(fd);
1716 return true;
1717}
1718
/* Entry point for the epollsig polling engine.  Returns the engine vtable
   when every prerequisite is met, or nullptr so the caller can fall back
   to another engine.  'explicit_request' is true when this engine was
   requested by name, in which case a default wakeup signal is chosen
   automatically instead of bailing out. */
const grpc_event_engine_vtable* grpc_init_epollsig_linux(
    bool explicit_request) {
  /* If use of signals is disabled, we cannot use the epoll engine. */
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    gpr_log(GPR_ERROR, "Skipping epollsig because use of signals is disabled.");
    return nullptr;
  }

  /* A wakeup-fd mechanism must be available to kick pollers. */
  if (!grpc_has_wakeup_fd()) {
    gpr_log(GPR_ERROR, "Skipping epollsig because of no wakeup fd.");
    return nullptr;
  }

  /* GLIBC may provide epoll wrappers even when the kernel lacks support. */
  if (!is_epoll_available()) {
    gpr_log(GPR_ERROR, "Skipping epollsig because epoll is unavailable.");
    return nullptr;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    if (explicit_request) {
      /* Engine explicitly requested: pick a default realtime signal. */
      grpc_use_signal(SIGRTMIN + 6);
    } else {
      gpr_log(GPR_ERROR,
              "Skipping epollsig because uninitialized wakeup signal.");
      return nullptr;
    }
  }

  fd_global_init();

  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return nullptr;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return nullptr;
  }

  return &vtable;
}
1760
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001761#else /* defined(GRPC_LINUX_EPOLL_CREATE1) */
Muxi Yan67ff4052018-05-15 12:36:10 -07001762#if defined(GRPC_POSIX_SOCKET_EV_EPOLLSIG)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001763#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001764/* If GRPC_LINUX_EPOLL_CREATE1 is not defined, it means
1765 epoll_create1 is not available. Return NULL */
/* Stub for platforms without epoll_create1 (GRPC_LINUX_EPOLL_CREATE1 not
   defined): always returns nullptr so callers fall back to another
   polling engine. */
const grpc_event_engine_vtable* grpc_init_epollsig_linux(
    bool explicit_request) {
  return nullptr;
}
#endif /* defined(GRPC_POSIX_SOCKET_EV_EPOLLSIG) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001771
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001772void grpc_use_signal(int signum) {}
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001773#endif /* !defined(GRPC_LINUX_EPOLL_CREATE1) */