/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include <grpc/support/port_platform.h>

#include "src/core/lib/iomgr/port.h"

#include <grpc/grpc_posix.h>
#include <grpc/support/log.h>

/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL_CREATE1

#include "src/core/lib/iomgr/ev_epollsig_linux.h"

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

#include <grpc/support/alloc.h>
#include <grpc/support/string_util.h>

#include "src/core/lib/debug/stats.h"
#include "src/core/lib/gpr/tls.h"
#include "src/core/lib/gpr/useful.h"
#include "src/core/lib/gprpp/manual_constructor.h"
#include "src/core/lib/iomgr/block_annotate.h"
#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/lockfree_event.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/profiling/timers.h"

#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)

#define GRPC_POLLING_TRACE(...)       \
  if (grpc_polling_trace.enabled()) { \
    gpr_log(GPR_INFO, __VA_ARGS__);   \
  }

static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;

/* Implements the function defined in grpc_posix.h. This function may be
 * called even before grpc_init() to set a different signal to use. If
 * signum == -1, the use of signals is disabled */
void grpc_use_signal(int signum) {
  grpc_wakeup_signal = signum;
  is_grpc_wakeup_signal_initialized = true;

  if (grpc_wakeup_signal < 0) {
    gpr_log(GPR_INFO,
            "Use of signals is disabled. Epoll engine will not be used");
  } else {
    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
            grpc_wakeup_signal);
  }
}
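
/* Illustrative usage sketch (the specific signal below is an arbitrary
   example, not something this file prescribes): an application that wants
   this engine to use a particular wakeup signal calls grpc_use_signal()
   before grpc_init(), e.g.

     grpc_use_signal(SIGRTMIN + 2);   // any signal the process doesn't use
     grpc_init();

   Passing signum == -1 disables the use of signals, and with it this
   polling engine. */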

struct polling_island;

typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

typedef struct poll_obj {
#ifndef NDEBUG
  poll_obj_type obj_type;
#endif
  gpr_mu mu;
  struct polling_island* pi;
} poll_obj;

const char* poll_obj_string(poll_obj_type po_type) {
  switch (po_type) {
    case POLL_OBJ_FD:
      return "fd";
    case POLL_OBJ_POLLSET:
      return "pollset";
    case POLL_OBJ_POLLSET_SET:
      return "pollset_set";
  }

  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}

/*******************************************************************************
 * Fd Declarations
 */

#define FD_FROM_PO(po) ((grpc_fd*)(po))

struct grpc_fd {
  poll_obj po;

  int fd;
  /* refst format:
       bit 0    : 1=Active / 0=Orphaned
       bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     case, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;

  struct grpc_fd* freelist_next;
  grpc_closure* on_done_closure;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object;
};
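
/* Worked example of the refst encoding above (an illustrative reading of the
   code in this file, not extra state): fd_create() stores refst == 1 (bit 0
   set: active, refcount 0). Each GRPC_FD_REF adds 2 (1 -> 3: active, refcount
   1) and each GRPC_FD_UNREF subtracts 2, so ordinary ref traffic never touches
   the active/orphaned bit. fd_orphan() adds 1 exactly once, which clears bit 0
   (orphaned) while keeping the struct referenced until its closing
   UNREF_BY(fd, 2, ...). */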

/* Reference counting for fds */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line);
#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
#else
static void fd_ref(grpc_fd* fd);
static void fd_unref(grpc_fd* fd);
#define GRPC_FD_REF(fd, reason) fd_ref(fd)
#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
#endif

static void fd_global_init(void);
static void fd_global_shutdown(void);

/*******************************************************************************
 * Polling island Declarations
 */

#ifndef NDEBUG

#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
#define PI_UNREF(p, r) pi_unref_dbg((p), (r), __FILE__, __LINE__)

#else

#define PI_ADD_REF(p, r) pi_add_ref((p))
#define PI_UNREF(p, r) pi_unref((p))

#endif

/* This is also used as grpc_workqueue (by directly casting it) */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd** fds;
} polling_island;
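
/* Illustrative sketch of the merged_to chain (hypothetical islands A/B/C,
   derived from the fields above): if island A is merged into B and B later
   into C, then A->merged_to == B, B->merged_to == C and C->merged_to == 0.
   Code holding a stale handle follows the chain to reach the island that
   currently owns the epoll set; see polling_island_maybe_get_latest() and
   polling_island_lock() below. */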

/*******************************************************************************
 * Pollset Declarations
 */
struct grpc_pollset_worker {
  /* Thread id of this worker */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;
  struct grpc_pollset_worker* next;
  struct grpc_pollset_worker* prev;
};

struct grpc_pollset {
  poll_obj po;

  grpc_pollset_worker root_worker;
  bool kicked_without_pollers;

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure* shutdown_done; /* Called after shutdown is complete */
};

/*******************************************************************************
 * Pollset-set Declarations
 */
struct grpc_pollset_set {
  poll_obj po;
};

/*******************************************************************************
 * Common helpers
 */

static bool append_error(grpc_error** composite, grpc_error* error,
                         const char* desc) {
  if (error == GRPC_ERROR_NONE) return true;
  if (*composite == GRPC_ERROR_NONE) {
    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
  }
  *composite = grpc_error_add_child(*composite, error);
  return false;
}

/*******************************************************************************
 * Polling island Definitions
 */

/* The wakeup fd that is used to wake up all threads in a Polling island. This
   is useful in the polling island merge operation where we need to wakeup all
   the threads currently polling the smaller polling island (so that they can
   start polling the new/merged polling island)

   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
static grpc_wakeup_fd polling_island_wakeup_fd;

/* The polling island being polled right now.
   See comments in workqueue_maybe_wakeup for why this is tracked. */
static __thread polling_island* g_current_thread_polling_island;

/* Forward declaration */
static void polling_island_delete(polling_island* pi);

#ifdef GRPC_TSAN
/* Currently TSAN may incorrectly flag data races between epoll_ctl and
   epoll_wait for any grpc_fd structs that are added to the epoll set via
   epoll_ctl and are returned (within a very short window) via epoll_wait().

   To work-around this race, we establish a happens-before relation between
   the code just-before epoll_ctl() and the code after epoll_wait() by using
   this atomic */
gpr_atm g_epoll_sync;
#endif /* defined(GRPC_TSAN) */

static void pi_add_ref(polling_island* pi);
static void pi_unref(polling_island* pi);

#ifndef NDEBUG
static void pi_add_ref_dbg(polling_island* pi, const char* reason,
                           const char* file, int line) {
  if (grpc_polling_trace.enabled()) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_INFO,
            "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}

static void pi_unref_dbg(polling_island* pi, const char* reason,
                         const char* file, int line) {
  if (grpc_polling_trace.enabled()) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_INFO,
            "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(pi);
}
#endif

static void pi_add_ref(polling_island* pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}

static void pi_unref(polling_island* pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(pi);
    if (next != nullptr) {
      PI_UNREF(next, "pi_delete"); /* Recursive call */
    }
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error** error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char* err_msg;
  const char* err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      continue;
    }

    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds = static_cast<grpc_fd**>(
          gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity));
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}

/* The caller is expected to hold pi->mu before calling this */
static void polling_island_add_wakeup_fd_locked(polling_island* pi,
                                                grpc_wakeup_fd* wakeup_fd,
                                                grpc_error** error) {
  struct epoll_event ev;
  int err;
  char* err_msg;
  const char* err_desc = "polling_island_add_wakeup_fd";

  ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_all_fds_locked(polling_island* pi,
                                                 bool remove_fd_refs,
                                                 grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, nullptr);
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}

/* The caller is expected to hold pi->mu lock before calling this function */
static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
                                            grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have automatically been removed
     from the epoll set */
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, nullptr);
  if (err < 0 && errno != ENOENT) {
    gpr_asprintf(
        &err_msg,
        "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
        pi->epoll_fd, fd->fd, errno, strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }

  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}

/* Might return NULL in case of an error */
static polling_island* polling_island_create(grpc_fd* initial_fd,
                                             grpc_error** error) {
  polling_island* pi = nullptr;
  const char* err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = static_cast<polling_island*>(gpr_malloc(sizeof(*pi)));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = nullptr;
  pi->epoll_fd = -1;

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm) nullptr);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != nullptr) {
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(pi);
    pi = nullptr;
  }
  return pi;
}

static void polling_island_delete(polling_island* pi) {
  GPR_ASSERT(pi->fd_cnt == 0);

  if (pi->epoll_fd >= 0) {
    close(pi->epoll_fd);
  }
  gpr_mu_destroy(&pi->mu);
  gpr_free(pi->fds);
  gpr_free(pi);
}

/* Attempts to get the last polling island in the linked list (linked by the
 * 'merged_to' field). Since this does not lock the polling island, there are no
 * guarantees that the island returned is the last island */
static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
  polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
  while (next != nullptr) {
    pi = next;
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
  }

  return pi;
}

/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
     polling_island *pi_latest = polling_island_lock(pi);
     ...
     ... critical section ..
     ...
     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island* polling_island_lock(polling_island* pi) {
  polling_island* next = nullptr;

  while (true) {
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    if (next == nullptr) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
      if (next == nullptr) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}

/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island** p, polling_island** q) {
  polling_island* pi_1 = *p;
  polling_island* pi_2 = *q;
  polling_island* next_1 = nullptr;
  polling_island* next_2 = nullptr;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
        we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != nullptr) {
      pi_1 = next_1;
      next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != nullptr) {
      pi_2 = next_2;
      next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == nullptr && next_2 == nullptr) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}

static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
  if (p == q) {
    gpr_mu_unlock(&p->mu);
  } else {
    gpr_mu_unlock(&p->mu);
    gpr_mu_unlock(&q->mu);
  }
}

static polling_island* polling_island_merge(polling_island* p,
                                            polling_island* q,
                                            grpc_error** error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island*, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
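
/* Illustrative call sketch (hypothetical caller, shown only to make the
   contract concrete; the real call sites live elsewhere in this file):

     grpc_error* error = GRPC_ERROR_NONE;
     polling_island* merged =
         polling_island_merge(fd_island, pollset_island, &error);
     // 'merged' now owns the union of the fds; pollers on the smaller island
     // were woken via polling_island_wakeup_fd and will migrate to it.
*/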

static grpc_error* polling_island_global_init() {
  grpc_error* error = GRPC_ERROR_NONE;

  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
  if (error == GRPC_ERROR_NONE) {
    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
  }

  return error;
}

static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}

/*******************************************************************************
 * Fd Definitions
 */

/* We need to keep a freelist not because of any concerns of malloc performance
 * but instead so that implementations with multiple threads in (for example)
 * epoll_wait deal with the race between pollset removal and incoming poll
 * notifications.
 *
 * The problem is that the poller ultimately holds a reference to this
 * object, so it is very difficult to know when it is safe to free it, at least
 * without some expensive synchronization.
 *
 * If we keep the object freelisted, in the worst case losing this race just
 * becomes a spurious read notification on a reused fd.
 */

/* The alarm system needs to be able to wakeup 'some poller' sometimes
 * (specifically when a new alarm needs to be triggered earlier than the next
 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
 * case occurs. */

static grpc_fd* fd_freelist = nullptr;
static gpr_mu fd_freelist_mu;

#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                   int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p   ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd* fd, int n) {
#endif
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}

#ifndef NDEBUG
static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                     int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd* fd, int n) {
#endif
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    fd->read_closure->DestroyEvent();
    fd->write_closure->DestroyEvent();

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    GPR_ASSERT(old > n);
  }
}

/* Increment refcount by two to avoid changing the orphan bit */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
#endif

static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != nullptr) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

static grpc_fd* fd_create(int fd, const char* name) {
  grpc_fd* new_fd = nullptr;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != nullptr) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == nullptr) {
    new_fd = static_cast<grpc_fd*>(gpr_malloc(sizeof(grpc_fd)));
    gpr_mu_init(&new_fd->po.mu);
    new_fd->read_closure.Init();
    new_fd->write_closure.Init();
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = nullptr;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  new_fd->read_closure->InitEvent();
  new_fd->write_closure->InitEvent();
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = nullptr;
  new_fd->on_done_closure = nullptr;

  gpr_mu_unlock(&new_fd->po.mu);

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}

static int fd_wrapped_fd(grpc_fd* fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

static void fd_orphan(grpc_fd* fd, grpc_closure* on_done, int* release_fd,
                      const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* unref_pi = nullptr;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != nullptr) {
    polling_island* pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = nullptr;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != nullptr) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != nullptr) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char* msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}

static grpc_pollset* fd_get_read_notifier_pollset(grpc_fd* fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset*)notifier;
}

static bool fd_is_shutdown(grpc_fd* fd) {
  return fd->read_closure->IsShutdown();
}

/* Might be called multiple times */
static void fd_shutdown(grpc_fd* fd, grpc_error* why) {
  if (fd->read_closure->SetShutdown(GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    fd->write_closure->SetShutdown(GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_fd* fd, grpc_closure* closure) {
  fd->read_closure->NotifyOn(closure);
}

static void fd_notify_on_write(grpc_fd* fd, grpc_closure* closure) {
  fd->write_closure->NotifyOn(closure);
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;

static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}

static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }

/* Global state management */
static grpc_error* pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}

static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}

static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
  grpc_error* err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, static_cast<gpr_atm>(0),
                             static_cast<gpr_atm>(1))) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void*)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
990
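/* The kick above is made idempotent with a compare-and-swap: only the first
   caller to flip worker->is_kicked from 0 to 1 pays for the pthread_kill()
   syscall; later kickers become no-ops. A generic, hedged sketch of the same
   idiom (all names hypothetical):

     static gpr_atm g_signalled = 0;  // 0 = not yet signalled, 1 = signalled

     static void signal_thread_once(pthread_t tid, int sig) {
       if (gpr_atm_no_barrier_cas(&g_signalled, (gpr_atm)0, (gpr_atm)1)) {
         pthread_kill(tid, sig);  // only the CAS winner issues the syscall
       }
     }
*/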
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700991/* Return 1 if the pollset has active threads in pollset_work (pollset must
992 * be locked) */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700993static int pollset_has_workers(grpc_pollset* p) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700994 return p->root_worker.next != &p->root_worker;
995}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700996
Craig Tillerbaa14a92017-11-03 09:09:36 -0700997static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700998 worker->prev->next = worker->next;
999 worker->next->prev = worker->prev;
1000}
1001
Craig Tillerbaa14a92017-11-03 09:09:36 -07001002static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001003 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001004 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001005 remove_worker(p, w);
1006 return w;
1007 } else {
Craig Tiller4782d922017-11-10 09:53:21 -08001008 return nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001009 }
1010}
1011
Craig Tillerbaa14a92017-11-03 09:09:36 -07001012static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001013 worker->next = &p->root_worker;
1014 worker->prev = worker->next->prev;
1015 worker->prev->next = worker->next->prev = worker;
1016}
1017
Craig Tillerbaa14a92017-11-03 09:09:36 -07001018static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001019 worker->prev = &p->root_worker;
1020 worker->next = worker->prev->next;
1021 worker->prev->next = worker->next->prev = worker;
1022}
1023
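/* Note on the three helpers above: the worker list is an intrusive, circular
   doubly-linked list whose sentinel is pollset->root_worker, so an empty list
   is simply the sentinel pointing at itself (which is exactly what
   pollset_has_workers() tests). push_front_worker() places a worker where an
   anonymous kick (see pollset_kick() below) will find it first via
   pop_front_worker(). */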
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001024/* p->mu must be held before calling this function */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001025static grpc_error* pollset_kick(grpc_pollset* p,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001026 grpc_pollset_worker* specific_worker) {
yang-gce1cfea2018-01-31 15:59:50 -08001027 GPR_TIMER_SCOPE("pollset_kick", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001028 grpc_error* error = GRPC_ERROR_NONE;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001029 GRPC_STATS_INC_POLLSET_KICK();
Craig Tillerbaa14a92017-11-03 09:09:36 -07001030 const char* err_desc = "Kick Failure";
1031 grpc_pollset_worker* worker = specific_worker;
Craig Tiller4782d922017-11-10 09:53:21 -08001032 if (worker != nullptr) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001033 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001034 if (pollset_has_workers(p)) {
yang-gce1cfea2018-01-31 15:59:50 -08001035 GPR_TIMER_SCOPE("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001036 for (worker = p->root_worker.next; worker != &p->root_worker;
1037 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001038 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001039 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001040 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001041 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001042 } else {
1043 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001044 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001045 } else {
1046 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001047 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001048 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001049 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001050 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001051 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1052 /* Since worker == NULL, it means that we can kick "any" worker on this
1053 pollset 'p'. If 'p' happens to be the same pollset this thread is
1054 currently polling (i.e in pollset_work() function), then there is no need
1055 to kick any other worker since the current thread can just absorb the
1056 kick. This is the reason why we enter this case only when
1057 g_current_thread_pollset != p */
1058
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001059 GPR_TIMER_MARK("kick_anonymous", 0);
1060 worker = pop_front_worker(p);
Craig Tiller4782d922017-11-10 09:53:21 -08001061 if (worker != nullptr) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001062 GPR_TIMER_MARK("finally_kick", 0);
1063 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001064 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001065 } else {
1066 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001067 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001068 }
1069 }
1070
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001071 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1072 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001073}
1074
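/* Hedged usage sketch (caller-side, not part of this file): pollset_kick()
   must be called with pollset->po.mu held. A broadcast wakes every worker
   (this is what pollset_shutdown() below does); passing a specific worker
   wakes just that one. 'ps' is a hypothetical pollset pointer.

     gpr_mu_lock(&ps->po.mu);
     grpc_error* kick_err = pollset_kick(ps, GRPC_POLLSET_KICK_BROADCAST);
     gpr_mu_unlock(&ps->po.mu);
     GRPC_ERROR_UNREF(kick_err);
*/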
Craig Tillerbaa14a92017-11-03 09:09:36 -07001075static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001076 gpr_mu_init(&pollset->po.mu);
1077 *mu = &pollset->po.mu;
Craig Tiller4782d922017-11-10 09:53:21 -08001078 pollset->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001079#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001080 pollset->po.obj_type = POLL_OBJ_POLLSET;
1081#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001082
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001083 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001084 pollset->kicked_without_pollers = false;
1085
1086 pollset->shutting_down = false;
1087 pollset->finish_shutdown_called = false;
Craig Tiller4782d922017-11-10 09:53:21 -08001088 pollset->shutdown_done = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001089}
1090
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001091static int poll_deadline_to_millis_timeout(grpc_millis millis) {
Craig Tiller20397792017-07-18 11:35:27 -07001092 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001093 grpc_millis delta = millis - grpc_core::ExecCtx::Get()->Now();
Craig Tiller20397792017-07-18 11:35:27 -07001094 if (delta > INT_MAX)
1095 return INT_MAX;
1096 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001097 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001098 else
Noah Eisenbe82e642018-02-09 09:16:55 -08001099 return static_cast<int>(delta);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001100}
1101
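/* Worked example (illustrative numbers): if grpc_core::ExecCtx::Get()->Now()
   currently returns 1000 and the caller passed millis == 1250, the delta is
   250 and epoll_pwait() below blocks for at most 250ms. A deadline already in
   the past (e.g. 900) yields 0, i.e. a non-blocking poll, and
   GRPC_MILLIS_INF_FUTURE yields -1, which epoll treats as "wait forever". */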
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001102static void fd_become_readable(grpc_fd* fd, grpc_pollset* notifier) {
1103 fd->read_closure->SetReady();
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001104
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001105 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001106 different 'notifier's when an fd becomes readable and it is in two epoll
1107 sets (This can happen briefly during polling island merges). In such cases
1108 it does not really matter which notifier is set as the read_notifier_pollset
1109 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001110 /* Use release store to match with acquire load in fd_get_read_notifier */
1111 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001112}
1113
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001114static void fd_become_writable(grpc_fd* fd) { fd->write_closure->SetReady(); }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001115
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001116static void pollset_release_polling_island(grpc_pollset* ps,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001117 const char* reason) {
Craig Tiller4782d922017-11-10 09:53:21 -08001118 if (ps->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001119 PI_UNREF(ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001120 }
Craig Tiller4782d922017-11-10 09:53:21 -08001121 ps->po.pi = nullptr;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001122}
1123
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001124static void finish_shutdown_locked(grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001125 /* The pollset cannot have any workers if we are at this stage */
1126 GPR_ASSERT(!pollset_has_workers(pollset));
1127
1128 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001129
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001130 /* Release the ref and set pollset->po.pi to NULL */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001131 pollset_release_polling_island(pollset, "ps_shutdown");
1132 GRPC_CLOSURE_SCHED(pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001133}
1134
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001135/* pollset->po.mu lock must be held by the caller before calling this */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001136static void pollset_shutdown(grpc_pollset* pollset, grpc_closure* closure) {
yang-gce1cfea2018-01-31 15:59:50 -08001137 GPR_TIMER_SCOPE("pollset_shutdown", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001138 GPR_ASSERT(!pollset->shutting_down);
1139 pollset->shutting_down = true;
1140 pollset->shutdown_done = closure;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001141 pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001142
1143 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1144 because it would release the underlying polling island. In such a case, we
1145 let the last worker call finish_shutdown_locked() from pollset_work() */
1146 if (!pollset_has_workers(pollset)) {
1147 GPR_ASSERT(!pollset->finish_shutdown_called);
1148 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001149 finish_shutdown_locked(pollset);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001150 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001151}
1152
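/* Hedged caller-side sketch of the shutdown handshake: the closure passed here
   is scheduled by finish_shutdown_locked() once no workers remain, which may
   happen immediately or from the last worker's pollset_work() call; only after
   it runs is pollset_destroy() safe. The names on_pollset_shutdown,
   shutdown_done and ps are hypothetical.

     gpr_mu_lock(&ps->po.mu);
     GRPC_CLOSURE_INIT(&shutdown_done, on_pollset_shutdown, ps,
                       grpc_schedule_on_exec_ctx);
     pollset_shutdown(ps, &shutdown_done);
     gpr_mu_unlock(&ps->po.mu);
*/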
1153/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1154 * than destroying the mutexes, there is nothing special that needs to be done
1155 * here */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001156static void pollset_destroy(grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001157 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001158 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001159}
1160
Craig Tiller84ea3412016-09-08 14:57:56 -07001161#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001162/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001163static void pollset_work_and_unlock(grpc_pollset* pollset,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001164 grpc_pollset_worker* worker, int timeout_ms,
1165 sigset_t* sig_mask, grpc_error** error) {
yang-gce1cfea2018-01-31 15:59:50 -08001166 GPR_TIMER_SCOPE("pollset_work_and_unlock", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001167 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001168 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001169 int ep_rv;
Craig Tiller4782d922017-11-10 09:53:21 -08001170 polling_island* pi = nullptr;
Craig Tillerbaa14a92017-11-03 09:09:36 -07001171 char* err_msg;
1172 const char* err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001173
1174 /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001175 latest polling island pointed to by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001176
1177 Since epoll_fd is immutable, we can read it without obtaining the polling
1178 island lock. There is however a possibility that the polling island (from
1179 which we got the epoll_fd) got merged with another island while we are
1180 in this function. This is still okay because in such a case, we will wakeup
1181 right away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001182 time this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001183
Craig Tiller4782d922017-11-10 09:53:21 -08001184 if (pollset->po.pi == nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001185 pollset->po.pi = polling_island_create(nullptr, error);
Craig Tiller4782d922017-11-10 09:53:21 -08001186 if (pollset->po.pi == nullptr) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001187 return; /* Fatal error. We cannot continue */
1188 }
1189
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001190 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001191 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001192 (void*)pollset, (void*)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001193 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001194
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001195 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001196 epoll_fd = pi->epoll_fd;
1197
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001198 /* Update pollset->po.pi, since the island currently pointed to by
1199 pollset->po.pi may be older than the one pointed to by pi */
1200 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001201 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1202 polling island to be deleted */
1203 PI_ADD_REF(pi, "ps");
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001204 PI_UNREF(pollset->po.pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001205 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001206 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001207
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001208 /* Add an extra ref so that the island does not get destroyed (which means
1209 the epoll_fd won't be closed) while we are doing an epoll_wait() on the
1210 epoll_fd */
1211 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001212 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001213
Craig Tiller61f96c12017-05-12 13:36:39 -07001214 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1215 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001216
Craig Tiller61f96c12017-05-12 13:36:39 -07001217 GRPC_SCHEDULING_START_BLOCKING_REGION;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001218 GRPC_STATS_INC_SYSCALL_POLL();
Craig Tiller61f96c12017-05-12 13:36:39 -07001219 ep_rv =
1220 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001221 GRPC_SCHEDULING_END_BLOCKING_REGION;
Craig Tiller61f96c12017-05-12 13:36:39 -07001222 if (ep_rv < 0) {
1223 if (errno != EINTR) {
1224 gpr_asprintf(&err_msg,
1225 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1226 epoll_fd, errno, strerror(errno));
1227 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1228 } else {
1229 /* We were interrupted. Save an iteration by doing a zero timeout
1230 epoll_wait to see if there are any other events of interest */
1231 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001232 (void*)pollset, (void*)worker);
Craig Tiller61f96c12017-05-12 13:36:39 -07001233 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001234 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001235 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001236
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001237#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001238 /* See the definition of g_poll_sync for more details */
1239 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001240#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001241
Craig Tiller61f96c12017-05-12 13:36:39 -07001242 for (int i = 0; i < ep_rv; ++i) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001243 void* data_ptr = ep_ev[i].data.ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001244 if (data_ptr == &polling_island_wakeup_fd) {
1245 GRPC_POLLING_TRACE(
1246 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1247 "%d) got merged",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001248 (void*)pollset, (void*)worker, epoll_fd);
Craig Tiller61f96c12017-05-12 13:36:39 -07001249 /* This means that our polling island is merged with a different
1250 island. We do not have to do anything here since the subsequent call
1251 to the function pollset_work_and_unlock() will pick up the correct
1252 epoll_fd */
1253 } else {
Noah Eisenbe82e642018-02-09 09:16:55 -08001254 grpc_fd* fd = static_cast<grpc_fd*>(data_ptr);
Craig Tiller61f96c12017-05-12 13:36:39 -07001255 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1256 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1257 int write_ev = ep_ev[i].events & EPOLLOUT;
1258 if (read_ev || cancel) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001259 fd_become_readable(fd, pollset);
Craig Tiller61f96c12017-05-12 13:36:39 -07001260 }
1261 if (write_ev || cancel) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001262 fd_become_writable(fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001263 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001264 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001265 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001266
Craig Tiller4782d922017-11-10 09:53:21 -08001267 g_current_thread_polling_island = nullptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001268 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1269
Craig Tiller4782d922017-11-10 09:53:21 -08001270 GPR_ASSERT(pi != nullptr);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001271
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001272 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001273 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001274 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001275 the pollset->po.pi pointer might get updated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001276 code when there is an island merge while we are doing epoll_wait() above */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001277 PI_UNREF(pi, "ps_work");
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001278}
1279
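/* Minimal standalone sketch of the epoll_pwait()+signal pattern used above,
   assuming an application-chosen wakeup signal. Everything here is
   hypothetical and simplified; the production path is pollset_work() /
   pollset_work_and_unlock() with the polling-island refcounting that the
   sketch omits.

     static void wakeup_handler(int sig) { (void)sig; }

     static int poll_once(int epoll_fd, int wakeup_sig) {
       struct epoll_event evs[8];
       sigset_t wait_mask;
       signal(wakeup_sig, wakeup_handler);
       sigfillset(&wait_mask);
       sigdelset(&wait_mask, wakeup_sig);  // unblock only the wakeup signal
       // Returns when an fd event arrives, the 100ms timeout expires, or
       // another thread delivers wakeup_sig via pthread_kill().
       int n = epoll_pwait(epoll_fd, evs, 8, 100, &wait_mask);
       if (n < 0 && errno == EINTR) {
         return 0;  // kicked: let the caller pick up pending work
       }
       return n;
     }
*/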
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001280/* pollset->po.mu lock must be held by the caller before calling this.
1281 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001282 during the course of its execution but it will always re-acquire the lock and
1283 ensure that it is held by the time the function returns */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001284static grpc_error* pollset_work(grpc_pollset* pollset,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001285 grpc_pollset_worker** worker_hdl,
Craig Tiller20397792017-07-18 11:35:27 -07001286 grpc_millis deadline) {
yang-gce1cfea2018-01-31 15:59:50 -08001287 GPR_TIMER_SCOPE("pollset_work", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001288 grpc_error* error = GRPC_ERROR_NONE;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001289 int timeout_ms = poll_deadline_to_millis_timeout(deadline);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001290
1291 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001292
1293 grpc_pollset_worker worker;
Craig Tiller4782d922017-11-10 09:53:21 -08001294 worker.next = worker.prev = nullptr;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001295 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001296 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001297
Craig Tiller557c88c2017-04-05 17:20:18 -07001298 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001299
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001300 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1301 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001302
1303 if (pollset->kicked_without_pollers) {
1304 /* If the pollset was kicked without pollers, pretend that the current
1305 worker got the kick and skip polling. A kick indicates that there is some
1306 work that needs attention like an event on the completion queue or an
1307 alarm */
1308 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1309 pollset->kicked_without_pollers = 0;
1310 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001311 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001312 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1313 worker that there is some pending work that needs immediate attention
1314 (like an event on the completion queue, or a polling island merge that
1315 results in a new epoll-fd to wait on) and that the worker should not
1316 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001317
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001318 A worker can be kicked anytime from the point it is added to the pollset
1319 via push_front_worker() (or push_back_worker()) to the point it is
1320 removed via remove_worker().
1321 If the worker is kicked before or while it is in epoll_pwait(), it should
1322 return from epoll_pwait() immediately. If the worker is kicked after it
1323 returns from epoll_pwait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001324
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001325 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001326 times *except* when it is in epoll_pwait(). This way, the worker never
1327 misses acting on a kick */
1328
Craig Tiller19196992016-06-27 18:45:56 -07001329 if (!g_initialized_sigmask) {
1330 sigemptyset(&new_mask);
1331 sigaddset(&new_mask, grpc_wakeup_signal);
1332 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1333 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1334 g_initialized_sigmask = true;
1335 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1336 This is the mask used at all times *except during
1337 epoll_wait()*
1338 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001339 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001340
Craig Tiller19196992016-06-27 18:45:56 -07001341 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001342 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001343 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001344
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001345 push_front_worker(pollset, &worker); /* Add worker to pollset */
1346
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001347 pollset_work_and_unlock(pollset, &worker, timeout_ms, &g_orig_sigmask,
1348 &error);
1349 grpc_core::ExecCtx::Get()->Flush();
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001350
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001351 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001352
1353 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1354 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001355 remove_worker(pollset, &worker);
1356 }
1357
1358 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1359 false at this point) and the pollset is shutting down, we may have to
1360 finish the shutdown process by calling finish_shutdown_locked().
1361 See pollset_shutdown() for more details.
1362
1363 Note: Continuing to access pollset here is safe; it is the caller's
1364 responsibility to not destroy a pollset when it has outstanding calls to
1365 pollset_work() */
1366 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1367 !pollset->finish_shutdown_called) {
1368 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001369 finish_shutdown_locked(pollset);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001370
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001371 gpr_mu_unlock(&pollset->po.mu);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001372 grpc_core::ExecCtx::Get()->Flush();
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001373 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001374 }
1375
Craig Tiller4782d922017-11-10 09:53:21 -08001376 if (worker_hdl) *worker_hdl = nullptr;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001377
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001378 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1379 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001380
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001381 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1382 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001383}
1384
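/* Hedged sketch of how a completion-queue style driver thread would call
   pollset_work(). 'ps', 'mu', 'done' and 'deadline' are hypothetical; 'mu' is
   the mutex handed back by pollset_init(ps, &mu), and pollset_work() always
   returns with that mutex re-acquired.

     gpr_mu_lock(mu);
     while (!done) {
       grpc_pollset_worker* worker = nullptr;
       GRPC_LOG_IF_ERROR("pollset_work",
                         pollset_work(ps, &worker, deadline));
       // Inspect/complete any work surfaced by the poll before looping.
     }
     gpr_mu_unlock(mu);
*/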
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001385static void add_poll_object(poll_obj* bag, poll_obj_type bag_type,
1386 poll_obj* item, poll_obj_type item_type) {
yang-gce1cfea2018-01-31 15:59:50 -08001387 GPR_TIMER_SCOPE("add_poll_object", 0);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001388
ncteisene9cd8a82017-06-29 06:03:52 -04001389#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001390 GPR_ASSERT(item->obj_type == item_type);
1391 GPR_ASSERT(bag->obj_type == bag_type);
1392#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001393
Craig Tillerbaa14a92017-11-03 09:09:36 -07001394 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller4782d922017-11-10 09:53:21 -08001395 polling_island* pi_new = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001396
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001397 gpr_mu_lock(&bag->mu);
1398 gpr_mu_lock(&item->mu);
1399
Craig Tiller7212c232016-07-06 13:11:09 -07001400retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001401 /*
1402 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1403 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1404 * a refcount of 2) and point item->pi and bag->pi to the new island
1405 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1406 * the other's non-NULL pi
1407 * 4) Finally, if item->pi and bag->pi are non-NULL and not equal, merge the
1408 * polling islands and update item->pi and bag->pi to point to the new
1409 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001410 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001411
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001412 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1413 * orphaned */
1414 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1415 gpr_mu_unlock(&item->mu);
1416 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001417 return;
1418 }
1419
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001420 if (item->pi == bag->pi) {
1421 pi_new = item->pi;
Craig Tiller4782d922017-11-10 09:53:21 -08001422 if (pi_new == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001423 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001424
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001425 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1426 * we need to do some extra work to make TSAN happy */
1427 if (item_type == POLL_OBJ_FD) {
1428 /* Unlock before creating a new polling island: the polling island will
1429 create a workqueue which creates a file descriptor, and holding an fd
1430 lock here can eventually cause a loop to appear to TSAN (making it
1431 unhappy). We don't think it's a real loop (there's an epoch point
1432 where that loop possibility disappears), but the advantages of
1433 keeping TSAN happy outweigh any performance advantage we might have
1434 by keeping the lock held. */
1435 gpr_mu_unlock(&item->mu);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001436 pi_new = polling_island_create(FD_FROM_PO(item), &error);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001437 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001438
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001439 /* Need to reverify any assumptions made between the initial lock and
1440 getting to this branch: if they've changed, we need to throw away our
1441 work and figure things out again. */
Craig Tiller4782d922017-11-10 09:53:21 -08001442 if (item->pi != nullptr) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001443 GRPC_POLLING_TRACE(
1444 "add_poll_object: Raced creating new polling island. pi_new: %p "
1445 "(fd: %d, %s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001446 (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1447 (void*)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001448 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001449 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001450 polling_island_remove_all_fds_locked(pi_new, true, &error);
1451
1452 /* Ref and unref so that the polling island gets deleted during unref
1453 */
1454 PI_ADD_REF(pi_new, "dance_of_destruction");
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001455 PI_UNREF(pi_new, "dance_of_destruction");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001456 goto retry;
1457 }
Craig Tiller27da6422016-07-06 13:14:46 -07001458 } else {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001459 pi_new = polling_island_create(nullptr, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001460 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001461
1462 GRPC_POLLING_TRACE(
1463 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1464 "%s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001465 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1466 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001467 } else {
1468 GRPC_POLLING_TRACE(
1469 "add_poll_object: Same polling island. pi: %p (%s, %s)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001470 (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001471 }
Craig Tiller4782d922017-11-10 09:53:21 -08001472 } else if (item->pi == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001473 /* GPR_ASSERT(bag->pi != NULL) */
1474 /* Make pi_new point to latest pi*/
1475 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001476
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001477 if (item_type == POLL_OBJ_FD) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001478 grpc_fd* fd = FD_FROM_PO(item);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001479 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1480 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001481
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001482 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001483 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001484 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1485 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001486 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1487 poll_obj_string(bag_type), (void*)bag);
Craig Tiller4782d922017-11-10 09:53:21 -08001488 } else if (bag->pi == nullptr) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001489 /* GPR_ASSERT(item->pi != NULL) */
1490 /* Make pi_new to point to latest pi */
1491 pi_new = polling_island_lock(item->pi);
1492 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001493 GRPC_POLLING_TRACE(
1494 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1495 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001496 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1497 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001498 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001499 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001500 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001501 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1502 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001503 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1504 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001505 }
1506
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001507 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1508 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001509
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001510 if (item->pi != pi_new) {
1511 PI_ADD_REF(pi_new, poll_obj_string(item_type));
Craig Tiller4782d922017-11-10 09:53:21 -08001512 if (item->pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001513 PI_UNREF(item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001514 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001515 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001516 }
1517
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001518 if (bag->pi != pi_new) {
1519 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
Craig Tiller4782d922017-11-10 09:53:21 -08001520 if (bag->pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001521 PI_UNREF(bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001522 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001523 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001524 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001525
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001526 gpr_mu_unlock(&item->mu);
1527 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001528
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001529 GRPC_LOG_IF_ERROR("add_poll_object", error);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001530}
Craig Tiller57726ca2016-09-12 11:59:45 -07001531
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001532static void pollset_add_fd(grpc_pollset* pollset, grpc_fd* fd) {
1533 add_poll_object(&pollset->po, POLL_OBJ_POLLSET, &fd->po, POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001534}
1535
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001536/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001537 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001538 */
1539
Craig Tillerbaa14a92017-11-03 09:09:36 -07001540static grpc_pollset_set* pollset_set_create(void) {
Noah Eisen4d20a662018-02-09 09:34:04 -08001541 grpc_pollset_set* pss =
1542 static_cast<grpc_pollset_set*>(gpr_malloc(sizeof(*pss)));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001543 gpr_mu_init(&pss->po.mu);
Craig Tiller4782d922017-11-10 09:53:21 -08001544 pss->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001545#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001546 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1547#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001548 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001549}
1550
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001551static void pollset_set_destroy(grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001552 gpr_mu_destroy(&pss->po.mu);
1553
Craig Tiller4782d922017-11-10 09:53:21 -08001554 if (pss->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001555 PI_UNREF(pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001556 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001557
1558 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001559}
1560
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001561static void pollset_set_add_fd(grpc_pollset_set* pss, grpc_fd* fd) {
1562 add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &fd->po, POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001563}
1564
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001565static void pollset_set_del_fd(grpc_pollset_set* pss, grpc_fd* fd) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001566 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001567}
1568
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001569static void pollset_set_add_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
1570 add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &ps->po, POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001571}
1572
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001573static void pollset_set_del_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001574 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001575}
1576
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001577static void pollset_set_add_pollset_set(grpc_pollset_set* bag,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001578 grpc_pollset_set* item) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001579 add_poll_object(&bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001580 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001581}
1582
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001583static void pollset_set_del_pollset_set(grpc_pollset_set* bag,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001584 grpc_pollset_set* item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001585 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001586}
1587
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001588/* Test helper functions
1589 */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001590void* grpc_fd_get_polling_island(grpc_fd* fd) {
1591 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001592
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001593 gpr_mu_lock(&fd->po.mu);
1594 pi = fd->po.pi;
1595 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001596
1597 return pi;
1598}
1599
Craig Tillerbaa14a92017-11-03 09:09:36 -07001600void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1601 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001602
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001603 gpr_mu_lock(&ps->po.mu);
1604 pi = ps->po.pi;
1605 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001606
1607 return pi;
1608}
1609
Craig Tillerbaa14a92017-11-03 09:09:36 -07001610bool grpc_are_polling_islands_equal(void* p, void* q) {
Noah Eisenbe82e642018-02-09 09:16:55 -08001611 polling_island* p1 = static_cast<polling_island*>(p);
1612 polling_island* p2 = static_cast<polling_island*>(q);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001613
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001614 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1615 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001616 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001617 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001618
1619 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001620}
1621
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001622/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001623 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001624 */
1625
1626static void shutdown_engine(void) {
1627 fd_global_shutdown();
1628 pollset_global_shutdown();
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001629 polling_island_global_shutdown();
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001630}
1631
1632static const grpc_event_engine_vtable vtable = {
Yash Tibrewal533d1182017-09-18 10:48:22 -07001633 sizeof(grpc_pollset),
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001634
Yash Tibrewal533d1182017-09-18 10:48:22 -07001635 fd_create,
1636 fd_wrapped_fd,
1637 fd_orphan,
1638 fd_shutdown,
1639 fd_notify_on_read,
1640 fd_notify_on_write,
1641 fd_is_shutdown,
1642 fd_get_read_notifier_pollset,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001643
Yash Tibrewal533d1182017-09-18 10:48:22 -07001644 pollset_init,
1645 pollset_shutdown,
1646 pollset_destroy,
1647 pollset_work,
1648 pollset_kick,
1649 pollset_add_fd,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001650
Yash Tibrewal533d1182017-09-18 10:48:22 -07001651 pollset_set_create,
1652 pollset_set_destroy,
1653 pollset_set_add_pollset,
1654 pollset_set_del_pollset,
1655 pollset_set_add_pollset_set,
1656 pollset_set_del_pollset_set,
1657 pollset_set_add_fd,
1658 pollset_set_del_fd,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001659
Yash Tibrewal533d1182017-09-18 10:48:22 -07001660 shutdown_engine,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001661};
1662
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001663/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1664 * Create a dummy epoll_fd to make sure epoll support is available */
1665static bool is_epoll_available() {
1666 int fd = epoll_create1(EPOLL_CLOEXEC);
1667 if (fd < 0) {
1668 gpr_log(
1669 GPR_ERROR,
1670 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1671 fd);
1672 return false;
1673 }
1674 close(fd);
1675 return true;
1676}
1677
Craig Tillerbaa14a92017-11-03 09:09:36 -07001678const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001679 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001680 /* If use of signals is disabled, we cannot use epoll engine*/
1681 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
yang-g30101b02017-11-06 14:35:30 -08001682 gpr_log(GPR_ERROR, "Skipping epollsig because use of signals is disabled.");
Craig Tiller4782d922017-11-10 09:53:21 -08001683 return nullptr;
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001684 }
1685
Ken Paysoncd7d0472016-10-11 12:24:20 -07001686 if (!grpc_has_wakeup_fd()) {
yang-g30101b02017-11-06 14:35:30 -08001687 gpr_log(GPR_ERROR, "Skipping epollsig because of no wakeup fd.");
Craig Tiller4782d922017-11-10 09:53:21 -08001688 return nullptr;
Ken Paysonbc544be2016-10-06 19:23:47 -07001689 }
1690
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001691 if (!is_epoll_available()) {
yang-g30101b02017-11-06 14:35:30 -08001692 gpr_log(GPR_ERROR, "Skipping epollsig because epoll is unavailable.");
Craig Tiller4782d922017-11-10 09:53:21 -08001693 return nullptr;
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001694 }
1695
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001696 if (!is_grpc_wakeup_signal_initialized) {
Sree Kuchibhotla0fda8802017-08-30 20:34:51 -07001697 if (explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001698 grpc_use_signal(SIGRTMIN + 6);
1699 } else {
yang-g30101b02017-11-06 14:35:30 -08001700 gpr_log(GPR_ERROR,
1701 "Skipping epollsig because uninitialized wakeup signal.");
Craig Tiller4782d922017-11-10 09:53:21 -08001702 return nullptr;
Craig Tillerf8382b82017-04-27 15:09:48 -07001703 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001704 }
1705
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001706 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001707
1708 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001709 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001710 }
1711
1712 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1713 polling_island_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001714 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001715 }
1716
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001717 return &vtable;
1718}
1719
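/* Selection note (hedged): ev_posix.cc consults this engine through
   grpc_init_epollsig_linux(); requesting it explicitly, e.g. via the
   GRPC_POLL_STRATEGY=epollsig environment variable, is what makes
   explicit_request true above and installs SIGRTMIN + 6 as the wakeup
   signal. */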
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001720#else /* defined(GRPC_LINUX_EPOLL_CREATE1) */
Muxi Yan67ff4052018-05-15 12:36:10 -07001721#if defined(GRPC_POSIX_SOCKET_EV_EPOLLSIG)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001722#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001723/* If GRPC_LINUX_EPOLL_CREATE1 is not defined, it means
1724 epoll_create1 is not available. Return NULL */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001725const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001726 bool explicit_request) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001727 return nullptr;
Craig Tillerf8382b82017-04-27 15:09:48 -07001728}
murgatroid99623dd4f2016-08-08 17:31:27 -07001729#endif /* defined(GRPC_POSIX_SOCKET_EV_EPOLLSIG) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001730
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001731void grpc_use_signal(int signum) {}
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001732#endif /* !defined(GRPC_LINUX_EPOLL_CREATE1) */