blob: 9a127806fa159a4fccc4e936a09b60c72c9ada81 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
murgatroid9954070892016-08-08 17:01:18 -070019#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070020
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070021#include <grpc/grpc_posix.h>
22
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070023/* This polling engine is only relevant on linux kernels supporting epoll() */
murgatroid99623dd4f2016-08-08 17:31:27 -070024#ifdef GRPC_LINUX_EPOLL
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070025
Craig Tiller4509c472017-04-27 19:05:13 +000026#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070027
28#include <assert.h>
29#include <errno.h>
Craig Tiller20397792017-07-18 11:35:27 -070030#include <limits.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070031#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070032#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070033#include <signal.h>
34#include <string.h>
35#include <sys/epoll.h>
36#include <sys/socket.h>
37#include <unistd.h>
38
39#include <grpc/support/alloc.h>
40#include <grpc/support/log.h>
41#include <grpc/support/string_util.h>
42#include <grpc/support/tls.h>
43#include <grpc/support/useful.h>
44
Craig Tillerb4bb1cd2017-07-20 14:18:17 -070045#include "src/core/lib/debug/stats.h"
Craig Tiller6b7c1fb2017-07-19 15:45:03 -070046#include "src/core/lib/iomgr/block_annotate.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070047#include "src/core/lib/iomgr/ev_posix.h"
48#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070049#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070050#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070051#include "src/core/lib/iomgr/wakeup_fd_posix.h"
52#include "src/core/lib/profiling/timers.h"
Craig Tillerfbf61bb2017-11-08 11:50:14 -080053#include "src/core/lib/support/manual_constructor.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070054
Craig Tillerbaa14a92017-11-03 09:09:36 -070055#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)
Craig Tillere24b24d2017-04-06 16:05:45 -070056
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070057#define GRPC_POLLING_TRACE(...) \
Craig Tillerbc0ab082017-05-05 10:42:44 -070058 if (GRPC_TRACER_ON(grpc_polling_trace)) { \
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070059 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070060 }
61
/* The signal used to kick threads out of epoll_wait(). -1 means the use of
   signals is disabled (see grpc_use_signal() below). */
static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070064
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070065/* Implements the function defined in grpc_posix.h. This function might be
66 * called before even calling grpc_init() to set either a different signal to
67 * use. If signum == -1, then the use of signals is disabled */
68void grpc_use_signal(int signum) {
69 grpc_wakeup_signal = signum;
70 is_grpc_wakeup_signal_initialized = true;
71
72 if (grpc_wakeup_signal < 0) {
73 gpr_log(GPR_INFO,
74 "Use of signals is disabled. Epoll engine will not be used");
75 } else {
76 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
77 grpc_wakeup_signal);
78 }
79}
80
81struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070082
/* The kinds of polling objects that can share a polling island. Used (in debug
   builds) to sanity-check that generic poll_obj helpers are handed the object
   type they expect. */
typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;

/* Common header embedded in grpc_fd, grpc_pollset and grpc_pollset_set so that
   the same helpers can operate on any of them. */
typedef struct poll_obj {
#ifndef NDEBUG
  poll_obj_type obj_type; /* Debug-only tag identifying the concrete type */
#endif
  gpr_mu mu;                 /* Protects 'pi' below */
  struct polling_island* pi; /* Polling island this object belongs to */
} poll_obj;
96
Craig Tillerbaa14a92017-11-03 09:09:36 -070097const char* poll_obj_string(poll_obj_type po_type) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080098 switch (po_type) {
99 case POLL_OBJ_FD:
100 return "fd";
101 case POLL_OBJ_POLLSET:
102 return "pollset";
103 case POLL_OBJ_POLLSET_SET:
104 return "pollset_set";
105 }
106
107 GPR_UNREACHABLE_CODE(return "UNKNOWN");
108}
109
Craig Tillerbaa14a92017-11-03 09:09:36 -0700110 /*******************************************************************************
111 * Fd Declarations
112 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800113
Craig Tillerbaa14a92017-11-03 09:09:36 -0700114#define FD_FROM_PO(po) ((grpc_fd*)(po))
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800115
struct grpc_fd {
  poll_obj po; /* Common polling-object header (mutex + polling island) */

  int fd; /* The underlying OS file descriptor */
  /* refst format:
     bit 0    : 1=Active / 0=Orphaned
     bits 1-n : refcount
     Ref/Unref by two to avoid altering the orphaned bit */
  gpr_atm refst;

  /* The fd is either closed or we relinquished control of it. In either
     cases, this indicates that the 'fd' on this structure is no longer
     valid */
  bool orphaned;

  /* Lock-free notification state for read/write readiness events */
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;

  struct grpc_fd* freelist_next; /* Next entry when parked on the fd freelist */
  grpc_closure* on_done_closure; /* Scheduled once the fd is fully orphaned */

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  grpc_iomgr_object iomgr_object; /* For iomgr object tracking / leak checks */
};
143
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700144/* Reference counting for fds */
ncteisend39010e2017-06-08 17:08:07 -0700145#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700146static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
147static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700148 int line);
149#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
150#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
151#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700152static void fd_ref(grpc_fd* fd);
153static void fd_unref(grpc_fd* fd);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700154#define GRPC_FD_REF(fd, reason) fd_ref(fd)
155#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
156#endif
157
158static void fd_global_init(void);
159static void fd_global_shutdown(void);
160
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700161/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700162 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700163 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700164
ncteisena1354852017-06-08 16:25:53 -0700165#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700166
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700167#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Craig Tillerb39307d2016-06-30 15:39:13 -0700168#define PI_UNREF(exec_ctx, p, r) \
169 pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700170
ncteisend39010e2017-06-08 17:08:07 -0700171#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700172
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700173#define PI_ADD_REF(p, r) pi_add_ref((p))
Craig Tillerb39307d2016-06-30 15:39:13 -0700174#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700175
ncteisena1354852017-06-08 16:25:53 -0700176#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700177
Craig Tiller460502e2016-10-13 10:02:08 -0700178/* This is also used as grpc_workqueue (by directly casing it) */
/* A shared epoll set plus the bookkeeping needed to merge islands together.
   This is also used as grpc_workqueue (by directly casting it). */
typedef struct polling_island {
  gpr_mu mu;
  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
     the refcount.
     Once the ref count becomes zero, this structure is destroyed which means
     we should ensure that there is never a scenario where a PI_ADD_REF() is
     racing with a PI_UNREF() that just made the ref_count zero. */
  gpr_atm ref_count;

  /* Pointer to the polling_island this merged into.
   * merged_to value is only set once in polling_island's lifetime (and that too
   * only if the island is merged with another island). Because of this, we can
   * use gpr_atm type here so that we can do atomic access on this and reduce
   * lock contention on 'mu' mutex.
   *
   * Note that if this field is not NULL (i.e not 0), all the remaining fields
   * (except mu and ref_count) are invalid and must be ignored. */
  gpr_atm merged_to;

  /* Number of threads currently polling on this island */
  gpr_atm poller_count;

  /* The fd of the underlying epoll set */
  int epoll_fd;

  /* The file descriptors in the epoll set (tracked so they can be removed or
     migrated when islands merge or shut down) */
  size_t fd_cnt;
  size_t fd_capacity;
  grpc_fd** fds;
} polling_island;
209
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700210/*******************************************************************************
211 * Pollset Declarations
212 */
struct grpc_pollset_worker {
  /* Thread id of this worker (the kick signal is delivered via this id) */
  pthread_t pt_id;

  /* Used to prevent a worker from getting kicked multiple times */
  gpr_atm is_kicked;

  /* Doubly-linked list of workers attached to the same pollset */
  struct grpc_pollset_worker* next;
  struct grpc_pollset_worker* prev;
};
222
struct grpc_pollset {
  poll_obj po; /* Common polling-object header (mutex + polling island) */

  grpc_pollset_worker root_worker; /* Sentinel head of the worker list */
  bool kicked_without_pollers;     /* A kick arrived while nobody was polling */

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure* shutdown_done; /* Called after shutdown is complete */
};
233
234/*******************************************************************************
235 * Pollset-set Declarations
236 */
/* A pollset_set carries no state of its own beyond the common poll_obj header:
   its only job is to share a polling island with its member fds/pollsets. */
struct grpc_pollset_set {
  poll_obj po;
};
240
241/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700242 * Common helpers
243 */
244
Craig Tillerbaa14a92017-11-03 09:09:36 -0700245static bool append_error(grpc_error** composite, grpc_error* error,
246 const char* desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700247 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700248 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700249 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700250 }
251 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700252 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700253}
254
255/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700256 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700257 */
258
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700259/* The wakeup fd that is used to wake up all threads in a Polling island. This
260 is useful in the polling island merge operation where we need to wakeup all
261 the threads currently polling the smaller polling island (so that they can
262 start polling the new/merged polling island)
263
264 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
265 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
266static grpc_wakeup_fd polling_island_wakeup_fd;
267
Craig Tiller2e620132016-10-10 15:27:44 -0700268/* The polling island being polled right now.
269 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700270static __thread polling_island* g_current_thread_polling_island;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700271
Craig Tillerb39307d2016-06-30 15:39:13 -0700272/* Forward declaration */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700273static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700274
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700275#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700276/* Currently TSAN may incorrectly flag data races between epoll_ctl and
277 epoll_wait for any grpc_fd structs that are added to the epoll set via
278 epoll_ctl and are returned (within a very short window) via epoll_wait().
279
280 To work-around this race, we establish a happens-before relation between
281 the code just-before epoll_ctl() and the code after epoll_wait() by using
282 this atomic */
283gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700284#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700285
Craig Tillerbaa14a92017-11-03 09:09:36 -0700286static void pi_add_ref(polling_island* pi);
287static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700288
ncteisena1354852017-06-08 16:25:53 -0700289#ifndef NDEBUG
/* Debug wrapper around pi_add_ref(): when polling tracing is enabled, logs the
   ref-count transition together with the reason and call site. NOTE: the
   logged 'old' count is a racy snapshot; the actual increment happens inside
   pi_add_ref(). */
static void pi_add_ref_dbg(polling_island* pi, const char* reason,
                           const char* file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG,
            "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, old_cnt + 1, reason, file, line);
  }
  pi_add_ref(pi);
}
301
/* Debug wrapper around pi_unref(): when polling tracing is enabled, logs the
   ref-count transition together with the reason and call site. NOTE: the
   logged 'old' count is a racy snapshot; the actual decrement (and possible
   island deletion) happens inside pi_unref(). */
static void pi_unref_dbg(grpc_exec_ctx* exec_ctx, polling_island* pi,
                         const char* reason, const char* file, int line) {
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
    gpr_log(GPR_DEBUG,
            "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
            " (%s) - (%s, %d)",
            pi, old_cnt, (old_cnt - 1), reason, file, line);
  }
  pi_unref(exec_ctx, pi);
}
313#endif
314
/* Takes a ref on the polling island. No memory barrier is needed here: to take
   a ref the caller must already hold a valid reference (see the invariant
   documented on polling_island::ref_count). */
static void pi_add_ref(polling_island* pi) {
  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700318
/* Drops a ref on the polling island, deleting it when the count hits zero. */
static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi) {
  /* If ref count went to zero, delete the polling island.
     Note that this deletion need not be done under a lock. Once the ref count
     goes to zero, we are guaranteed that no one else holds a reference to the
     polling island (and that there is no racing pi_add_ref() call either).

     Also, if we are deleting the polling island and the merged_to field is
     non-empty, we should remove a ref to the merged_to polling island
   */
  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
    /* Read merged_to BEFORE deleting pi: the field lives inside pi */
    polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    polling_island_delete(exec_ctx, pi);
    if (next != NULL) {
      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
    }
  }
}
336
/* Adds 'fd_count' fds to the polling island's epoll set and to its fd-tracking
   array. Failures are accumulated into '*error'; processing continues with the
   remaining fds. The caller is expected to hold pi->mu lock before calling
   this function. */
static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
                                          size_t fd_count, bool add_fd_refs,
                                          grpc_error** error) {
  int err;
  size_t i;
  struct epoll_event ev;
  char* err_msg;
  const char* err_desc = "polling_island_add_fds";

#ifdef GRPC_TSAN
  /* See the definition of g_epoll_sync for more context */
  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
#endif /* defined(GRPC_TSAN) */

  for (i = 0; i < fd_count; i++) {
    /* Edge-triggered, interested in both read and write readiness */
    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
    ev.data.ptr = fds[i];
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);

    if (err < 0) {
      /* EEXIST just means this fd is already in the epoll set; that is not
         an error worth reporting */
      if (errno != EEXIST) {
        gpr_asprintf(
            &err_msg,
            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
        gpr_free(err_msg);
      }

      /* Do not track fds that failed to be (newly) added */
      continue;
    }

    /* Grow the tracking array geometrically when full */
    if (pi->fd_cnt == pi->fd_capacity) {
      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
      pi->fds =
          (grpc_fd**)gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity);
    }

    pi->fds[pi->fd_cnt++] = fds[i];
    if (add_fd_refs) {
      GRPC_FD_REF(fds[i], "polling_island");
    }
  }
}
382
/* Adds a wakeup fd to the polling island's epoll set. Unlike regular fds, the
   wakeup fd itself (not a grpc_fd) is stored in ev.data.ptr so that
   epoll_wait() processing can distinguish wakeups from socket events. The
   caller is expected to hold pi->mu before calling this. */
static void polling_island_add_wakeup_fd_locked(polling_island* pi,
                                                grpc_wakeup_fd* wakeup_fd,
                                                grpc_error** error) {
  struct epoll_event ev;
  int err;
  char* err_msg;
  const char* err_desc = "polling_island_add_wakeup_fd";

  /* Edge-triggered, read-side interest only */
  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  ev.data.ptr = wakeup_fd;
  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
  /* EEXIST (already registered) is benign; report anything else */
  if (err < 0 && errno != EEXIST) {
    gpr_asprintf(&err_msg,
                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
                 "error: %d (%s)",
                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
                 strerror(errno));
    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    gpr_free(err_msg);
  }
}
406
/* Removes every tracked fd from the polling island's epoll set and resets the
   tracking array. Failures are accumulated into '*error'. The caller is
   expected to hold pi->mu lock before calling this function. */
static void polling_island_remove_all_fds_locked(polling_island* pi,
                                                 bool remove_fd_refs,
                                                 grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fds";

  for (i = 0; i < pi->fd_cnt; i++) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
    /* ENOENT means the fd already left the epoll set (e.g. it was closed);
       that is not an error worth reporting */
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(&err_msg,
                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
                   "error: %d (%s)",
                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }

    if (remove_fd_refs) {
      GRPC_FD_UNREF(pi->fds[i], "polling_island");
    }
  }

  pi->fd_cnt = 0;
}
434
/* Removes a single fd from the polling island's epoll set (unless the fd was
   already closed, in which case the kernel removed it automatically) and drops
   it from the tracking array. If the fd is not tracked by this island, this is
   a silent no-op apart from the epoll_ctl attempt. The caller is expected to
   hold pi->mu lock before calling this function. */
static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
                                            bool is_fd_closed,
                                            grpc_error** error) {
  int err;
  size_t i;
  char* err_msg;
  const char* err_desc = "polling_island_remove_fd";

  /* If fd is already closed, then it would have been automatically been removed
     from the epoll set */
  if (!is_fd_closed) {
    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
    /* ENOENT (not in the set) is benign; report anything else */
    if (err < 0 && errno != ENOENT) {
      gpr_asprintf(
          &err_msg,
          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
          pi->epoll_fd, fd->fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
      gpr_free(err_msg);
    }
  }

  /* Swap-remove from the tracking array (order is not significant) */
  for (i = 0; i < pi->fd_cnt; i++) {
    if (pi->fds[i] == fd) {
      pi->fds[i] = pi->fds[--pi->fd_cnt];
      GRPC_FD_UNREF(fd, "polling_island");
      break;
    }
  }
}
466
/* Creates a new polling island with a fresh epoll set, optionally seeding it
   with 'initial_fd'. On failure, any partially-constructed island is destroyed,
   '*error' describes the failure, and NULL is returned. Note: the new island
   starts with a ref_count of 0; the caller is expected to take the first ref.
   Might return NULL in case of an error */
static polling_island* polling_island_create(grpc_exec_ctx* exec_ctx,
                                             grpc_fd* initial_fd,
                                             grpc_error** error) {
  polling_island* pi = NULL;
  const char* err_desc = "polling_island_create";

  *error = GRPC_ERROR_NONE;

  pi = (polling_island*)gpr_malloc(sizeof(*pi));
  gpr_mu_init(&pi->mu);
  pi->fd_cnt = 0;
  pi->fd_capacity = 0;
  pi->fds = NULL;
  pi->epoll_fd = -1; /* Sentinel so the cleanup path knows not to close() */

  gpr_atm_rel_store(&pi->ref_count, 0);
  gpr_atm_rel_store(&pi->poller_count, 0);
  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);

  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);

  if (pi->epoll_fd < 0) {
    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
    goto done;
  }

  if (initial_fd != NULL) {
    /* No need to hold pi->mu: nobody else can see 'pi' yet */
    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
  }

done:
  if (*error != GRPC_ERROR_NONE) {
    polling_island_delete(exec_ctx, pi);
    pi = NULL;
  }
  return pi;
}
505
Craig Tillerbaa14a92017-11-03 09:09:36 -0700506static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700507 GPR_ASSERT(pi->fd_cnt == 0);
508
Craig Tiller0a06cd72016-07-14 13:21:24 -0700509 if (pi->epoll_fd >= 0) {
510 close(pi->epoll_fd);
511 }
Craig Tillerb39307d2016-06-30 15:39:13 -0700512 gpr_mu_destroy(&pi->mu);
513 gpr_free(pi->fds);
514 gpr_free(pi);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700515}
516
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700517/* Attempts to gets the last polling island in the linked list (liked by the
518 * 'merged_to' field). Since this does not lock the polling island, there are no
519 * guarantees that the island returned is the last island */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700520static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
521 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700522 while (next != NULL) {
523 pi = next;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700524 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700525 }
526
527 return pi;
528}
529
/* Gets the lock on the *latest* polling island i.e the last polling island in
   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
   returned polling island's mu.
   Usage: To lock/unlock polling island "pi", do the following:
   polling_island *pi_latest = polling_island_lock(pi);
   ...
   ... critical section ..
   ...
   gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
static polling_island* polling_island_lock(polling_island* pi) {
  polling_island* next = NULL;

  while (true) {
    next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
    if (next == NULL) {
      /* Looks like 'pi' is the last node in the linked list but unless we check
         this by holding the pi->mu lock, we cannot be sure (i.e without the
         pi->mu lock, we don't prevent island merges).
         To be absolutely sure, check once more by holding the pi->mu lock */
      gpr_mu_lock(&pi->mu);
      next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
      if (next == NULL) {
        /* pi is in fact the last node and we have the pi->mu lock. we're done
         */
        break;
      }

      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
       * isn't the lock we are interested in. Continue traversing the list */
      gpr_mu_unlock(&pi->mu);
    }

    pi = next;
  }

  return pi;
}
566
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700567/* Gets the lock on the *latest* polling islands in the linked lists pointed by
568 *p and *q (and also updates *p and *q to point to the latest polling islands)
569
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700570 This function is needed because calling the following block of code to obtain
571 locks on polling islands (*p and *q) is prone to deadlocks.
572 {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700573 polling_island_lock(*p, true);
574 polling_island_lock(*q, true);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700575 }
576
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700577 Usage/example:
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700578 polling_island *p1;
579 polling_island *p2;
580 ..
581 polling_island_lock_pair(&p1, &p2);
582 ..
583 .. Critical section with both p1 and p2 locked
584 ..
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700585 // Release locks: Always call polling_island_unlock_pair() to release locks
586 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700587*/
Craig Tillerbaa14a92017-11-03 09:09:36 -0700588static void polling_island_lock_pair(polling_island** p, polling_island** q) {
589 polling_island* pi_1 = *p;
590 polling_island* pi_2 = *q;
591 polling_island* next_1 = NULL;
592 polling_island* next_2 = NULL;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700593
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700594 /* The algorithm is simple:
595 - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
596 keep updating pi_1 and pi_2)
597 - Then obtain locks on the islands by following a lock order rule of
598 locking polling_island with lower address first
599 Special case: Before obtaining the locks, check if pi_1 and pi_2 are
600 pointing to the same island. If that is the case, we can just call
601 polling_island_lock()
602 - After obtaining both the locks, double check that the polling islands
603 are still the last polling islands in their respective linked lists
604 (this is because there might have been polling island merges before
605 we got the lock)
606 - If the polling islands are the last islands, we are done. If not,
607 release the locks and continue the process from the first step */
608 while (true) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700609 next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700610 while (next_1 != NULL) {
611 pi_1 = next_1;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700612 next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700613 }
614
Craig Tillerbaa14a92017-11-03 09:09:36 -0700615 next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700616 while (next_2 != NULL) {
617 pi_2 = next_2;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700618 next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700619 }
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700620
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700621 if (pi_1 == pi_2) {
622 pi_1 = pi_2 = polling_island_lock(pi_1);
623 break;
624 }
625
626 if (pi_1 < pi_2) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700627 gpr_mu_lock(&pi_1->mu);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700628 gpr_mu_lock(&pi_2->mu);
629 } else {
630 gpr_mu_lock(&pi_2->mu);
631 gpr_mu_lock(&pi_1->mu);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700632 }
633
Craig Tillerbaa14a92017-11-03 09:09:36 -0700634 next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
635 next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700636 if (next_1 == NULL && next_2 == NULL) {
637 break;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700638 }
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700639
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700640 gpr_mu_unlock(&pi_1->mu);
641 gpr_mu_unlock(&pi_2->mu);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700642 }
643
644 *p = pi_1;
645 *q = pi_2;
646}
647
Craig Tillerbaa14a92017-11-03 09:09:36 -0700648static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700649 if (p == q) {
650 gpr_mu_unlock(&p->mu);
651 } else {
652 gpr_mu_unlock(&p->mu);
653 gpr_mu_unlock(&q->mu);
654 }
655}
656
Craig Tillerbaa14a92017-11-03 09:09:36 -0700657static polling_island* polling_island_merge(polling_island* p,
658 polling_island* q,
659 grpc_error** error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700660 /* Get locks on both the polling islands */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700661 polling_island_lock_pair(&p, &q);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700662
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700663 if (p != q) {
664 /* Make sure that p points to the polling island with fewer fds than q */
665 if (p->fd_cnt > q->fd_cnt) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700666 GPR_SWAP(polling_island*, p, q);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700667 }
668
669 /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
670 Note that the refcounts on the fds being moved will not change here.
671 This is why the last param in the following two functions is 'false') */
672 polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
673 polling_island_remove_all_fds_locked(p, false, error);
674
675 /* Wakeup all the pollers (if any) on p so that they pickup this change */
676 polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);
677
678 /* Add the 'merged_to' link from p --> q */
679 gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
680 PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700681 }
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700682 /* else if p == q, nothing needs to be done */
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700683
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700684 polling_island_unlock_pair(p, q);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700685
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700686 /* Return the merged polling island (Note that no merge would have happened
687 if p == q which is ok) */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700688 return q;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700689}
690
Craig Tillerbaa14a92017-11-03 09:09:36 -0700691static grpc_error* polling_island_global_init() {
692 grpc_error* error = GRPC_ERROR_NONE;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700693
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700694 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
695 if (error == GRPC_ERROR_NONE) {
696 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
697 }
698
699 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700700}
701
Sree Kuchibhotlad627c102016-06-06 15:49:32 -0700702static void polling_island_global_shutdown() {
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700703 grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
Sree Kuchibhotlad627c102016-06-06 15:49:32 -0700704}
705
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700706/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700707 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700708 */
709
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700710/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700711 * but instead so that implementations with multiple threads in (for example)
712 * epoll_wait deal with the race between pollset removal and incoming poll
713 * notifications.
714 *
715 * The problem is that the poller ultimately holds a reference to this
716 * object, so it is very difficult to know when is safe to free it, at least
717 * without some expensive synchronization.
718 *
719 * If we keep the object freelisted, in the worst case losing this race just
720 * becomes a spurious read notification on a reused fd.
721 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700722
723/* The alarm system needs to be able to wakeup 'some poller' sometimes
724 * (specifically when a new alarm needs to be triggered earlier than the next
725 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
726 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700727
Craig Tillerbaa14a92017-11-03 09:09:36 -0700728static grpc_fd* fd_freelist = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700729static gpr_mu fd_freelist_mu;
730
ncteisend39010e2017-06-08 17:08:07 -0700731#ifndef NDEBUG
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700732#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
733#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
Craig Tillerbaa14a92017-11-03 09:09:36 -0700734static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700735 int line) {
ncteisend39010e2017-06-08 17:08:07 -0700736 if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
ncteisen973863d2017-06-12 10:28:50 -0700737 gpr_log(GPR_DEBUG,
738 "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
ncteisen3ac64f82017-06-19 17:35:44 -0700739 fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
ncteisend39010e2017-06-08 17:08:07 -0700740 gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
741 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700742#else
743#define REF_BY(fd, n, reason) ref_by(fd, n)
744#define UNREF_BY(fd, n, reason) unref_by(fd, n)
Craig Tillerbaa14a92017-11-03 09:09:36 -0700745static void ref_by(grpc_fd* fd, int n) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700746#endif
747 GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
748}
749
ncteisend39010e2017-06-08 17:08:07 -0700750#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700751static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700752 int line) {
ncteisend39010e2017-06-08 17:08:07 -0700753 if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
ncteisen973863d2017-06-12 10:28:50 -0700754 gpr_log(GPR_DEBUG,
755 "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
ncteisen3ac64f82017-06-19 17:35:44 -0700756 fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
ncteisend39010e2017-06-08 17:08:07 -0700757 gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
758 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700759#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700760static void unref_by(grpc_fd* fd, int n) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700761#endif
Noah Eisen264879f2017-06-20 17:14:47 -0700762 gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700763 if (old == n) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700764 /* Add the fd to the freelist */
765 gpr_mu_lock(&fd_freelist_mu);
766 fd->freelist_next = fd_freelist;
767 fd_freelist = fd;
768 grpc_iomgr_unregister_object(&fd->iomgr_object);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800769
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800770 fd->read_closure.Destroy();
771 fd->write_closure.Destroy();
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700772
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700773 gpr_mu_unlock(&fd_freelist_mu);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700774 } else {
775 GPR_ASSERT(old > n);
776 }
777}
778
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700779/* Increment refcount by two to avoid changing the orphan bit */
ncteisend39010e2017-06-08 17:08:07 -0700780#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700781static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700782 int line) {
783 ref_by(fd, 2, reason, file, line);
784}
785
Craig Tillerbaa14a92017-11-03 09:09:36 -0700786static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700787 int line) {
788 unref_by(fd, 2, reason, file, line);
789}
790#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700791static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
792static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700793#endif
794
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700795static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
796
/* Drains and frees the fd freelist and destroys its mutex. */
static void fd_global_shutdown(void) {
  /* Lock/unlock with no critical section: presumably acts as a barrier so
     that any thread still inside the freelist critical section (e.g. an
     in-flight unref_by) finishes before we start freeing — TODO(review):
     confirm this is the intent. */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
808
Craig Tillerbaa14a92017-11-03 09:09:36 -0700809static grpc_fd* fd_create(int fd, const char* name) {
810 grpc_fd* new_fd = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700811
812 gpr_mu_lock(&fd_freelist_mu);
813 if (fd_freelist != NULL) {
814 new_fd = fd_freelist;
815 fd_freelist = fd_freelist->freelist_next;
816 }
817 gpr_mu_unlock(&fd_freelist_mu);
818
819 if (new_fd == NULL) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700820 new_fd = (grpc_fd*)gpr_malloc(sizeof(grpc_fd));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800821 gpr_mu_init(&new_fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700822 }
823
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800824 /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
825 * is a newly created fd (or an fd we got from the freelist), no one else
826 * would be holding a lock to it anyway. */
827 gpr_mu_lock(&new_fd->po.mu);
828 new_fd->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -0400829#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -0800830 new_fd->po.obj_type = POLL_OBJ_FD;
831#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700832
Sree Kuchibhotla0224dcc2016-06-22 18:04:00 -0700833 gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700834 new_fd->fd = fd;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700835 new_fd->orphaned = false;
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800836 new_fd->read_closure.Init();
837 new_fd->write_closure.Init();
Sree Kuchibhotla61fe0942017-02-14 12:26:56 -0800838 gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800839
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700840 new_fd->freelist_next = NULL;
841 new_fd->on_done_closure = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700842
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800843 gpr_mu_unlock(&new_fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700844
Craig Tillerbaa14a92017-11-03 09:09:36 -0700845 char* fd_name;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700846 gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
847 grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700848 gpr_free(fd_name);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700849 return new_fd;
850}
851
Craig Tillerbaa14a92017-11-03 09:09:36 -0700852static int fd_wrapped_fd(grpc_fd* fd) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700853 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800854 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700855 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700856 ret_fd = fd->fd;
857 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800858 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700859
860 return ret_fd;
861}
862
Craig Tillerbaa14a92017-11-03 09:09:36 -0700863static void fd_orphan(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
864 grpc_closure* on_done, int* release_fd,
865 bool already_closed, const char* reason) {
866 grpc_error* error = GRPC_ERROR_NONE;
867 polling_island* unref_pi = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700868
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800869 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700870 fd->on_done_closure = on_done;
871
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700872 /* Remove the active status but keep referenced. We want this grpc_fd struct
873 to be alive (and not added to freelist) until the end of this function */
874 REF_BY(fd, 1, reason);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700875
876 /* Remove the fd from the polling island:
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700877 - Get a lock on the latest polling island (i.e the last island in the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800878 linked list pointed by fd->po.pi). This is the island that
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700879 would actually contain the fd
880 - Remove the fd from the latest polling island
881 - Unlock the latest polling island
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800882 - Set fd->po.pi to NULL (but remove the ref on the polling island
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700883 before doing this.) */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800884 if (fd->po.pi != NULL) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700885 polling_island* pi_latest = polling_island_lock(fd->po.pi);
Yuchen Zengd40a7ae2017-07-12 15:59:56 -0700886 polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700887 gpr_mu_unlock(&pi_latest->mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700888
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800889 unref_pi = fd->po.pi;
890 fd->po.pi = NULL;
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -0700891 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700892
Yuchen Zeng5b48dea2017-07-12 19:00:35 -0700893 /* If release_fd is not NULL, we should be relinquishing control of the file
894 descriptor fd->fd (but we still own the grpc_fd structure). */
895 if (release_fd != NULL) {
896 *release_fd = fd->fd;
897 } else {
898 close(fd->fd);
899 }
900
901 fd->orphaned = true;
902
ncteisen969b46e2017-06-08 14:57:11 -0700903 GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700904
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800905 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700906 UNREF_BY(fd, 2, reason); /* Drop the reference */
Craig Tiller15007612016-07-06 09:36:16 -0700907 if (unref_pi != NULL) {
Craig Tiller0a06cd72016-07-14 13:21:24 -0700908 /* Unref stale polling island here, outside the fd lock above.
909 The polling island owns a workqueue which owns an fd, and unreffing
910 inside the lock can cause an eventual lock loop that makes TSAN very
911 unhappy. */
Craig Tiller15007612016-07-06 09:36:16 -0700912 PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
913 }
Yuchen Zeng4ebace72017-06-05 17:24:06 -0700914 if (error != GRPC_ERROR_NONE) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700915 const char* msg = grpc_error_string(error);
Yuchen Zeng4ebace72017-06-05 17:24:06 -0700916 gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
917 }
Yuchen Zenga0399f22016-08-04 17:52:53 -0700918 GRPC_ERROR_UNREF(error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700919}
920
Craig Tillerbaa14a92017-11-03 09:09:36 -0700921static grpc_pollset* fd_get_read_notifier_pollset(grpc_exec_ctx* exec_ctx,
922 grpc_fd* fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800923 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700924 return (grpc_pollset*)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700925}
926
Craig Tillerbaa14a92017-11-03 09:09:36 -0700927static bool fd_is_shutdown(grpc_fd* fd) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800928 return fd->read_closure->IsShutdown();
Sree Kuchibhotla24b6eae2016-06-21 18:01:14 -0700929}
930
Sree Kuchibhotla0100b2f2016-06-21 17:38:13 -0700931/* Might be called multiple times */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700932static void fd_shutdown(grpc_exec_ctx* exec_ctx, grpc_fd* fd, grpc_error* why) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800933 if (fd->read_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why))) {
Sree Kuchibhotla0100b2f2016-06-21 17:38:13 -0700934 shutdown(fd->fd, SHUT_RDWR);
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800935 fd->write_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why));
Sree Kuchibhotla0100b2f2016-06-21 17:38:13 -0700936 }
Craig Tiller376887d2017-04-06 08:27:03 -0700937 GRPC_ERROR_UNREF(why);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700938}
939
Craig Tillerbaa14a92017-11-03 09:09:36 -0700940static void fd_notify_on_read(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
941 grpc_closure* closure) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800942 fd->read_closure->NotifyOn(exec_ctx, closure);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700943}
944
Craig Tillerbaa14a92017-11-03 09:09:36 -0700945static void fd_notify_on_write(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
946 grpc_closure* closure) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800947 fd->write_closure->NotifyOn(exec_ctx, closure);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700948}
949
950/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700951 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700952 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700953GPR_TLS_DECL(g_current_thread_pollset);
954GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700955static __thread bool g_initialized_sigmask;
956static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700957
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700958static void sig_handler(int sig_num) {
Sree Kuchibhotlad627c102016-06-06 15:49:32 -0700959#ifdef GRPC_EPOLL_DEBUG
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700960 gpr_log(GPR_INFO, "Received signal %d", sig_num);
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700961#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700962}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700963
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -0700964static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700965
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700966/* Global state management */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700967static grpc_error* pollset_global_init(void) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700968 gpr_tls_init(&g_current_thread_pollset);
969 gpr_tls_init(&g_current_thread_worker);
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700970 poller_kick_init();
Craig Tillerc3571792017-05-02 12:33:38 -0700971 return GRPC_ERROR_NONE;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700972}
973
/* Tears down the thread-local slots created in pollset_global_init(). */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
978
Craig Tillerbaa14a92017-11-03 09:09:36 -0700979static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
980 grpc_error* err = GRPC_ERROR_NONE;
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700981
982 /* Kick the worker only if it was not already kicked */
983 if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
984 GRPC_POLLING_TRACE(
985 "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
Craig Tillerbaa14a92017-11-03 09:09:36 -0700986 (void*)worker, (long int)worker->pt_id);
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700987 int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
988 if (err_num != 0) {
989 err = GRPC_OS_ERROR(err_num, "pthread_kill");
990 }
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700991 }
992 return err;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700993}
994
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700995/* Return 1 if the pollset has active threads in pollset_work (pollset must
996 * be locked) */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700997static int pollset_has_workers(grpc_pollset* p) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700998 return p->root_worker.next != &p->root_worker;
999}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001000
Craig Tillerbaa14a92017-11-03 09:09:36 -07001001static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001002 worker->prev->next = worker->next;
1003 worker->next->prev = worker->prev;
1004}
1005
Craig Tillerbaa14a92017-11-03 09:09:36 -07001006static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001007 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001008 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001009 remove_worker(p, w);
1010 return w;
1011 } else {
1012 return NULL;
1013 }
1014}
1015
Craig Tillerbaa14a92017-11-03 09:09:36 -07001016static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001017 worker->next = &p->root_worker;
1018 worker->prev = worker->next->prev;
1019 worker->prev->next = worker->next->prev = worker;
1020}
1021
Craig Tillerbaa14a92017-11-03 09:09:36 -07001022static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001023 worker->prev = &p->root_worker;
1024 worker->next = worker->prev->next;
1025 worker->prev->next = worker->next->prev = worker;
1026}
1027
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001028/* p->mu must be held before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001029static grpc_error* pollset_kick(grpc_exec_ctx* exec_ctx, grpc_pollset* p,
1030 grpc_pollset_worker* specific_worker) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001031 GPR_TIMER_BEGIN("pollset_kick", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001032 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller0ff222a2017-09-01 09:41:43 -07001033 GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001034 const char* err_desc = "Kick Failure";
1035 grpc_pollset_worker* worker = specific_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001036 if (worker != NULL) {
1037 if (worker == GRPC_POLLSET_KICK_BROADCAST) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001038 if (pollset_has_workers(p)) {
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001039 GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001040 for (worker = p->root_worker.next; worker != &p->root_worker;
1041 worker = worker->next) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001042 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001043 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001044 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001045 }
Craig Tillera218a062016-06-26 09:58:37 -07001046 GPR_TIMER_END("pollset_kick.broadcast", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001047 } else {
1048 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001049 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001050 } else {
1051 GPR_TIMER_MARK("kicked_specifically", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001052 if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001053 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001054 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001055 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001056 } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
1057 /* Since worker == NULL, it means that we can kick "any" worker on this
1058 pollset 'p'. If 'p' happens to be the same pollset this thread is
1059 currently polling (i.e in pollset_work() function), then there is no need
1060 to kick any other worker since the current thread can just absorb the
1061 kick. This is the reason why we enter this case only when
1062 g_current_thread_pollset is != p */
1063
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001064 GPR_TIMER_MARK("kick_anonymous", 0);
1065 worker = pop_front_worker(p);
1066 if (worker != NULL) {
1067 GPR_TIMER_MARK("finally_kick", 0);
1068 push_back_worker(p, worker);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001069 append_error(&error, pollset_worker_kick(worker), err_desc);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001070 } else {
1071 GPR_TIMER_MARK("kicked_no_pollers", 0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001072 p->kicked_without_pollers = true;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001073 }
1074 }
1075
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001076 GPR_TIMER_END("pollset_kick", 0);
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001077 GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
1078 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001079}
1080
Craig Tillerbaa14a92017-11-03 09:09:36 -07001081static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001082 gpr_mu_init(&pollset->po.mu);
1083 *mu = &pollset->po.mu;
1084 pollset->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001085#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001086 pollset->po.obj_type = POLL_OBJ_POLLSET;
1087#endif
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001088
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001089 pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001090 pollset->kicked_without_pollers = false;
1091
1092 pollset->shutting_down = false;
1093 pollset->finish_shutdown_called = false;
1094 pollset->shutdown_done = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001095}
1096
Craig Tillerbaa14a92017-11-03 09:09:36 -07001097static int poll_deadline_to_millis_timeout(grpc_exec_ctx* exec_ctx,
Craig Tiller20397792017-07-18 11:35:27 -07001098 grpc_millis millis) {
1099 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
1100 grpc_millis delta = millis - grpc_exec_ctx_now(exec_ctx);
1101 if (delta > INT_MAX)
1102 return INT_MAX;
1103 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001104 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001105 else
1106 return (int)delta;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001107}
1108
Craig Tillerbaa14a92017-11-03 09:09:36 -07001109static void fd_become_readable(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
1110 grpc_pollset* notifier) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -08001111 fd->read_closure->SetReady(exec_ctx);
Sree Kuchibhotla82d73412017-02-09 18:27:45 -08001112
Sree Kuchibhotla4db8c822017-02-12 17:07:31 -08001113 /* Note, it is possible that fd_become_readable might be called twice with
Sree Kuchibhotla4c60d0d2017-02-12 17:09:08 -08001114 different 'notifier's when an fd becomes readable and it is in two epoll
1115 sets (This can happen briefly during polling island merges). In such cases
1116 it does not really matter which notifer is set as the read_notifier_pollset
1117 (They would both point to the same polling island anyway) */
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -08001118 /* Use release store to match with acquire load in fd_get_read_notifier */
1119 gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001120}
1121
Craig Tillerbaa14a92017-11-03 09:09:36 -07001122static void fd_become_writable(grpc_exec_ctx* exec_ctx, grpc_fd* fd) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -08001123 fd->write_closure->SetReady(exec_ctx);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001124}
1125
Craig Tillerbaa14a92017-11-03 09:09:36 -07001126static void pollset_release_polling_island(grpc_exec_ctx* exec_ctx,
1127 grpc_pollset* ps,
1128 const char* reason) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001129 if (ps->po.pi != NULL) {
1130 PI_UNREF(exec_ctx, ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001131 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001132 ps->po.pi = NULL;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001133}
1134
Craig Tillerbaa14a92017-11-03 09:09:36 -07001135static void finish_shutdown_locked(grpc_exec_ctx* exec_ctx,
1136 grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001137 /* The pollset cannot have any workers if we are at this stage */
1138 GPR_ASSERT(!pollset_has_workers(pollset));
1139
1140 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001141
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001142 /* Release the ref and set pollset->po.pi to NULL */
Craig Tillerb39307d2016-06-30 15:39:13 -07001143 pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
ncteisen969b46e2017-06-08 14:57:11 -07001144 GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001145}
1146
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001147/* pollset->po.mu lock must be held by the caller before calling this */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001148static void pollset_shutdown(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1149 grpc_closure* closure) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001150 GPR_TIMER_BEGIN("pollset_shutdown", 0);
1151 GPR_ASSERT(!pollset->shutting_down);
1152 pollset->shutting_down = true;
1153 pollset->shutdown_done = closure;
Craig Tiller0ff222a2017-09-01 09:41:43 -07001154 pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001155
1156 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1157 because it would release the underlying polling island. In such a case, we
1158 let the last worker call finish_shutdown_locked() from pollset_work() */
1159 if (!pollset_has_workers(pollset)) {
1160 GPR_ASSERT(!pollset->finish_shutdown_called);
1161 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
1162 finish_shutdown_locked(exec_ctx, pollset);
1163 }
1164 GPR_TIMER_END("pollset_shutdown", 0);
1165}
1166
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here. (exec_ctx is unused.) */
static void pollset_destroy(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset) {
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}
1174
Craig Tiller84ea3412016-09-08 14:57:56 -07001175#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001176/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001177static void pollset_work_and_unlock(grpc_exec_ctx* exec_ctx,
1178 grpc_pollset* pollset,
1179 grpc_pollset_worker* worker, int timeout_ms,
1180 sigset_t* sig_mask, grpc_error** error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001181 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001182 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001183 int ep_rv;
Craig Tillerbaa14a92017-11-03 09:09:36 -07001184 polling_island* pi = NULL;
1185 char* err_msg;
1186 const char* err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001187 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1188
1189 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001190 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001191
1192 Since epoll_fd is immutable, we can read it without obtaining the polling
1193 island lock. There is however a possibility that the polling island (from
1194 which we got the epoll_fd) got merged with another island while we are
1195 in this function. This is still okay because in such a case, we will wakeup
1196 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001197 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001198
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001199 if (pollset->po.pi == NULL) {
1200 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1201 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001202 GPR_TIMER_END("pollset_work_and_unlock", 0);
1203 return; /* Fatal error. We cannot continue */
1204 }
1205
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001206 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001207 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001208 (void*)pollset, (void*)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001209 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001210
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001211 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001212 epoll_fd = pi->epoll_fd;
1213
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001214 /* Update the pollset->po.pi since the island being pointed by
1215 pollset->po.pi maybe older than the one pointed by pi) */
1216 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001217 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1218 polling island to be deleted */
1219 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001220 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1221 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001222 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001223
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001224 /* Add an extra ref so that the island does not get destroyed (which means
1225 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1226 epoll_fd */
1227 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001228 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001229
Craig Tiller61f96c12017-05-12 13:36:39 -07001230 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1231 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001232
Craig Tiller61f96c12017-05-12 13:36:39 -07001233 GRPC_SCHEDULING_START_BLOCKING_REGION;
Craig Tillerb4bb1cd2017-07-20 14:18:17 -07001234 GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001235 ep_rv =
1236 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Craig Tiller781e91a2017-07-17 16:21:00 -07001237 GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001238 if (ep_rv < 0) {
1239 if (errno != EINTR) {
1240 gpr_asprintf(&err_msg,
1241 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1242 epoll_fd, errno, strerror(errno));
1243 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1244 } else {
1245 /* We were interrupted. Save an interation by doing a zero timeout
1246 epoll_wait to see if there are any other events of interest */
1247 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001248 (void*)pollset, (void*)worker);
Craig Tiller61f96c12017-05-12 13:36:39 -07001249 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001250 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001251 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001252
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001253#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001254 /* See the definition of g_poll_sync for more details */
1255 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001256#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001257
Craig Tiller61f96c12017-05-12 13:36:39 -07001258 for (int i = 0; i < ep_rv; ++i) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001259 void* data_ptr = ep_ev[i].data.ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001260 if (data_ptr == &polling_island_wakeup_fd) {
1261 GRPC_POLLING_TRACE(
1262 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1263 "%d) got merged",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001264 (void*)pollset, (void*)worker, epoll_fd);
Craig Tiller61f96c12017-05-12 13:36:39 -07001265 /* This means that our polling island is merged with a different
1266 island. We do not have to do anything here since the subsequent call
1267 to the function pollset_work_and_unlock() will pick up the correct
1268 epoll_fd */
1269 } else {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001270 grpc_fd* fd = (grpc_fd*)data_ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001271 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1272 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1273 int write_ev = ep_ev[i].events & EPOLLOUT;
1274 if (read_ev || cancel) {
1275 fd_become_readable(exec_ctx, fd, pollset);
1276 }
1277 if (write_ev || cancel) {
1278 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001279 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001280 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001281 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001282
Craig Tiller61f96c12017-05-12 13:36:39 -07001283 g_current_thread_polling_island = NULL;
1284 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1285
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001286 GPR_ASSERT(pi != NULL);
1287
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001288 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001289 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001290 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001291 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001292 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001293 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001294
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001295 GPR_TIMER_END("pollset_work_and_unlock", 0);
1296}
1297
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001298/* pollset->po.mu lock must be held by the caller before calling this.
1299 The function pollset_work() may temporarily release the lock (pollset->po.mu)
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001300 during the course of its execution but it will always re-acquire the lock and
1301 ensure that it is held by the time the function returns */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001302static grpc_error* pollset_work(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1303 grpc_pollset_worker** worker_hdl,
Craig Tiller20397792017-07-18 11:35:27 -07001304 grpc_millis deadline) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001305 GPR_TIMER_BEGIN("pollset_work", 0);
Craig Tillerbaa14a92017-11-03 09:09:36 -07001306 grpc_error* error = GRPC_ERROR_NONE;
Craig Tiller20397792017-07-18 11:35:27 -07001307 int timeout_ms = poll_deadline_to_millis_timeout(exec_ctx, deadline);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001308
1309 sigset_t new_mask;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001310
1311 grpc_pollset_worker worker;
1312 worker.next = worker.prev = NULL;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001313 worker.pt_id = pthread_self();
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001314 gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001315
Craig Tiller557c88c2017-04-05 17:20:18 -07001316 if (worker_hdl) *worker_hdl = &worker;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001317
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001318 gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
1319 gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001320
1321 if (pollset->kicked_without_pollers) {
1322 /* If the pollset was kicked without pollers, pretend that the current
1323 worker got the kick and skip polling. A kick indicates that there is some
1324 work that needs attention like an event on the completion queue or an
1325 alarm */
1326 GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
1327 pollset->kicked_without_pollers = 0;
1328 } else if (!pollset->shutting_down) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001329 /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001330 (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
1331 worker that there is some pending work that needs immediate attention
1332 (like an event on the completion queue, or a polling island merge that
1333 results in a new epoll-fd to wait on) and that the worker should not
1334 spend time waiting in epoll_pwait().
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001335
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001336 A worker can be kicked anytime from the point it is added to the pollset
1337 via push_front_worker() (or push_back_worker()) to the point it is
1338 removed via remove_worker().
1339 If the worker is kicked before/during it calls epoll_pwait(), it should
1340 immediately exit from epoll_wait(). If the worker is kicked after it
1341 returns from epoll_wait(), then nothing really needs to be done.
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001342
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001343 To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001344 times *except* when it is in epoll_pwait(). This way, the worker never
1345 misses acting on a kick */
1346
Craig Tiller19196992016-06-27 18:45:56 -07001347 if (!g_initialized_sigmask) {
1348 sigemptyset(&new_mask);
1349 sigaddset(&new_mask, grpc_wakeup_signal);
1350 pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
1351 sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
1352 g_initialized_sigmask = true;
1353 /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
1354 This is the mask used at all times *except during
1355 epoll_wait()*"
1356 g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
Craig Tiller510ff692016-06-27 20:31:49 -07001357 this is the mask to use *during epoll_wait()*
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001358
Craig Tiller19196992016-06-27 18:45:56 -07001359 The new_mask is set on the worker before it is added to the pollset
Craig Tiller510ff692016-06-27 20:31:49 -07001360 (i.e before it can be kicked) */
Craig Tiller19196992016-06-27 18:45:56 -07001361 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001362
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001363 push_front_worker(pollset, &worker); /* Add worker to pollset */
1364
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001365 pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
1366 &g_orig_sigmask, &error);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001367 grpc_exec_ctx_flush(exec_ctx);
1368
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001369 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla34217242016-06-29 00:19:07 -07001370
1371 /* Note: There is no need to reset worker.is_kicked to 0 since we are no
1372 longer going to use this worker */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001373 remove_worker(pollset, &worker);
1374 }
1375
1376 /* If we are the last worker on the pollset (i.e pollset_has_workers() is
1377 false at this point) and the pollset is shutting down, we may have to
1378 finish the shutdown process by calling finish_shutdown_locked().
1379 See pollset_shutdown() for more details.
1380
1381 Note: Continuing to access pollset here is safe; it is the caller's
1382 responsibility to not destroy a pollset when it has outstanding calls to
1383 pollset_work() */
1384 if (pollset->shutting_down && !pollset_has_workers(pollset) &&
1385 !pollset->finish_shutdown_called) {
1386 GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
1387 finish_shutdown_locked(exec_ctx, pollset);
1388
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001389 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001390 grpc_exec_ctx_flush(exec_ctx);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001391 gpr_mu_lock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001392 }
1393
Craig Tiller557c88c2017-04-05 17:20:18 -07001394 if (worker_hdl) *worker_hdl = NULL;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001395
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001396 gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
1397 gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001398
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001399 GPR_TIMER_END("pollset_work", 0);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001400
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001401 GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
1402 return error;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001403}
1404
Craig Tillerbaa14a92017-11-03 09:09:36 -07001405static void add_poll_object(grpc_exec_ctx* exec_ctx, poll_obj* bag,
1406 poll_obj_type bag_type, poll_obj* item,
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001407 poll_obj_type item_type) {
1408 GPR_TIMER_BEGIN("add_poll_object", 0);
1409
ncteisene9cd8a82017-06-29 06:03:52 -04001410#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001411 GPR_ASSERT(item->obj_type == item_type);
1412 GPR_ASSERT(bag->obj_type == bag_type);
1413#endif
Craig Tiller57726ca2016-09-12 11:59:45 -07001414
Craig Tillerbaa14a92017-11-03 09:09:36 -07001415 grpc_error* error = GRPC_ERROR_NONE;
1416 polling_island* pi_new = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001417
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001418 gpr_mu_lock(&bag->mu);
1419 gpr_mu_lock(&item->mu);
1420
Craig Tiller7212c232016-07-06 13:11:09 -07001421retry:
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001422 /*
1423 * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
1424 * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
1425 * a refcount of 2) and point item->pi and bag->pi to the new island
1426 * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
1427 * the other's non-NULL pi
1428 * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
1429 * polling islands and update item->pi and bag->pi to point to the new
1430 * island
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001431 */
Craig Tiller8e8027b2016-07-07 10:42:09 -07001432
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001433 /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
1434 * orphaned */
1435 if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
1436 gpr_mu_unlock(&item->mu);
1437 gpr_mu_unlock(&bag->mu);
Craig Tiller42ac6db2016-07-06 17:13:56 -07001438 return;
1439 }
1440
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001441 if (item->pi == bag->pi) {
1442 pi_new = item->pi;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -07001443 if (pi_new == NULL) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001444 /* GPR_ASSERT(item->pi == bag->pi == NULL) */
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001445
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001446 /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
1447 * we need to do some extra work to make TSAN happy */
1448 if (item_type == POLL_OBJ_FD) {
1449 /* Unlock before creating a new polling island: the polling island will
1450 create a workqueue which creates a file descriptor, and holding an fd
1451 lock here can eventually cause a loop to appear to TSAN (making it
1452 unhappy). We don't think it's a real loop (there's an epoch point
1453 where that loop possibility disappears), but the advantages of
1454 keeping TSAN happy outweigh any performance advantage we might have
1455 by keeping the lock held. */
1456 gpr_mu_unlock(&item->mu);
1457 pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
1458 gpr_mu_lock(&item->mu);
Sree Kuchibhotlaa129adf2016-10-26 16:44:44 -07001459
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001460 /* Need to reverify any assumptions made between the initial lock and
1461 getting to this branch: if they've changed, we need to throw away our
1462 work and figure things out again. */
1463 if (item->pi != NULL) {
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001464 GRPC_POLLING_TRACE(
1465 "add_poll_object: Raced creating new polling island. pi_new: %p "
1466 "(fd: %d, %s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001467 (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
1468 (void*)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001469 /* No need to lock 'pi_new' here since this is a new polling island
Sree Kuchibhotla8214b872017-02-23 12:49:48 -08001470 and no one has a reference to it yet */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001471 polling_island_remove_all_fds_locked(pi_new, true, &error);
1472
1473 /* Ref and unref so that the polling island gets deleted during unref
1474 */
1475 PI_ADD_REF(pi_new, "dance_of_destruction");
1476 PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
1477 goto retry;
1478 }
Craig Tiller27da6422016-07-06 13:14:46 -07001479 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001480 pi_new = polling_island_create(exec_ctx, NULL, &error);
Craig Tiller7212c232016-07-06 13:11:09 -07001481 }
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001482
1483 GRPC_POLLING_TRACE(
1484 "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
1485 "%s: %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001486 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1487 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001488 } else {
1489 GRPC_POLLING_TRACE(
1490 "add_poll_object: Same polling island. pi: %p (%s, %s)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001491 (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001492 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001493 } else if (item->pi == NULL) {
1494 /* GPR_ASSERT(bag->pi != NULL) */
1495 /* Make pi_new point to latest pi*/
1496 pi_new = polling_island_lock(bag->pi);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001497
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001498 if (item_type == POLL_OBJ_FD) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001499 grpc_fd* fd = FD_FROM_PO(item);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001500 polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
1501 }
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001502
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001503 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001504 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001505 "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
1506 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001507 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1508 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001509 } else if (bag->pi == NULL) {
1510 /* GPR_ASSERT(item->pi != NULL) */
1511 /* Make pi_new to point to latest pi */
1512 pi_new = polling_island_lock(item->pi);
1513 gpr_mu_unlock(&pi_new->mu);
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001514 GRPC_POLLING_TRACE(
1515 "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
1516 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001517 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1518 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotla5098f912016-05-31 10:58:17 -07001519 } else {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001520 pi_new = polling_island_merge(item->pi, bag->pi, &error);
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001521 GRPC_POLLING_TRACE(
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001522 "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
1523 "bag(%s): %p)",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001524 (void*)pi_new, poll_obj_string(item_type), (void*)item,
1525 poll_obj_string(bag_type), (void*)bag);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001526 }
1527
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001528 /* At this point, pi_new is the polling island that both item->pi and bag->pi
1529 MUST be pointing to */
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001530
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001531 if (item->pi != pi_new) {
1532 PI_ADD_REF(pi_new, poll_obj_string(item_type));
1533 if (item->pi != NULL) {
1534 PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001535 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001536 item->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001537 }
1538
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001539 if (bag->pi != pi_new) {
1540 PI_ADD_REF(pi_new, poll_obj_string(bag_type));
1541 if (bag->pi != NULL) {
1542 PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001543 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001544 bag->pi = pi_new;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001545 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001546
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001547 gpr_mu_unlock(&item->mu);
1548 gpr_mu_unlock(&bag->mu);
Craig Tiller15007612016-07-06 09:36:16 -07001549
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001550 GRPC_LOG_IF_ERROR("add_poll_object", error);
1551 GPR_TIMER_END("add_poll_object", 0);
1552}
Craig Tiller57726ca2016-09-12 11:59:45 -07001553
Craig Tillerbaa14a92017-11-03 09:09:36 -07001554static void pollset_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1555 grpc_fd* fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001556 add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
Sree Kuchibhotla499b94b2016-11-18 14:35:47 -08001557 POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001558}
1559
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001560/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001561 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001562 */
1563
Craig Tillerbaa14a92017-11-03 09:09:36 -07001564static grpc_pollset_set* pollset_set_create(void) {
1565 grpc_pollset_set* pss = (grpc_pollset_set*)gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001566 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001567 pss->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001568#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001569 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1570#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001571 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001572}
1573
Craig Tillerbaa14a92017-11-03 09:09:36 -07001574static void pollset_set_destroy(grpc_exec_ctx* exec_ctx,
1575 grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001576 gpr_mu_destroy(&pss->po.mu);
1577
1578 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001579 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001580 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001581
1582 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001583}
1584
Craig Tillerbaa14a92017-11-03 09:09:36 -07001585static void pollset_set_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1586 grpc_fd* fd) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001587 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001588 POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001589}
1590
Craig Tillerbaa14a92017-11-03 09:09:36 -07001591static void pollset_set_del_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1592 grpc_fd* fd) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001593 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001594}
1595
Craig Tillerbaa14a92017-11-03 09:09:36 -07001596static void pollset_set_add_pollset(grpc_exec_ctx* exec_ctx,
1597 grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001598 add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001599 POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001600}
1601
Craig Tillerbaa14a92017-11-03 09:09:36 -07001602static void pollset_set_del_pollset(grpc_exec_ctx* exec_ctx,
1603 grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001604 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001605}
1606
Craig Tillerbaa14a92017-11-03 09:09:36 -07001607static void pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx,
1608 grpc_pollset_set* bag,
1609 grpc_pollset_set* item) {
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001610 add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001611 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001612}
1613
Craig Tillerbaa14a92017-11-03 09:09:36 -07001614static void pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx,
1615 grpc_pollset_set* bag,
1616 grpc_pollset_set* item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001617 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001618}
1619
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001620/* Test helper functions
1621 * */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001622void* grpc_fd_get_polling_island(grpc_fd* fd) {
1623 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001624
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001625 gpr_mu_lock(&fd->po.mu);
1626 pi = fd->po.pi;
1627 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001628
1629 return pi;
1630}
1631
Craig Tillerbaa14a92017-11-03 09:09:36 -07001632void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1633 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001634
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001635 gpr_mu_lock(&ps->po.mu);
1636 pi = ps->po.pi;
1637 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001638
1639 return pi;
1640}
1641
Craig Tillerbaa14a92017-11-03 09:09:36 -07001642bool grpc_are_polling_islands_equal(void* p, void* q) {
1643 polling_island* p1 = (polling_island*)p;
1644 polling_island* p2 = (polling_island*)q;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001645
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001646 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1647 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001648 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001649 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001650
1651 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001652}
1653
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001654/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001655 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001656 */
1657
/* Tears down all global state owned by this polling engine.  Runs in the
   same order as initialization in grpc_init_epollsig_linux(): fds, then
   pollsets, then polling islands. */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1663
/* Binds this engine's implementation into the iomgr event-engine
   abstraction.  The initializer is positional: entry order must match the
   declaration order of grpc_event_engine_vtable's members. */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset),

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    /* engine teardown */
    shutdown_engine,
};
1694
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001695/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1696 * Create a dummy epoll_fd to make sure epoll support is available */
1697static bool is_epoll_available() {
1698 int fd = epoll_create1(EPOLL_CLOEXEC);
1699 if (fd < 0) {
1700 gpr_log(
1701 GPR_ERROR,
1702 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1703 fd);
1704 return false;
1705 }
1706 close(fd);
1707 return true;
1708}
1709
/* Probes the environment and, if this engine can run, initializes its global
 * state and returns its vtable.  Returns NULL to let iomgr fall back to
 * another polling engine.
 *
 * explicit_request: true when the caller explicitly asked for this engine;
 * only then will we claim a wakeup signal (SIGRTMIN + 6) on our own. */
const grpc_event_engine_vtable* grpc_init_epollsig_linux(
    bool explicit_request) {
  /* If use of signals is disabled, we cannot use epoll engine*/
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    return NULL;
  }

  /* A working wakeup-fd mechanism is required (see grpc_has_wakeup_fd()). */
  if (!grpc_has_wakeup_fd()) {
    return NULL;
  }

  /* GLIBC may provide epoll_create1() even when the kernel lacks epoll. */
  if (!is_epoll_available()) {
    return NULL;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    if (explicit_request) {
      grpc_use_signal(SIGRTMIN + 6);
    } else {
      /* Without an explicit request, do not grab a realtime signal. */
      return NULL;
    }
  }

  fd_global_init();

  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return NULL;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return NULL;
  }

  return &vtable;
}
1746
murgatroid99623dd4f2016-08-08 17:31:27 -07001747#else /* defined(GRPC_LINUX_EPOLL) */
1748#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001749#include "src/core/lib/iomgr/ev_epollsig_linux.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001750/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001751 * NULL */
const grpc_event_engine_vtable* grpc_init_epollsig_linux(
    bool explicit_request) {
  /* epoll is unavailable on this platform: no engine to offer. */
  return NULL;
}
murgatroid99623dd4f2016-08-08 17:31:27 -07001756#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001757
/* No-op when the epollsig engine is compiled out; the signal is never used. */
void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001759#endif /* !defined(GRPC_LINUX_EPOLL) */