blob: d5f3122abc56dd152fa12ba70a505a8c8a821701 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
murgatroid9954070892016-08-08 17:01:18 -070019#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070020
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070021#include <grpc/grpc_posix.h>
22
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070023/* This polling engine is only relevant on linux kernels supporting epoll() */
murgatroid99623dd4f2016-08-08 17:31:27 -070024#ifdef GRPC_LINUX_EPOLL
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070025
Craig Tiller4509c472017-04-27 19:05:13 +000026#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070027
28#include <assert.h>
29#include <errno.h>
Craig Tiller20397792017-07-18 11:35:27 -070030#include <limits.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070031#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070032#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070033#include <signal.h>
34#include <string.h>
35#include <sys/epoll.h>
36#include <sys/socket.h>
37#include <unistd.h>
38
39#include <grpc/support/alloc.h>
40#include <grpc/support/log.h>
41#include <grpc/support/string_util.h>
42#include <grpc/support/tls.h>
43#include <grpc/support/useful.h>
44
Craig Tillerb4bb1cd2017-07-20 14:18:17 -070045#include "src/core/lib/debug/stats.h"
Craig Tiller6b7c1fb2017-07-19 15:45:03 -070046#include "src/core/lib/iomgr/block_annotate.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070047#include "src/core/lib/iomgr/ev_posix.h"
48#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070049#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070050#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070051#include "src/core/lib/iomgr/wakeup_fd_posix.h"
52#include "src/core/lib/profiling/timers.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070053
Craig Tillerbaa14a92017-11-03 09:09:36 -070054#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)
Craig Tillere24b24d2017-04-06 16:05:45 -070055
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070056#define GRPC_POLLING_TRACE(...) \
Craig Tillerbc0ab082017-05-05 10:42:44 -070057 if (GRPC_TRACER_ON(grpc_polling_trace)) { \
Craig Tiller2d1e8cd2017-05-17 12:41:44 -070058 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070059 }
60
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070061static int grpc_wakeup_signal = -1;
62static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070063
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070064/* Implements the function defined in grpc_posix.h. This function might be
65 * called before even calling grpc_init() to set either a different signal to
66 * use. If signum == -1, then the use of signals is disabled */
67void grpc_use_signal(int signum) {
68 grpc_wakeup_signal = signum;
69 is_grpc_wakeup_signal_initialized = true;
70
71 if (grpc_wakeup_signal < 0) {
72 gpr_log(GPR_INFO,
73 "Use of signals is disabled. Epoll engine will not be used");
74 } else {
75 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
76 grpc_wakeup_signal);
77 }
78}
79
80struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070081
/* Discriminates the concrete object that embeds a poll_obj (see below). */
typedef enum {
  POLL_OBJ_FD,
  POLL_OBJ_POLLSET,
  POLL_OBJ_POLLSET_SET
} poll_obj_type;
87
88typedef struct poll_obj {
ncteisene9cd8a82017-06-29 06:03:52 -040089#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -080090 poll_obj_type obj_type;
91#endif
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080092 gpr_mu mu;
Craig Tillerbaa14a92017-11-03 09:09:36 -070093 struct polling_island* pi;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080094} poll_obj;
95
Craig Tillerbaa14a92017-11-03 09:09:36 -070096const char* poll_obj_string(poll_obj_type po_type) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080097 switch (po_type) {
98 case POLL_OBJ_FD:
99 return "fd";
100 case POLL_OBJ_POLLSET:
101 return "pollset";
102 case POLL_OBJ_POLLSET_SET:
103 return "pollset_set";
104 }
105
106 GPR_UNREACHABLE_CODE(return "UNKNOWN");
107}
108
Craig Tillerbaa14a92017-11-03 09:09:36 -0700109 /*******************************************************************************
110 * Fd Declarations
111 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800112
Craig Tillerbaa14a92017-11-03 09:09:36 -0700113#define FD_FROM_PO(po) ((grpc_fd*)(po))
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800114
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700115struct grpc_fd {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800116 poll_obj po;
117
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700118 int fd;
119 /* refst format:
Sree Kuchibhotla5098f912016-05-31 10:58:17 -0700120 bit 0 : 1=Active / 0=Orphaned
121 bits 1-n : refcount
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700122 Ref/Unref by two to avoid altering the orphaned bit */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700123 gpr_atm refst;
124
Sree Kuchibhotla99983382017-02-12 17:03:27 -0800125 /* The fd is either closed or we relinquished control of it. In either
126 cases, this indicates that the 'fd' on this structure is no longer
127 valid */
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700128 bool orphaned;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700129
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800130 gpr_atm read_closure;
131 gpr_atm write_closure;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700132
Craig Tillerbaa14a92017-11-03 09:09:36 -0700133 struct grpc_fd* freelist_next;
134 grpc_closure* on_done_closure;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700135
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800136 /* The pollset that last noticed that the fd is readable. The actual type
137 * stored in this is (grpc_pollset *) */
138 gpr_atm read_notifier_pollset;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700139
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700140 grpc_iomgr_object iomgr_object;
141};
142
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700143/* Reference counting for fds */
ncteisend39010e2017-06-08 17:08:07 -0700144#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700145static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
146static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700147 int line);
148#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
149#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
150#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700151static void fd_ref(grpc_fd* fd);
152static void fd_unref(grpc_fd* fd);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700153#define GRPC_FD_REF(fd, reason) fd_ref(fd)
154#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
155#endif
156
157static void fd_global_init(void);
158static void fd_global_shutdown(void);
159
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700160/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700161 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700162 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700163
ncteisena1354852017-06-08 16:25:53 -0700164#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700165
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700166#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Craig Tillerb39307d2016-06-30 15:39:13 -0700167#define PI_UNREF(exec_ctx, p, r) \
168 pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700169
ncteisend39010e2017-06-08 17:08:07 -0700170#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700171
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700172#define PI_ADD_REF(p, r) pi_add_ref((p))
Craig Tillerb39307d2016-06-30 15:39:13 -0700173#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700174
ncteisena1354852017-06-08 16:25:53 -0700175#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700176
Craig Tiller460502e2016-10-13 10:02:08 -0700177/* This is also used as grpc_workqueue (by directly casing it) */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700178typedef struct polling_island {
179 gpr_mu mu;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700180 /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
181 the refcount.
182 Once the ref count becomes zero, this structure is destroyed which means
183 we should ensure that there is never a scenario where a PI_ADD_REF() is
184 racing with a PI_UNREF() that just made the ref_count zero. */
Craig Tiller15007612016-07-06 09:36:16 -0700185 gpr_atm ref_count;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700186
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700187 /* Pointer to the polling_island this merged into.
188 * merged_to value is only set once in polling_island's lifetime (and that too
189 * only if the island is merged with another island). Because of this, we can
190 * use gpr_atm type here so that we can do atomic access on this and reduce
191 * lock contention on 'mu' mutex.
192 *
193 * Note that if this field is not NULL (i.e not 0), all the remaining fields
194 * (except mu and ref_count) are invalid and must be ignored. */
195 gpr_atm merged_to;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700196
Craig Tiller460502e2016-10-13 10:02:08 -0700197 /* Number of threads currently polling on this island */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700198 gpr_atm poller_count;
Craig Tillerb39307d2016-06-30 15:39:13 -0700199
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700200 /* The fd of the underlying epoll set */
201 int epoll_fd;
202
203 /* The file descriptors in the epoll set */
204 size_t fd_cnt;
205 size_t fd_capacity;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700206 grpc_fd** fds;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700207} polling_island;
208
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700209/*******************************************************************************
210 * Pollset Declarations
211 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700212struct grpc_pollset_worker {
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700213 /* Thread id of this worker */
214 pthread_t pt_id;
215
216 /* Used to prevent a worker from getting kicked multiple times */
217 gpr_atm is_kicked;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700218 struct grpc_pollset_worker* next;
219 struct grpc_pollset_worker* prev;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700220};
221
222struct grpc_pollset {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800223 poll_obj po;
224
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700225 grpc_pollset_worker root_worker;
226 bool kicked_without_pollers;
227
228 bool shutting_down; /* Is the pollset shutting down ? */
229 bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700230 grpc_closure* shutdown_done; /* Called after after shutdown is complete */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700231};
232
233/*******************************************************************************
234 * Pollset-set Declarations
235 */
236struct grpc_pollset_set {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800237 poll_obj po;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700238};
239
240/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700241 * Common helpers
242 */
243
Craig Tillerbaa14a92017-11-03 09:09:36 -0700244static bool append_error(grpc_error** composite, grpc_error* error,
245 const char* desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700246 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700247 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700248 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700249 }
250 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700251 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700252}
253
254/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700255 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700256 */
257
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700258/* The wakeup fd that is used to wake up all threads in a Polling island. This
259 is useful in the polling island merge operation where we need to wakeup all
260 the threads currently polling the smaller polling island (so that they can
261 start polling the new/merged polling island)
262
263 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
264 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
265static grpc_wakeup_fd polling_island_wakeup_fd;
266
Craig Tiller2e620132016-10-10 15:27:44 -0700267/* The polling island being polled right now.
268 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700269static __thread polling_island* g_current_thread_polling_island;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700270
Craig Tillerb39307d2016-06-30 15:39:13 -0700271/* Forward declaration */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700272static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700273
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700274#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700275/* Currently TSAN may incorrectly flag data races between epoll_ctl and
276 epoll_wait for any grpc_fd structs that are added to the epoll set via
277 epoll_ctl and are returned (within a very short window) via epoll_wait().
278
279 To work-around this race, we establish a happens-before relation between
280 the code just-before epoll_ctl() and the code after epoll_wait() by using
281 this atomic */
282gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700283#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700284
Craig Tillerbaa14a92017-11-03 09:09:36 -0700285static void pi_add_ref(polling_island* pi);
286static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700287
ncteisena1354852017-06-08 16:25:53 -0700288#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700289static void pi_add_ref_dbg(polling_island* pi, const char* reason,
290 const char* file, int line) {
ncteisenf8061e82017-06-09 10:44:42 -0700291 if (GRPC_TRACER_ON(grpc_polling_trace)) {
ncteisen3ac64f82017-06-19 17:35:44 -0700292 gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700293 gpr_log(GPR_DEBUG,
294 "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
295 " (%s) - (%s, %d)",
ncteisen3ac64f82017-06-19 17:35:44 -0700296 pi, old_cnt, old_cnt + 1, reason, file, line);
ncteisena1354852017-06-08 16:25:53 -0700297 }
298 pi_add_ref(pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700299}
300
Craig Tillerbaa14a92017-11-03 09:09:36 -0700301static void pi_unref_dbg(grpc_exec_ctx* exec_ctx, polling_island* pi,
302 const char* reason, const char* file, int line) {
ncteisenf8061e82017-06-09 10:44:42 -0700303 if (GRPC_TRACER_ON(grpc_polling_trace)) {
ncteisen3ac64f82017-06-19 17:35:44 -0700304 gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700305 gpr_log(GPR_DEBUG,
306 "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
307 " (%s) - (%s, %d)",
ncteisen3ac64f82017-06-19 17:35:44 -0700308 pi, old_cnt, (old_cnt - 1), reason, file, line);
ncteisena1354852017-06-08 16:25:53 -0700309 }
310 pi_unref(exec_ctx, pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700311}
312#endif
313
Craig Tillerbaa14a92017-11-03 09:09:36 -0700314static void pi_add_ref(polling_island* pi) {
Craig Tiller15007612016-07-06 09:36:16 -0700315 gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
316}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700317
Craig Tillerbaa14a92017-11-03 09:09:36 -0700318static void pi_unref(grpc_exec_ctx* exec_ctx, polling_island* pi) {
Craig Tillerd8a3c042016-09-09 12:42:37 -0700319 /* If ref count went to zero, delete the polling island.
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700320 Note that this deletion not be done under a lock. Once the ref count goes
321 to zero, we are guaranteed that no one else holds a reference to the
322 polling island (and that there is no racing pi_add_ref() call either).
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700323
324 Also, if we are deleting the polling island and the merged_to field is
325 non-empty, we should remove a ref to the merged_to polling island
326 */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700327 if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700328 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Craig Tillerd8a3c042016-09-09 12:42:37 -0700329 polling_island_delete(exec_ctx, pi);
330 if (next != NULL) {
331 PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700332 }
333 }
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700334}
335
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700336/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700337static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700338 size_t fd_count, bool add_fd_refs,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700339 grpc_error** error) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700340 int err;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700341 size_t i;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700342 struct epoll_event ev;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700343 char* err_msg;
344 const char* err_desc = "polling_island_add_fds";
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700345
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700346#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700347 /* See the definition of g_epoll_sync for more context */
Sree Kuchibhotla0224dcc2016-06-22 18:04:00 -0700348 gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700349#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700350
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700351 for (i = 0; i < fd_count; i++) {
352 ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
353 ev.data.ptr = fds[i];
354 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700355
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700356 if (err < 0) {
357 if (errno != EEXIST) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700358 gpr_asprintf(
359 &err_msg,
360 "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
361 pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
362 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
363 gpr_free(err_msg);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700364 }
365
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700366 continue;
367 }
368
369 if (pi->fd_cnt == pi->fd_capacity) {
370 pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
Yash Tibrewalca3c1c02017-09-07 22:47:16 -0700371 pi->fds =
Craig Tillerbaa14a92017-11-03 09:09:36 -0700372 (grpc_fd**)gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700373 }
374
375 pi->fds[pi->fd_cnt++] = fds[i];
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700376 if (add_fd_refs) {
377 GRPC_FD_REF(fds[i], "polling_island");
378 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700379 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700380}
381
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700382/* The caller is expected to hold pi->mu before calling this */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700383static void polling_island_add_wakeup_fd_locked(polling_island* pi,
384 grpc_wakeup_fd* wakeup_fd,
385 grpc_error** error) {
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700386 struct epoll_event ev;
387 int err;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700388 char* err_msg;
389 const char* err_desc = "polling_island_add_wakeup_fd";
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700390
391 ev.events = (uint32_t)(EPOLLIN | EPOLLET);
392 ev.data.ptr = wakeup_fd;
393 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
394 GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700395 if (err < 0 && errno != EEXIST) {
396 gpr_asprintf(&err_msg,
397 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
398 "error: %d (%s)",
Craig Tillerc3571792017-05-02 12:33:38 -0700399 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
400 strerror(errno));
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700401 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
402 gpr_free(err_msg);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700403 }
404}
405
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700406/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700407static void polling_island_remove_all_fds_locked(polling_island* pi,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700408 bool remove_fd_refs,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700409 grpc_error** error) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700410 int err;
411 size_t i;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700412 char* err_msg;
413 const char* err_desc = "polling_island_remove_fds";
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700414
415 for (i = 0; i < pi->fd_cnt; i++) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700416 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700417 if (err < 0 && errno != ENOENT) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700418 gpr_asprintf(&err_msg,
419 "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
420 "error: %d (%s)",
421 pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
422 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
423 gpr_free(err_msg);
Sree Kuchibhotlaad162ba2016-06-06 16:23:37 -0700424 }
425
426 if (remove_fd_refs) {
427 GRPC_FD_UNREF(pi->fds[i], "polling_island");
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700428 }
429 }
430
431 pi->fd_cnt = 0;
432}
433
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700434/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700435static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700436 bool is_fd_closed,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700437 grpc_error** error) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700438 int err;
439 size_t i;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700440 char* err_msg;
441 const char* err_desc = "polling_island_remove_fd";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700442
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700443 /* If fd is already closed, then it would have been automatically been removed
444 from the epoll set */
445 if (!is_fd_closed) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700446 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
447 if (err < 0 && errno != ENOENT) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700448 gpr_asprintf(
449 &err_msg,
450 "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
451 pi->epoll_fd, fd->fd, errno, strerror(errno));
452 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
453 gpr_free(err_msg);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700454 }
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700455 }
456
457 for (i = 0; i < pi->fd_cnt; i++) {
458 if (pi->fds[i] == fd) {
459 pi->fds[i] = pi->fds[--pi->fd_cnt];
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700460 GRPC_FD_UNREF(fd, "polling_island");
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700461 break;
462 }
463 }
464}
465
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700466/* Might return NULL in case of an error */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700467static polling_island* polling_island_create(grpc_exec_ctx* exec_ctx,
468 grpc_fd* initial_fd,
469 grpc_error** error) {
470 polling_island* pi = NULL;
471 const char* err_desc = "polling_island_create";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700472
Craig Tillerb39307d2016-06-30 15:39:13 -0700473 *error = GRPC_ERROR_NONE;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700474
Craig Tillerbaa14a92017-11-03 09:09:36 -0700475 pi = (polling_island*)gpr_malloc(sizeof(*pi));
Craig Tillerb39307d2016-06-30 15:39:13 -0700476 gpr_mu_init(&pi->mu);
477 pi->fd_cnt = 0;
478 pi->fd_capacity = 0;
479 pi->fds = NULL;
480 pi->epoll_fd = -1;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700481
Craig Tiller15007612016-07-06 09:36:16 -0700482 gpr_atm_rel_store(&pi->ref_count, 0);
Craig Tillerd8a3c042016-09-09 12:42:37 -0700483 gpr_atm_rel_store(&pi->poller_count, 0);
Sree Kuchibhotla0224dcc2016-06-22 18:04:00 -0700484 gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700485
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700486 pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700487
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700488 if (pi->epoll_fd < 0) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700489 append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
490 goto done;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700491 }
492
Craig Tillerb39307d2016-06-30 15:39:13 -0700493 if (initial_fd != NULL) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700494 polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
Craig Tillerb39307d2016-06-30 15:39:13 -0700495 }
496
Craig Tillerb39307d2016-06-30 15:39:13 -0700497done:
498 if (*error != GRPC_ERROR_NONE) {
Craig Tiller0a06cd72016-07-14 13:21:24 -0700499 polling_island_delete(exec_ctx, pi);
Craig Tillerb39307d2016-06-30 15:39:13 -0700500 pi = NULL;
501 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700502 return pi;
503}
504
Craig Tillerbaa14a92017-11-03 09:09:36 -0700505static void polling_island_delete(grpc_exec_ctx* exec_ctx, polling_island* pi) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700506 GPR_ASSERT(pi->fd_cnt == 0);
507
Craig Tiller0a06cd72016-07-14 13:21:24 -0700508 if (pi->epoll_fd >= 0) {
509 close(pi->epoll_fd);
510 }
Craig Tillerb39307d2016-06-30 15:39:13 -0700511 gpr_mu_destroy(&pi->mu);
512 gpr_free(pi->fds);
513 gpr_free(pi);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700514}
515
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700516/* Attempts to gets the last polling island in the linked list (liked by the
517 * 'merged_to' field). Since this does not lock the polling island, there are no
518 * guarantees that the island returned is the last island */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700519static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
520 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700521 while (next != NULL) {
522 pi = next;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700523 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700524 }
525
526 return pi;
527}
528
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700529/* Gets the lock on the *latest* polling island i.e the last polling island in
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700530 the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700531 returned polling island's mu.
532 Usage: To lock/unlock polling island "pi", do the following:
533 polling_island *pi_latest = polling_island_lock(pi);
534 ...
535 ... critical section ..
536 ...
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700537 gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700538static polling_island* polling_island_lock(polling_island* pi) {
539 polling_island* next = NULL;
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700540
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700541 while (true) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700542 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700543 if (next == NULL) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700544 /* Looks like 'pi' is the last node in the linked list but unless we check
545 this by holding the pi->mu lock, we cannot be sure (i.e without the
546 pi->mu lock, we don't prevent island merges).
547 To be absolutely sure, check once more by holding the pi->mu lock */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700548 gpr_mu_lock(&pi->mu);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700549 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700550 if (next == NULL) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700551 /* pi is infact the last node and we have the pi->mu lock. we're done */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700552 break;
553 }
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700554
555 /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
556 * isn't the lock we are interested in. Continue traversing the list */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700557 gpr_mu_unlock(&pi->mu);
558 }
559
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700560 pi = next;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700561 }
562
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700563 return pi;
564}
565
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island** p, polling_island** q) {
  polling_island* pi_1 = *p;
  polling_island* pi_2 = *q;
  polling_island* next_1 = NULL;
  polling_island* next_2 = NULL;

  /* The algorithm is simple:
      - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
        keep updating pi_1 and pi_2)
      - Then obtain locks on the islands by following a lock order rule of
        locking polling_island with lower address first
        Special case: Before obtaining the locks, check if pi_1 and pi_2 are
        pointing to the same island. If that is the case, we can just call
        polling_island_lock()
      - After obtaining both the locks, double check that the polling islands
        are still the last polling islands in their respective linked lists
        (this is because there might have been polling island merges before
        we got the lock)
      - If the polling islands are the last islands, we are done. If not,
        release the locks and continue the process from the first step */
  while (true) {
    /* Chase each chain's merged_to links to its current tail */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != NULL) {
      pi_1 = next_1;
      next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != NULL) {
      pi_2 = next_2;
      next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    }

    /* Both chains converged on the same island: only one lock needed */
    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Address-ordered locking prevents deadlock against a concurrent caller
       that reached the same pair in the opposite order */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    /* Re-validate: a merge may have happened between the traversal above and
       acquiring the locks. If either island gained a merged_to link, retry. */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == NULL && next_2 == NULL) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
646
Craig Tillerbaa14a92017-11-03 09:09:36 -0700647static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700648 if (p == q) {
649 gpr_mu_unlock(&p->mu);
650 } else {
651 gpr_mu_unlock(&p->mu);
652 gpr_mu_unlock(&q->mu);
653 }
654}
655
/* Merges the polling islands p and q and returns the resulting (merged)
   island. Either input may already be stale (merged into another island);
   polling_island_lock_pair() resolves both to their latest islands first.
   Errors (if any) are accumulated into *error. */
static polling_island* polling_island_merge(polling_island* p,
                                            polling_island* q,
                                            grpc_error** error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island*, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
689
Craig Tillerbaa14a92017-11-03 09:09:36 -0700690static grpc_error* polling_island_global_init() {
691 grpc_error* error = GRPC_ERROR_NONE;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700692
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700693 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
694 if (error == GRPC_ERROR_NONE) {
695 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
696 }
697
698 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700699}
700
/* Tears down the global wakeup fd created by polling_island_global_init() */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
704
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700705/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700706 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700707 */
708
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700709/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700710 * but instead so that implementations with multiple threads in (for example)
711 * epoll_wait deal with the race between pollset removal and incoming poll
712 * notifications.
713 *
714 * The problem is that the poller ultimately holds a reference to this
715 * object, so it is very difficult to know when is safe to free it, at least
716 * without some expensive synchronization.
717 *
718 * If we keep the object freelisted, in the worst case losing this race just
719 * becomes a spurious read notification on a reused fd.
720 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700721
722/* The alarm system needs to be able to wakeup 'some poller' sometimes
723 * (specifically when a new alarm needs to be triggered earlier than the next
724 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
725 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700726
Craig Tillerbaa14a92017-11-03 09:09:36 -0700727static grpc_fd* fd_freelist = NULL;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700728static gpr_mu fd_freelist_mu;
729
#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
/* Adds 'n' to fd->refst. In debug (!NDEBUG) builds the extra parameters are
   used to trace every refcount transition. */
static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                   int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd* fd, int n) {
#endif
  /* The count must already be positive: taking a ref must never resurrect an
     fd whose refcount already dropped to zero */
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
748
/* Subtracts 'n' from fd->refst. When the count reaches zero the grpc_fd
   struct is recycled onto the freelist (its lock-free event state destroyed
   and the iomgr object unregistered) for reuse by fd_create(). */
#ifndef NDEBUG
static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                     int line) {
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd* fd, int n) {
#endif
  /* Full barrier so that all prior writes to the fd are visible before it is
     made reusable via the freelist */
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    grpc_lfev_destroy(&fd->read_closure);
    grpc_lfev_destroy(&fd->write_closure);

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    /* Unref below zero indicates a ref-counting bug */
    GPR_ASSERT(old > n);
  }
}
777
/* Increment refcount by two to avoid changing the orphan bit */
/* (NOTE(review): refst appears to encode state in its low bit, hence the
   step of 2 — confirm against the grpc_fd declaration, which is outside
   this view) */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
#endif
793
/* One-time setup of the mutex guarding the fd freelist */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
795
/* Frees every grpc_fd left on the freelist and destroys the freelist mutex */
static void fd_global_shutdown(void) {
  /* NOTE(review): this empty lock/unlock pair presumably flushes any thread
     still inside a freelist critical section before we start freeing —
     confirm this is the intent before simplifying */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
807
/* Wraps the OS file descriptor 'fd' in a grpc_fd, reusing a struct from the
   freelist when one is available. The returned grpc_fd starts with refcount 1,
   belongs to no polling island, and is not orphaned. 'name' is used only for
   iomgr object registration (tracing/debugging). */
static grpc_fd* fd_create(int fd, const char* name) {
  grpc_fd* new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = (grpc_fd*)gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  /* Release store pairs with any acquire load of refst elsewhere, publishing
     the fully-initialized struct */
  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}
850
Craig Tillerbaa14a92017-11-03 09:09:36 -0700851static int fd_wrapped_fd(grpc_fd* fd) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700852 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800853 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700854 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700855 ret_fd = fd->fd;
856 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800857 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700858
859 return ret_fd;
860}
861
/* Orphans the fd: detaches it from its polling island, then either closes the
   OS descriptor or (when release_fd != NULL) hands it back to the caller via
   *release_fd. 'on_done' is scheduled once the orphan is complete. If
   'already_closed' is true the descriptor was closed by the caller already. */
static void fd_orphan(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                      grpc_closure* on_done, int* release_fd,
                      bool already_closed, const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island* pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char* msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
919
Craig Tillerbaa14a92017-11-03 09:09:36 -0700920static grpc_pollset* fd_get_read_notifier_pollset(grpc_exec_ctx* exec_ctx,
921 grpc_fd* fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800922 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700923 return (grpc_pollset*)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700924}
925
/* Returns true once fd_shutdown() has transitioned the fd's read side into
   the shutdown state */
static bool fd_is_shutdown(grpc_fd* fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}
929
/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx* exec_ctx, grpc_fd* fd, grpc_error* why) {
  /* Only the call that wins the read-side shutdown transition actually shuts
     down the socket and the write side; subsequent calls fall through
     (grpc_lfev_set_shutdown presumably returns false then — its definition
     is outside this file) */
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  /* Each set_shutdown call above took its own ref; drop the caller's ref */
  GRPC_ERROR_UNREF(why);
}
939
/* Registers 'closure' to run when the fd becomes readable */
static void fd_notify_on_read(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                              grpc_closure* closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read");
}
944
/* Registers 'closure' to run when the fd becomes writable */
static void fd_notify_on_write(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                               grpc_closure* closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write");
}
949
950/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700951 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700952 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700953GPR_TLS_DECL(g_current_thread_pollset);
954GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700955static __thread bool g_initialized_sigmask;
956static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700957
/* Handler for grpc_wakeup_signal (delivered via pthread_kill in
   pollset_worker_kick). Deliberately does nothing in release builds — the
   signal's delivery alone is what interrupts the poller's blocking call. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700963
/* Installs sig_handler for the wakeup signal used to kick polling workers */
static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700965
/* Global state management */
/* Initializes the thread-local slots tracking the current pollset/worker and
   installs the wakeup-signal handler. Never fails. */
static grpc_error* pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}
973
/* Destroys the thread-local slots created by pollset_global_init() */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
978
/* Wakes a single polling worker by delivering grpc_wakeup_signal to its
   thread. The is_kicked CAS makes the kick idempotent until the worker
   clears it. Returns an OS error if pthread_kill fails. */
static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
  grpc_error* err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void*)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
994
/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
static int pollset_has_workers(grpc_pollset* p) {
  /* root_worker is a sentinel: the list is empty when it points to itself */
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001000
Craig Tillerbaa14a92017-11-03 09:09:36 -07001001static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001002 worker->prev->next = worker->next;
1003 worker->next->prev = worker->prev;
1004}
1005
Craig Tillerbaa14a92017-11-03 09:09:36 -07001006static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001007 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001008 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001009 remove_worker(p, w);
1010 return w;
1011 } else {
1012 return NULL;
1013 }
1014}
1015
Craig Tillerbaa14a92017-11-03 09:09:36 -07001016static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001017 worker->next = &p->root_worker;
1018 worker->prev = worker->next->prev;
1019 worker->prev->next = worker->next->prev = worker;
1020}
1021
Craig Tillerbaa14a92017-11-03 09:09:36 -07001022static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001023 worker->prev = &p->root_worker;
1024 worker->next = worker->prev->next;
1025 worker->prev->next = worker->next->prev = worker;
1026}
1027
/* p->mu must be held before calling this function */
/* Kicks worker(s) out of their blocking poll:
   - specific_worker == GRPC_POLLSET_KICK_BROADCAST: kick every worker except
     the calling thread's own
   - specific_worker != NULL: kick exactly that worker (unless it is us)
   - specific_worker == NULL: kick any one worker, rotating it to the back of
     the list; if none, remember the kick via kicked_without_pollers */
static grpc_error* pollset_kick(grpc_exec_ctx* exec_ctx, grpc_pollset* p,
                                grpc_pollset_worker* specific_worker) {
  GPR_TIMER_BEGIN("pollset_kick", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
  const char* err_desc = "Kick Failure";
  grpc_pollset_worker* worker = specific_worker;
  if (worker != NULL) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          /* Skip the calling thread's own worker: it can absorb the kick */
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
        GPR_TIMER_END("pollset_kick.broadcast", 0);
      } else {
        /* No pollers: record the kick so the next worker returns promptly */
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != NULL) {
      GPR_TIMER_MARK("finally_kick", 0);
      /* Rotate the kicked worker to the back so kicks are spread fairly */
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GPR_TIMER_END("pollset_kick", 0);
  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1080
/* Initializes a pollset and exposes its mutex to the caller via *mu.
   The pollset starts with no polling island, an empty (self-linked sentinel)
   worker list, and not shutting down. */
static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = NULL;
#ifndef NDEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  /* Circular list sentinel: empty list points to itself */
  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = NULL;
}
1096
Craig Tillerbaa14a92017-11-03 09:09:36 -07001097static int poll_deadline_to_millis_timeout(grpc_exec_ctx* exec_ctx,
Craig Tiller20397792017-07-18 11:35:27 -07001098 grpc_millis millis) {
1099 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
1100 grpc_millis delta = millis - grpc_exec_ctx_now(exec_ctx);
1101 if (delta > INT_MAX)
1102 return INT_MAX;
1103 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001104 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001105 else
1106 return (int)delta;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001107}
1108
/* Marks the fd readable (scheduling any closure registered via
   fd_notify_on_read) and records which pollset observed the event */
static void fd_become_readable(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                               grpc_pollset* notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read");

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifier is set as the
     read_notifier_pollset (They would both point to the same polling island
     anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1121
/* Marks the fd writable, scheduling any closure registered via
   fd_notify_on_write */
static void fd_become_writable(grpc_exec_ctx* exec_ctx, grpc_fd* fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write");
}
1125
/* Drops the pollset's reference on its polling island (if any) and clears
   the link. 'reason' is used for ref-count tracing. */
static void pollset_release_polling_island(grpc_exec_ctx* exec_ctx,
                                           grpc_pollset* ps,
                                           const char* reason) {
  if (ps->po.pi != NULL) {
    PI_UNREF(exec_ctx, ps->po.pi, reason);
  }
  ps->po.pi = NULL;
}
1134
/* Complete pollset shutdown: release the pollset's polling-island ref and
   schedule the user's shutdown_done closure. Caller holds pollset->po.mu;
   must only run once every worker has left the pollset. */
static void finish_shutdown_locked(grpc_exec_ctx* exec_ctx,
                                   grpc_pollset* pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
  GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
}
1146
/* Begin shutting down 'pollset'; 'closure' runs once shutdown completes.
   pollset->po.mu lock must be held by the caller before calling this. */
static void pollset_shutdown(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                             grpc_closure* closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  /* Broadcast-kick so every parked worker wakes and observes shutting_down */
  pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}
1166
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset) {
  /* Shutdown has completed, so no worker can still be linked into this
     pollset */
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}
1174
Craig Tiller84ea3412016-09-08 14:57:56 -07001175#define GRPC_EPOLL_MAX_EVENTS 100
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001176/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001177static void pollset_work_and_unlock(grpc_exec_ctx* exec_ctx,
1178 grpc_pollset* pollset,
1179 grpc_pollset_worker* worker, int timeout_ms,
1180 sigset_t* sig_mask, grpc_error** error) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001181 struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001182 int epoll_fd = -1;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001183 int ep_rv;
Craig Tillerbaa14a92017-11-03 09:09:36 -07001184 polling_island* pi = NULL;
1185 char* err_msg;
1186 const char* err_desc = "pollset_work_and_unlock";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001187 GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
1188
1189 /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001190 latest polling island pointed by pollset->po.pi
Sree Kuchibhotla229533b12016-06-21 20:42:52 -07001191
1192 Since epoll_fd is immutable, we can read it without obtaining the polling
1193 island lock. There is however a possibility that the polling island (from
1194 which we got the epoll_fd) got merged with another island while we are
1195 in this function. This is still okay because in such a case, we will wakeup
1196 right-away from epoll_wait() and pick up the latest polling_island the next
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001197 this function (i.e pollset_work_and_unlock()) is called */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001198
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001199 if (pollset->po.pi == NULL) {
1200 pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
1201 if (pollset->po.pi == NULL) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001202 GPR_TIMER_END("pollset_work_and_unlock", 0);
1203 return; /* Fatal error. We cannot continue */
1204 }
1205
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001206 PI_ADD_REF(pollset->po.pi, "ps");
Sree Kuchibhotla1e776682016-06-28 14:09:26 -07001207 GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001208 (void*)pollset, (void*)pollset->po.pi);
Sree Kuchibhotla88ee12f2016-06-03 19:26:48 -07001209 }
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001210
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001211 pi = polling_island_maybe_get_latest(pollset->po.pi);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001212 epoll_fd = pi->epoll_fd;
1213
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001214 /* Update the pollset->po.pi since the island being pointed by
1215 pollset->po.pi maybe older than the one pointed by pi) */
1216 if (pollset->po.pi != pi) {
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001217 /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
1218 polling island to be deleted */
1219 PI_ADD_REF(pi, "ps");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001220 PI_UNREF(exec_ctx, pollset->po.pi, "ps");
1221 pollset->po.pi = pi;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001222 }
Sree Kuchibhotlad627c102016-06-06 15:49:32 -07001223
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001224 /* Add an extra ref so that the island does not get destroyed (which means
1225 the epoll_fd won't be closed) while we are are doing an epoll_wait() on the
1226 epoll_fd */
1227 PI_ADD_REF(pi, "ps_work");
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001228 gpr_mu_unlock(&pollset->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001229
Craig Tiller61f96c12017-05-12 13:36:39 -07001230 gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
1231 g_current_thread_polling_island = pi;
Craig Tillerd8a3c042016-09-09 12:42:37 -07001232
Craig Tiller61f96c12017-05-12 13:36:39 -07001233 GRPC_SCHEDULING_START_BLOCKING_REGION;
Craig Tillerb4bb1cd2017-07-20 14:18:17 -07001234 GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001235 ep_rv =
1236 epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
Craig Tiller781e91a2017-07-17 16:21:00 -07001237 GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
Craig Tiller61f96c12017-05-12 13:36:39 -07001238 if (ep_rv < 0) {
1239 if (errno != EINTR) {
1240 gpr_asprintf(&err_msg,
1241 "epoll_wait() epoll fd: %d failed with error: %d (%s)",
1242 epoll_fd, errno, strerror(errno));
1243 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
1244 } else {
1245 /* We were interrupted. Save an interation by doing a zero timeout
1246 epoll_wait to see if there are any other events of interest */
1247 GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001248 (void*)pollset, (void*)worker);
Craig Tiller61f96c12017-05-12 13:36:39 -07001249 ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001250 }
Craig Tiller61f96c12017-05-12 13:36:39 -07001251 }
Sree Kuchibhotla79a62332016-06-04 14:01:03 -07001252
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001253#ifdef GRPC_TSAN
Craig Tiller61f96c12017-05-12 13:36:39 -07001254 /* See the definition of g_poll_sync for more details */
1255 gpr_atm_acq_load(&g_epoll_sync);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -07001256#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -07001257
Craig Tiller61f96c12017-05-12 13:36:39 -07001258 for (int i = 0; i < ep_rv; ++i) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001259 void* data_ptr = ep_ev[i].data.ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001260 if (data_ptr == &polling_island_wakeup_fd) {
1261 GRPC_POLLING_TRACE(
1262 "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
1263 "%d) got merged",
Craig Tillerbaa14a92017-11-03 09:09:36 -07001264 (void*)pollset, (void*)worker, epoll_fd);
Craig Tiller61f96c12017-05-12 13:36:39 -07001265 /* This means that our polling island is merged with a different
1266 island. We do not have to do anything here since the subsequent call
1267 to the function pollset_work_and_unlock() will pick up the correct
1268 epoll_fd */
1269 } else {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001270 grpc_fd* fd = (grpc_fd*)data_ptr;
Craig Tiller61f96c12017-05-12 13:36:39 -07001271 int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
1272 int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
1273 int write_ev = ep_ev[i].events & EPOLLOUT;
1274 if (read_ev || cancel) {
1275 fd_become_readable(exec_ctx, fd, pollset);
1276 }
1277 if (write_ev || cancel) {
1278 fd_become_writable(exec_ctx, fd);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001279 }
Sree Kuchibhotlae5012ba2016-06-06 16:01:45 -07001280 }
Craig Tillerd8a3c042016-09-09 12:42:37 -07001281 }
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001282
Craig Tiller61f96c12017-05-12 13:36:39 -07001283 g_current_thread_polling_island = NULL;
1284 gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
1285
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001286 GPR_ASSERT(pi != NULL);
1287
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001288 /* Before leaving, release the extra ref we added to the polling island. It
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001289 is important to use "pi" here (i.e our old copy of pollset->po.pi
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001290 that we got before releasing the polling island lock). This is because
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001291 pollset->po.pi pointer might get udpated in other parts of the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001292 code when there is an island merge while we are doing epoll_wait() above */
Craig Tillerb39307d2016-06-30 15:39:13 -07001293 PI_UNREF(exec_ctx, pi, "ps_work");
Sree Kuchibhotla24b10622016-06-08 15:20:17 -07001294
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001295 GPR_TIMER_END("pollset_work_and_unlock", 0);
1296}
1297
/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns.

   The worker record lives on this thread's stack; *worker_hdl is only valid
   until this call returns. */
static grpc_error* pollset_work(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                                grpc_pollset_worker** worker_hdl,
                                grpc_millis deadline) {
  GPR_TIMER_BEGIN("pollset_work", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  /* Convert the absolute deadline to a relative epoll timeout in ms */
  int timeout_ms = poll_deadline_to_millis_timeout(exec_ctx, deadline);

  sigset_t new_mask;

  /* Stack-allocated worker; linked into the pollset only for the duration of
     this call */
  grpc_pollset_worker worker;
  worker.next = worker.prev = NULL;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      /* One-time mask setup (NOTE(review): g_initialized_sigmask /
         g_orig_sigmask appear to be per-thread state — confirm they are
         declared thread-local where defined) */
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
         This is the mask used at all times *except during
         epoll_wait()*
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    /* Releases pollset->po.mu before blocking in epoll_pwait() */
    pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms,
                            &g_orig_sigmask, &error);
    grpc_exec_ctx_flush(exec_ctx);

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);

    /* Drop the lock while flushing so scheduled closures can run */
    gpr_mu_unlock(&pollset->po.mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = NULL;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GPR_TIMER_END("pollset_work", 0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
1404
/* Associate poll object 'item' (fd / pollset / pollset_set) with 'bag'
   (pollset / pollset_set) so that both end up referencing the same polling
   island. Acquires bag->mu then item->mu (in that order); both are released
   before returning. Errors are logged, not returned. */
static void add_poll_object(grpc_exec_ctx* exec_ctx, poll_obj* bag,
                            poll_obj_type bag_type, poll_obj* item,
                            poll_obj_type item_type) {
  GPR_TIMER_BEGIN("add_poll_object", 0);

#ifndef NDEBUG
  GPR_ASSERT(item->obj_type == item_type);
  GPR_ASSERT(bag->obj_type == bag_type);
#endif

  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* pi_new = NULL;

  gpr_mu_lock(&bag->mu);
  gpr_mu_lock(&item->mu);

retry:
  /*
   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
   * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
   *    a refcount of 2) and point item->pi and bag->pi to the new island
   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
   *    the other's non-NULL pi
   * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the
   *    polling islands and update item->pi and bag->pi to point to the new
   *    island
   */

  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
   * orphaned */
  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
    gpr_mu_unlock(&item->mu);
    gpr_mu_unlock(&bag->mu);
    return;
  }

  if (item->pi == bag->pi) {
    pi_new = item->pi;
    if (pi_new == NULL) {
      /* GPR_ASSERT(item->pi == bag->pi == NULL) */

      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
       * we need to do some extra work to make TSAN happy */
      if (item_type == POLL_OBJ_FD) {
        /* Unlock before creating a new polling island: the polling island will
           create a workqueue which creates a file descriptor, and holding an fd
           lock here can eventually cause a loop to appear to TSAN (making it
           unhappy). We don't think it's a real loop (there's an epoch point
           where that loop possibility disappears), but the advantages of
           keeping TSAN happy outweigh any performance advantage we might have
           by keeping the lock held. */
        gpr_mu_unlock(&item->mu);
        pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
        gpr_mu_lock(&item->mu);

        /* Need to reverify any assumptions made between the initial lock and
           getting to this branch: if they've changed, we need to throw away our
           work and figure things out again. */
        if (item->pi != NULL) {
          GRPC_POLLING_TRACE(
              "add_poll_object: Raced creating new polling island. pi_new: %p "
              "(fd: %d, %s: %p)",
              (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
              (void*)bag);
          /* No need to lock 'pi_new' here since this is a new polling island
             and no one has a reference to it yet */
          polling_island_remove_all_fds_locked(pi_new, true, &error);

          /* Ref and unref so that the polling island gets deleted during unref
           */
          PI_ADD_REF(pi_new, "dance_of_destruction");
          PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
          goto retry;
        }
      } else {
        pi_new = polling_island_create(exec_ctx, NULL, &error);
      }

      GRPC_POLLING_TRACE(
          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
          "%s: %p)",
          (void*)pi_new, poll_obj_string(item_type), (void*)item,
          poll_obj_string(bag_type), (void*)bag);
    } else {
      GRPC_POLLING_TRACE(
          "add_poll_object: Same polling island. pi: %p (%s, %s)",
          (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
    }
  } else if (item->pi == NULL) {
    /* GPR_ASSERT(bag->pi != NULL) */
    /* Make pi_new point to latest pi*/
    pi_new = polling_island_lock(bag->pi);

    if (item_type == POLL_OBJ_FD) {
      grpc_fd* fd = FD_FROM_PO(item);
      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
    }

    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else if (bag->pi == NULL) {
    /* GPR_ASSERT(item->pi != NULL) */
    /* Make pi_new to point to latest pi */
    pi_new = polling_island_lock(item->pi);
    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else {
    pi_new = polling_island_merge(item->pi, bag->pi, &error);
    GRPC_POLLING_TRACE(
        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  }

  /* At this point, pi_new is the polling island that both item->pi and bag->pi
     MUST be pointing to */

  if (item->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(item_type));
    if (item->pi != NULL) {
      PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
    }
    item->pi = pi_new;
  }

  if (bag->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
    if (bag->pi != NULL) {
      PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
    }
    bag->pi = pi_new;
  }

  gpr_mu_unlock(&item->mu);
  gpr_mu_unlock(&bag->mu);

  GRPC_LOG_IF_ERROR("add_poll_object", error);
  GPR_TIMER_END("add_poll_object", 0);
}
Craig Tiller57726ca2016-09-12 11:59:45 -07001553
/* Engine vtable hook: add 'fd' to 'pollset' by linking/merging their polling
   islands (see add_poll_object for the merge rules). */
static void pollset_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                           grpc_fd* fd) {
  add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
                  POLL_OBJ_FD);
}
1559
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001560/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001561 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001562 */
1563
Craig Tillerbaa14a92017-11-03 09:09:36 -07001564static grpc_pollset_set* pollset_set_create(void) {
1565 grpc_pollset_set* pss = (grpc_pollset_set*)gpr_malloc(sizeof(*pss));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001566 gpr_mu_init(&pss->po.mu);
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001567 pss->po.pi = NULL;
ncteisene9cd8a82017-06-29 06:03:52 -04001568#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001569 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1570#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001571 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001572}
1573
Craig Tillerbaa14a92017-11-03 09:09:36 -07001574static void pollset_set_destroy(grpc_exec_ctx* exec_ctx,
1575 grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001576 gpr_mu_destroy(&pss->po.mu);
1577
1578 if (pss->po.pi != NULL) {
Craig Tiller9e5ac1b2017-02-14 22:25:50 -08001579 PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001580 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001581
1582 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001583}
1584
/* Engine vtable hook: associate 'fd' with pollset_set 'pss' by
   linking/merging their polling islands (see add_poll_object). */
static void pollset_set_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
                               grpc_fd* fd) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
                  POLL_OBJ_FD);
}
1590
/* Intentionally a no-op: polling-island associations are not undone eagerly
   here; the island is reclaimed via refcounting (NOTE(review): presumably
   when the fd is orphaned — confirm against fd_orphan). */
static void pollset_set_del_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
                               grpc_fd* fd) {
  /* Nothing to do */
}
1595
/* Engine vtable hook: associate pollset 'ps' with pollset_set 'pss' by
   linking/merging their polling islands (see add_poll_object). */
static void pollset_set_add_pollset(grpc_exec_ctx* exec_ctx,
                                    grpc_pollset_set* pss, grpc_pollset* ps) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
                  POLL_OBJ_POLLSET);
}
1601
/* Intentionally a no-op: island membership is reclaimed via refcounting
   (see pollset_shutdown/pollset_release_polling_island), not unlinked here. */
static void pollset_set_del_pollset(grpc_exec_ctx* exec_ctx,
                                    grpc_pollset_set* pss, grpc_pollset* ps) {
  /* Nothing to do */
}
1606
/* Engine vtable hook: associate pollset_set 'item' with pollset_set 'bag' by
   linking/merging their polling islands (see add_poll_object). */
static void pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx,
                                        grpc_pollset_set* bag,
                                        grpc_pollset_set* item) {
  add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}
1613
/* Intentionally a no-op: merged polling islands are not split apart on
   removal; island lifetime is managed by refcounting. */
static void pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx,
                                        grpc_pollset_set* bag,
                                        grpc_pollset_set* item) {
  /* Nothing to do */
}
1619
/* Test helper functions */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001622void* grpc_fd_get_polling_island(grpc_fd* fd) {
1623 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001624
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001625 gpr_mu_lock(&fd->po.mu);
1626 pi = fd->po.pi;
1627 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001628
1629 return pi;
1630}
1631
Craig Tillerbaa14a92017-11-03 09:09:36 -07001632void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1633 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001634
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001635 gpr_mu_lock(&ps->po.mu);
1636 pi = ps->po.pi;
1637 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001638
1639 return pi;
1640}
1641
Craig Tillerbaa14a92017-11-03 09:09:36 -07001642bool grpc_are_polling_islands_equal(void* p, void* q) {
1643 polling_island* p1 = (polling_island*)p;
1644 polling_island* p2 = (polling_island*)q;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001645
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001646 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1647 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001648 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001649 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001650
1651 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001652}
1653
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001654/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001655 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001656 */
1657
/* Tears down all global state owned by this polling engine.  Mirrors the
   per-subsystem initialization done in grpc_init_epollsig_linux(). */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1663
/* Engine entry points exposed to the iomgr layer.  This is a positional
   initializer: entries must stay in the field order declared by
   grpc_event_engine_vtable. */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset),

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    shutdown_engine,
};
1694
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001695/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1696 * Create a dummy epoll_fd to make sure epoll support is available */
1697static bool is_epoll_available() {
1698 int fd = epoll_create1(EPOLL_CLOEXEC);
1699 if (fd < 0) {
1700 gpr_log(
1701 GPR_ERROR,
1702 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1703 fd);
1704 return false;
1705 }
1706 close(fd);
1707 return true;
1708}
1709
Craig Tillerbaa14a92017-11-03 09:09:36 -07001710const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001711 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001712 /* If use of signals is disabled, we cannot use epoll engine*/
1713 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
1714 return NULL;
1715 }
1716
Ken Paysoncd7d0472016-10-11 12:24:20 -07001717 if (!grpc_has_wakeup_fd()) {
Ken Paysonbc544be2016-10-06 19:23:47 -07001718 return NULL;
1719 }
1720
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001721 if (!is_epoll_available()) {
1722 return NULL;
1723 }
1724
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001725 if (!is_grpc_wakeup_signal_initialized) {
Sree Kuchibhotla0fda8802017-08-30 20:34:51 -07001726 if (explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001727 grpc_use_signal(SIGRTMIN + 6);
1728 } else {
1729 return NULL;
1730 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001731 }
1732
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001733 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001734
1735 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
1736 return NULL;
1737 }
1738
1739 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1740 polling_island_global_init())) {
1741 return NULL;
1742 }
1743
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001744 return &vtable;
1745}
1746
murgatroid99623dd4f2016-08-08 17:31:27 -07001747#else /* defined(GRPC_LINUX_EPOLL) */
1748#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001749#include "src/core/lib/iomgr/ev_epollsig_linux.h"
murgatroid99623dd4f2016-08-08 17:31:27 -07001750/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001751 * NULL */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001752const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001753 bool explicit_request) {
1754 return NULL;
1755}
murgatroid99623dd4f2016-08-08 17:31:27 -07001756#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001757
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001758void grpc_use_signal(int signum) {}
murgatroid99623dd4f2016-08-08 17:31:27 -07001759#endif /* !defined(GRPC_LINUX_EPOLL) */