blob: 5c99c72aa59ae3b7431f3865af8d48cdcafce783 [file] [log] [blame]
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2016 gRPC authors.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070016 *
17 */
18
murgatroid9954070892016-08-08 17:01:18 -070019#include "src/core/lib/iomgr/port.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070020
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070021#include <grpc/grpc_posix.h>
yang-gceb24752017-11-07 12:06:37 -080022#include <grpc/support/log.h>
Yash Tibrewal4e0fe522017-10-08 18:07:15 -070023
Sree Kuchibhotla76a07952016-06-22 15:09:06 -070024/* This polling engine is only relevant on linux kernels supporting epoll() */
Mehrdad Afsharifb669002018-01-17 15:37:56 -080025#ifdef GRPC_LINUX_EPOLL_CREATE1
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070026
Craig Tiller4509c472017-04-27 19:05:13 +000027#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070028
29#include <assert.h>
30#include <errno.h>
Craig Tiller20397792017-07-18 11:35:27 -070031#include <limits.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070032#include <poll.h>
Sree Kuchibhotla4998e302016-08-16 07:26:31 -070033#include <pthread.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070034#include <signal.h>
35#include <string.h>
36#include <sys/epoll.h>
37#include <sys/socket.h>
38#include <unistd.h>
39
40#include <grpc/support/alloc.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070041#include <grpc/support/string_util.h>
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070042
Craig Tillerb4bb1cd2017-07-20 14:18:17 -070043#include "src/core/lib/debug/stats.h"
Vijay Paib6cf1232018-01-25 21:02:26 -080044#include "src/core/lib/gpr/tls.h"
Vijay Paid4d0a302018-01-25 13:24:03 -080045#include "src/core/lib/gpr/useful.h"
Mark D. Roth4f2b0fd2018-01-19 12:12:23 -080046#include "src/core/lib/gprpp/manual_constructor.h"
Craig Tiller6b7c1fb2017-07-19 15:45:03 -070047#include "src/core/lib/iomgr/block_annotate.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070048#include "src/core/lib/iomgr/ev_posix.h"
49#include "src/core/lib/iomgr/iomgr_internal.h"
Craig Tiller376887d2017-04-06 08:27:03 -070050#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tiller185f6c92017-03-17 08:33:19 -070051#include "src/core/lib/iomgr/timer.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070052#include "src/core/lib/iomgr/wakeup_fd_posix.h"
53#include "src/core/lib/profiling/timers.h"
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070054
Craig Tillerbaa14a92017-11-03 09:09:36 -070055#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker*)1)
Craig Tillere24b24d2017-04-06 16:05:45 -070056
Noah Eisenc384d812017-11-12 20:14:27 -080057#define GRPC_POLLING_TRACE(...) \
ncteisen9ffb1492017-11-10 14:00:49 -080058 if (grpc_polling_trace.enabled()) { \
Noah Eisenc384d812017-11-12 20:14:27 -080059 gpr_log(GPR_INFO, __VA_ARGS__); \
Sree Kuchibhotla1e776682016-06-28 14:09:26 -070060 }
61
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070062static int grpc_wakeup_signal = -1;
63static bool is_grpc_wakeup_signal_initialized = false;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -070064
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -070065/* Implements the function defined in grpc_posix.h. This function might be
66 * called before even calling grpc_init() to set either a different signal to
67 * use. If signum == -1, then the use of signals is disabled */
68void grpc_use_signal(int signum) {
69 grpc_wakeup_signal = signum;
70 is_grpc_wakeup_signal_initialized = true;
71
72 if (grpc_wakeup_signal < 0) {
73 gpr_log(GPR_INFO,
74 "Use of signals is disabled. Epoll engine will not be used");
75 } else {
76 gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
77 grpc_wakeup_signal);
78 }
79}
80
81struct polling_island;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -070082
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080083typedef enum {
84 POLL_OBJ_FD,
85 POLL_OBJ_POLLSET,
86 POLL_OBJ_POLLSET_SET
87} poll_obj_type;
88
/* Common header embedded as the first member of grpc_fd, grpc_pollset and
   grpc_pollset_set, so generic code can manipulate the mutex and the owning
   polling island uniformly. */
typedef struct poll_obj {
#ifndef NDEBUG
  /* Concrete type of the embedding object; tracked only in debug builds. */
  poll_obj_type obj_type;
#endif
  gpr_mu mu;                 /* Protects this object's polling state. */
  struct polling_island* pi; /* Polling island this object belongs to. */
} poll_obj;
96
Craig Tillerbaa14a92017-11-03 09:09:36 -070097const char* poll_obj_string(poll_obj_type po_type) {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -080098 switch (po_type) {
99 case POLL_OBJ_FD:
100 return "fd";
101 case POLL_OBJ_POLLSET:
102 return "pollset";
103 case POLL_OBJ_POLLSET_SET:
104 return "pollset_set";
105 }
106
107 GPR_UNREACHABLE_CODE(return "UNKNOWN");
108}
109
Craig Tillerbaa14a92017-11-03 09:09:36 -0700110 /*******************************************************************************
111 * Fd Declarations
112 */
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800113
Craig Tillerbaa14a92017-11-03 09:09:36 -0700114#define FD_FROM_PO(po) ((grpc_fd*)(po))
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800115
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700116struct grpc_fd {
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800117 poll_obj po;
118
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700119 int fd;
120 /* refst format:
Sree Kuchibhotla5098f912016-05-31 10:58:17 -0700121 bit 0 : 1=Active / 0=Orphaned
122 bits 1-n : refcount
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700123 Ref/Unref by two to avoid altering the orphaned bit */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700124 gpr_atm refst;
125
Sree Kuchibhotla99983382017-02-12 17:03:27 -0800126 /* The fd is either closed or we relinquished control of it. In either
127 cases, this indicates that the 'fd' on this structure is no longer
128 valid */
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700129 bool orphaned;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700130
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800131 grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
132 grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700133
Craig Tillerbaa14a92017-11-03 09:09:36 -0700134 struct grpc_fd* freelist_next;
135 grpc_closure* on_done_closure;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700136
Sree Kuchibhotla82d73412017-02-09 18:27:45 -0800137 /* The pollset that last noticed that the fd is readable. The actual type
138 * stored in this is (grpc_pollset *) */
139 gpr_atm read_notifier_pollset;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700140
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700141 grpc_iomgr_object iomgr_object;
142};
143
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700144/* Reference counting for fds */
ncteisend39010e2017-06-08 17:08:07 -0700145#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700146static void fd_ref(grpc_fd* fd, const char* reason, const char* file, int line);
147static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700148 int line);
149#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
150#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
151#else
Craig Tillerbaa14a92017-11-03 09:09:36 -0700152static void fd_ref(grpc_fd* fd);
153static void fd_unref(grpc_fd* fd);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700154#define GRPC_FD_REF(fd, reason) fd_ref(fd)
155#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
156#endif
157
158static void fd_global_init(void);
159static void fd_global_shutdown(void);
160
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700161/*******************************************************************************
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700162 * Polling island Declarations
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700163 */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700164
ncteisena1354852017-06-08 16:25:53 -0700165#ifndef NDEBUG
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700166
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700167#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800168#define PI_UNREF(p, r) pi_unref_dbg((p), (r), __FILE__, __LINE__)
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700169
ncteisend39010e2017-06-08 17:08:07 -0700170#else
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700171
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700172#define PI_ADD_REF(p, r) pi_add_ref((p))
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800173#define PI_UNREF(p, r) pi_unref((p))
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700174
ncteisena1354852017-06-08 16:25:53 -0700175#endif
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700176
Craig Tiller460502e2016-10-13 10:02:08 -0700177/* This is also used as grpc_workqueue (by directly casing it) */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700178typedef struct polling_island {
179 gpr_mu mu;
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700180 /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
181 the refcount.
182 Once the ref count becomes zero, this structure is destroyed which means
183 we should ensure that there is never a scenario where a PI_ADD_REF() is
184 racing with a PI_UNREF() that just made the ref_count zero. */
Craig Tiller15007612016-07-06 09:36:16 -0700185 gpr_atm ref_count;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700186
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700187 /* Pointer to the polling_island this merged into.
188 * merged_to value is only set once in polling_island's lifetime (and that too
189 * only if the island is merged with another island). Because of this, we can
190 * use gpr_atm type here so that we can do atomic access on this and reduce
191 * lock contention on 'mu' mutex.
192 *
193 * Note that if this field is not NULL (i.e not 0), all the remaining fields
194 * (except mu and ref_count) are invalid and must be ignored. */
195 gpr_atm merged_to;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700196
Craig Tiller460502e2016-10-13 10:02:08 -0700197 /* Number of threads currently polling on this island */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700198 gpr_atm poller_count;
Craig Tillerb39307d2016-06-30 15:39:13 -0700199
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700200 /* The fd of the underlying epoll set */
201 int epoll_fd;
202
203 /* The file descriptors in the epoll set */
204 size_t fd_cnt;
205 size_t fd_capacity;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700206 grpc_fd** fds;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700207} polling_island;
208
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700209/*******************************************************************************
210 * Pollset Declarations
211 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700212struct grpc_pollset_worker {
Sree Kuchibhotla34217242016-06-29 00:19:07 -0700213 /* Thread id of this worker */
214 pthread_t pt_id;
215
216 /* Used to prevent a worker from getting kicked multiple times */
217 gpr_atm is_kicked;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700218 struct grpc_pollset_worker* next;
219 struct grpc_pollset_worker* prev;
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700220};
221
struct grpc_pollset {
  /* Common poll-object header (mutex + owning polling island). */
  poll_obj po;

  grpc_pollset_worker root_worker; /* Sentinel of the worker list. */
  bool kicked_without_pollers;     /* Kick arrived while nobody was polling. */

  bool shutting_down;          /* Is the pollset shutting down ? */
  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
  grpc_closure* shutdown_done; /* Called after shutdown is complete */
};
232
233/*******************************************************************************
234 * Pollset-set Declarations
235 */
/* A pollset_set is simply a poll_obj: a mutex plus the polling island that
   aggregates everything added to the set. */
struct grpc_pollset_set {
  poll_obj po;
};
239
240/*******************************************************************************
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700241 * Common helpers
242 */
243
Craig Tillerbaa14a92017-11-03 09:09:36 -0700244static bool append_error(grpc_error** composite, grpc_error* error,
245 const char* desc) {
Craig Tillerf975f742016-07-01 14:56:27 -0700246 if (error == GRPC_ERROR_NONE) return true;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700247 if (*composite == GRPC_ERROR_NONE) {
Noah Eisen3005ce82017-03-14 13:38:41 -0700248 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700249 }
250 *composite = grpc_error_add_child(*composite, error);
Craig Tillerf975f742016-07-01 14:56:27 -0700251 return false;
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700252}
253
254/*******************************************************************************
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700255 * Polling island Definitions
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700256 */
257
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700258/* The wakeup fd that is used to wake up all threads in a Polling island. This
259 is useful in the polling island merge operation where we need to wakeup all
260 the threads currently polling the smaller polling island (so that they can
261 start polling the new/merged polling island)
262
263 NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
264 threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
265static grpc_wakeup_fd polling_island_wakeup_fd;
266
Craig Tiller2e620132016-10-10 15:27:44 -0700267/* The polling island being polled right now.
268 See comments in workqueue_maybe_wakeup for why this is tracked. */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700269static __thread polling_island* g_current_thread_polling_island;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700270
Craig Tillerb39307d2016-06-30 15:39:13 -0700271/* Forward declaration */
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800272static void polling_island_delete(polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700273
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700274#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700275/* Currently TSAN may incorrectly flag data races between epoll_ctl and
276 epoll_wait for any grpc_fd structs that are added to the epoll set via
277 epoll_ctl and are returned (within a very short window) via epoll_wait().
278
279 To work-around this race, we establish a happens-before relation between
280 the code just-before epoll_ctl() and the code after epoll_wait() by using
281 this atomic */
282gpr_atm g_epoll_sync;
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700283#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700284
Craig Tillerbaa14a92017-11-03 09:09:36 -0700285static void pi_add_ref(polling_island* pi);
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800286static void pi_unref(polling_island* pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700287
ncteisena1354852017-06-08 16:25:53 -0700288#ifndef NDEBUG
Craig Tillerbaa14a92017-11-03 09:09:36 -0700289static void pi_add_ref_dbg(polling_island* pi, const char* reason,
290 const char* file, int line) {
ncteisen9ffb1492017-11-10 14:00:49 -0800291 if (grpc_polling_trace.enabled()) {
ncteisen3ac64f82017-06-19 17:35:44 -0700292 gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700293 gpr_log(GPR_DEBUG,
294 "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
295 " (%s) - (%s, %d)",
ncteisen3ac64f82017-06-19 17:35:44 -0700296 pi, old_cnt, old_cnt + 1, reason, file, line);
ncteisena1354852017-06-08 16:25:53 -0700297 }
298 pi_add_ref(pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700299}
300
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800301static void pi_unref_dbg(polling_island* pi, const char* reason,
302 const char* file, int line) {
ncteisen9ffb1492017-11-10 14:00:49 -0800303 if (grpc_polling_trace.enabled()) {
ncteisen3ac64f82017-06-19 17:35:44 -0700304 gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700305 gpr_log(GPR_DEBUG,
306 "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR
307 " (%s) - (%s, %d)",
ncteisen3ac64f82017-06-19 17:35:44 -0700308 pi, old_cnt, (old_cnt - 1), reason, file, line);
ncteisena1354852017-06-08 16:25:53 -0700309 }
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800310 pi_unref(pi);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700311}
312#endif
313
Craig Tillerbaa14a92017-11-03 09:09:36 -0700314static void pi_add_ref(polling_island* pi) {
Craig Tiller15007612016-07-06 09:36:16 -0700315 gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
316}
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700317
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800318static void pi_unref(polling_island* pi) {
Craig Tillerd8a3c042016-09-09 12:42:37 -0700319 /* If ref count went to zero, delete the polling island.
Sree Kuchibhotla6a295452016-06-23 15:53:10 -0700320 Note that this deletion not be done under a lock. Once the ref count goes
321 to zero, we are guaranteed that no one else holds a reference to the
322 polling island (and that there is no racing pi_add_ref() call either).
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700323
324 Also, if we are deleting the polling island and the merged_to field is
325 non-empty, we should remove a ref to the merged_to polling island
326 */
Craig Tillerd8a3c042016-09-09 12:42:37 -0700327 if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700328 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800329 polling_island_delete(pi);
Craig Tiller4782d922017-11-10 09:53:21 -0800330 if (next != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800331 PI_UNREF(next, "pi_delete"); /* Recursive call */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700332 }
333 }
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700334}
335
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700336/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700337static void polling_island_add_fds_locked(polling_island* pi, grpc_fd** fds,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700338 size_t fd_count, bool add_fd_refs,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700339 grpc_error** error) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700340 int err;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700341 size_t i;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700342 struct epoll_event ev;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700343 char* err_msg;
344 const char* err_desc = "polling_island_add_fds";
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700345
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700346#ifdef GRPC_TSAN
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700347 /* See the definition of g_epoll_sync for more context */
Sree Kuchibhotla0224dcc2016-06-22 18:04:00 -0700348 gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
Sree Kuchibhotlaad2c4772016-06-13 19:06:54 -0700349#endif /* defined(GRPC_TSAN) */
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700350
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700351 for (i = 0; i < fd_count; i++) {
Noah Eisenbe82e642018-02-09 09:16:55 -0800352 ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLOUT | EPOLLET);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700353 ev.data.ptr = fds[i];
354 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700355
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700356 if (err < 0) {
357 if (errno != EEXIST) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700358 gpr_asprintf(
359 &err_msg,
360 "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
361 pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
362 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
363 gpr_free(err_msg);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700364 }
365
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700366 continue;
367 }
368
369 if (pi->fd_cnt == pi->fd_capacity) {
370 pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
Noah Eisen4d20a662018-02-09 09:34:04 -0800371 pi->fds = static_cast<grpc_fd**>(
372 gpr_realloc(pi->fds, sizeof(grpc_fd*) * pi->fd_capacity));
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700373 }
374
375 pi->fds[pi->fd_cnt++] = fds[i];
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700376 if (add_fd_refs) {
377 GRPC_FD_REF(fds[i], "polling_island");
378 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700379 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700380}
381
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700382/* The caller is expected to hold pi->mu before calling this */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700383static void polling_island_add_wakeup_fd_locked(polling_island* pi,
384 grpc_wakeup_fd* wakeup_fd,
385 grpc_error** error) {
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700386 struct epoll_event ev;
387 int err;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700388 char* err_msg;
389 const char* err_desc = "polling_island_add_wakeup_fd";
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700390
Noah Eisenbe82e642018-02-09 09:16:55 -0800391 ev.events = static_cast<uint32_t>(EPOLLIN | EPOLLET);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700392 ev.data.ptr = wakeup_fd;
393 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
394 GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700395 if (err < 0 && errno != EEXIST) {
396 gpr_asprintf(&err_msg,
397 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
398 "error: %d (%s)",
Craig Tillerc3571792017-05-02 12:33:38 -0700399 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
400 strerror(errno));
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700401 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
402 gpr_free(err_msg);
Sree Kuchibhotla24b10622016-06-08 15:20:17 -0700403 }
404}
405
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700406/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700407static void polling_island_remove_all_fds_locked(polling_island* pi,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700408 bool remove_fd_refs,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700409 grpc_error** error) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700410 int err;
411 size_t i;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700412 char* err_msg;
413 const char* err_desc = "polling_island_remove_fds";
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700414
415 for (i = 0; i < pi->fd_cnt; i++) {
Craig Tiller4782d922017-11-10 09:53:21 -0800416 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, nullptr);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700417 if (err < 0 && errno != ENOENT) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700418 gpr_asprintf(&err_msg,
419 "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
420 "error: %d (%s)",
421 pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
422 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
423 gpr_free(err_msg);
Sree Kuchibhotlaad162ba2016-06-06 16:23:37 -0700424 }
425
426 if (remove_fd_refs) {
427 GRPC_FD_UNREF(pi->fds[i], "polling_island");
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700428 }
429 }
430
431 pi->fd_cnt = 0;
432}
433
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700434/* The caller is expected to hold pi->mu lock before calling this function */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700435static void polling_island_remove_fd_locked(polling_island* pi, grpc_fd* fd,
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700436 bool is_fd_closed,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700437 grpc_error** error) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700438 int err;
439 size_t i;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700440 char* err_msg;
441 const char* err_desc = "polling_island_remove_fd";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700442
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700443 /* If fd is already closed, then it would have been automatically been removed
444 from the epoll set */
445 if (!is_fd_closed) {
Craig Tiller4782d922017-11-10 09:53:21 -0800446 err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, nullptr);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700447 if (err < 0 && errno != ENOENT) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700448 gpr_asprintf(
449 &err_msg,
450 "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
451 pi->epoll_fd, fd->fd, errno, strerror(errno));
452 append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
453 gpr_free(err_msg);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700454 }
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700455 }
456
457 for (i = 0; i < pi->fd_cnt; i++) {
458 if (pi->fds[i] == fd) {
459 pi->fds[i] = pi->fds[--pi->fd_cnt];
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700460 GRPC_FD_UNREF(fd, "polling_island");
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700461 break;
462 }
463 }
464}
465
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700466/* Might return NULL in case of an error */
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800467static polling_island* polling_island_create(grpc_fd* initial_fd,
Craig Tillerbaa14a92017-11-03 09:09:36 -0700468 grpc_error** error) {
Craig Tiller4782d922017-11-10 09:53:21 -0800469 polling_island* pi = nullptr;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700470 const char* err_desc = "polling_island_create";
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700471
Craig Tillerb39307d2016-06-30 15:39:13 -0700472 *error = GRPC_ERROR_NONE;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700473
Noah Eisenbe82e642018-02-09 09:16:55 -0800474 pi = static_cast<polling_island*>(gpr_malloc(sizeof(*pi)));
Craig Tillerb39307d2016-06-30 15:39:13 -0700475 gpr_mu_init(&pi->mu);
476 pi->fd_cnt = 0;
477 pi->fd_capacity = 0;
Craig Tiller4782d922017-11-10 09:53:21 -0800478 pi->fds = nullptr;
Craig Tillerb39307d2016-06-30 15:39:13 -0700479 pi->epoll_fd = -1;
Craig Tillerd8a3c042016-09-09 12:42:37 -0700480
Craig Tiller15007612016-07-06 09:36:16 -0700481 gpr_atm_rel_store(&pi->ref_count, 0);
Craig Tillerd8a3c042016-09-09 12:42:37 -0700482 gpr_atm_rel_store(&pi->poller_count, 0);
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800483 gpr_atm_rel_store(&pi->merged_to, (gpr_atm) nullptr);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700484
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700485 pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
Sree Kuchibhotla41622a82016-06-13 16:43:14 -0700486
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700487 if (pi->epoll_fd < 0) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700488 append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
489 goto done;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700490 }
491
Craig Tiller4782d922017-11-10 09:53:21 -0800492 if (initial_fd != nullptr) {
Craig Tillerb39307d2016-06-30 15:39:13 -0700493 polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
Craig Tillerb39307d2016-06-30 15:39:13 -0700494 }
495
Craig Tillerb39307d2016-06-30 15:39:13 -0700496done:
497 if (*error != GRPC_ERROR_NONE) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800498 polling_island_delete(pi);
Craig Tiller4782d922017-11-10 09:53:21 -0800499 pi = nullptr;
Craig Tillerb39307d2016-06-30 15:39:13 -0700500 }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700501 return pi;
502}
503
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800504static void polling_island_delete(polling_island* pi) {
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700505 GPR_ASSERT(pi->fd_cnt == 0);
506
Craig Tiller0a06cd72016-07-14 13:21:24 -0700507 if (pi->epoll_fd >= 0) {
508 close(pi->epoll_fd);
509 }
Craig Tillerb39307d2016-06-30 15:39:13 -0700510 gpr_mu_destroy(&pi->mu);
511 gpr_free(pi->fds);
512 gpr_free(pi);
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700513}
514
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700515/* Attempts to gets the last polling island in the linked list (liked by the
516 * 'merged_to' field). Since this does not lock the polling island, there are no
517 * guarantees that the island returned is the last island */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700518static polling_island* polling_island_maybe_get_latest(polling_island* pi) {
519 polling_island* next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Craig Tiller4782d922017-11-10 09:53:21 -0800520 while (next != nullptr) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700521 pi = next;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700522 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700523 }
524
525 return pi;
526}
527
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700528/* Gets the lock on the *latest* polling island i.e the last polling island in
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700529 the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700530 returned polling island's mu.
531 Usage: To lock/unlock polling island "pi", do the following:
532 polling_island *pi_latest = polling_island_lock(pi);
533 ...
534 ... critical section ..
535 ...
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700536 gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700537static polling_island* polling_island_lock(polling_island* pi) {
Craig Tiller4782d922017-11-10 09:53:21 -0800538 polling_island* next = nullptr;
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700539
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700540 while (true) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700541 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Craig Tiller4782d922017-11-10 09:53:21 -0800542 if (next == nullptr) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700543 /* Looks like 'pi' is the last node in the linked list but unless we check
544 this by holding the pi->mu lock, we cannot be sure (i.e without the
545 pi->mu lock, we don't prevent island merges).
546 To be absolutely sure, check once more by holding the pi->mu lock */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700547 gpr_mu_lock(&pi->mu);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700548 next = (polling_island*)gpr_atm_acq_load(&pi->merged_to);
Craig Tiller4782d922017-11-10 09:53:21 -0800549 if (next == nullptr) {
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700550 /* pi is infact the last node and we have the pi->mu lock. we're done */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700551 break;
552 }
Sree Kuchibhotla229533b12016-06-21 20:42:52 -0700553
554 /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
555 * isn't the lock we are interested in. Continue traversing the list */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -0700556 gpr_mu_unlock(&pi->mu);
557 }
558
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700559 pi = next;
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700560 }
561
Sree Kuchibhotla9442bab2016-05-20 17:54:06 -0700562 return pi;
563}
564
/* Gets the lock on the *latest* polling islands in the linked lists pointed by
   *p and *q (and also updates *p and *q to point to the latest polling islands)

   This function is needed because calling the following block of code to obtain
   locks on polling islands (*p and *q) is prone to deadlocks.
     {
       polling_island_lock(*p, true);
       polling_island_lock(*q, true);
     }

   Usage/example:
     polling_island *p1;
     polling_island *p2;
     ..
     polling_island_lock_pair(&p1, &p2);
     ..
     .. Critical section with both p1 and p2 locked
     ..
     // Release locks: Always call polling_island_unlock_pair() to release locks
     polling_island_unlock_pair(p1, p2);
*/
static void polling_island_lock_pair(polling_island** p, polling_island** q) {
  polling_island* pi_1 = *p;
  polling_island* pi_2 = *q;
  polling_island* next_1 = nullptr;
  polling_island* next_2 = nullptr;

  /* The algorithm is simple:
     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
       keep updating pi_1 and pi_2)
     - Then obtain locks on the islands by following a lock order rule of
       locking polling_island with lower address first
       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
       pointing to the same island. If that is the case, we can just call
       polling_island_lock()
     - After obtaining both the locks, double check that the polling islands
       are still the last polling islands in their respective linked lists
       (this is because there might have been polling island merges before
       we got the lock)
     - If the polling islands are the last islands, we are done. If not,
       release the locks and continue the process from the first step */
  while (true) {
    /* Lock-free walk to the current tail of each merge chain. */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    while (next_1 != nullptr) {
      pi_1 = next_1;
      next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    }

    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    while (next_2 != nullptr) {
      pi_2 = next_2;
      next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    }

    if (pi_1 == pi_2) {
      pi_1 = pi_2 = polling_island_lock(pi_1);
      break;
    }

    /* Address-ordered locking avoids ABBA deadlock between concurrent pairs */
    if (pi_1 < pi_2) {
      gpr_mu_lock(&pi_1->mu);
      gpr_mu_lock(&pi_2->mu);
    } else {
      gpr_mu_lock(&pi_2->mu);
      gpr_mu_lock(&pi_1->mu);
    }

    /* Re-validate: a merge may have happened before we acquired the locks */
    next_1 = (polling_island*)gpr_atm_acq_load(&pi_1->merged_to);
    next_2 = (polling_island*)gpr_atm_acq_load(&pi_2->merged_to);
    if (next_1 == nullptr && next_2 == nullptr) {
      break;
    }

    gpr_mu_unlock(&pi_1->mu);
    gpr_mu_unlock(&pi_2->mu);
  }

  *p = pi_1;
  *q = pi_2;
}
645
Craig Tillerbaa14a92017-11-03 09:09:36 -0700646static void polling_island_unlock_pair(polling_island* p, polling_island* q) {
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -0700647 if (p == q) {
648 gpr_mu_unlock(&p->mu);
649 } else {
650 gpr_mu_unlock(&p->mu);
651 gpr_mu_unlock(&q->mu);
652 }
653}
654
/* Merges the polling islands 'p' and 'q' and returns the island that survives
   the merge (the one that absorbed the other's fds). Any failures encountered
   while moving fds are accumulated into *error. */
static polling_island* polling_island_merge(polling_island* p,
                                            polling_island* q,
                                            grpc_error** error) {
  /* Get locks on both the polling islands */
  polling_island_lock_pair(&p, &q);

  if (p != q) {
    /* Make sure that p points to the polling island with fewer fds than q */
    if (p->fd_cnt > q->fd_cnt) {
      GPR_SWAP(polling_island*, p, q);
    }

    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
       Note that the refcounts on the fds being moved will not change here.
       This is why the last param in the following two functions is 'false') */
    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
    polling_island_remove_all_fds_locked(p, false, error);

    /* Wakeup all the pollers (if any) on p so that they pickup this change */
    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);

    /* Add the 'merged_to' link from p --> q */
    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
  }
  /* else if p == q, nothing needs to be done */

  polling_island_unlock_pair(p, q);

  /* Return the merged polling island (Note that no merge would have happened
     if p == q which is ok) */
  return q;
}
688
Craig Tillerbaa14a92017-11-03 09:09:36 -0700689static grpc_error* polling_island_global_init() {
690 grpc_error* error = GRPC_ERROR_NONE;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700691
Sree Kuchibhotla3131c262016-06-21 17:28:28 -0700692 error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
693 if (error == GRPC_ERROR_NONE) {
694 error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
695 }
696
697 return error;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700698}
699
/* Tears down state created by polling_island_global_init(). */
static void polling_island_global_shutdown() {
  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
}
703
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700704/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700705 * Fd Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700706 */
707
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700708/* We need to keep a freelist not because of any concerns of malloc performance
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700709 * but instead so that implementations with multiple threads in (for example)
710 * epoll_wait deal with the race between pollset removal and incoming poll
711 * notifications.
712 *
713 * The problem is that the poller ultimately holds a reference to this
714 * object, so it is very difficult to know when is safe to free it, at least
715 * without some expensive synchronization.
716 *
717 * If we keep the object freelisted, in the worst case losing this race just
718 * becomes a spurious read notification on a reused fd.
719 */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700720
721/* The alarm system needs to be able to wakeup 'some poller' sometimes
722 * (specifically when a new alarm needs to be triggered earlier than the next
723 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
724 * case occurs. */
Sree Kuchibhotla9bc3d2d2016-06-06 10:27:56 -0700725
Craig Tiller4782d922017-11-10 09:53:21 -0800726static grpc_fd* fd_freelist = nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700727static gpr_mu fd_freelist_mu;
728
/* Increments fd->refst by n. In debug (!NDEBUG) builds the REF_BY macro also
   threads through a reason string and call site for refcount tracing. */
#ifndef NDEBUG
#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
static void ref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                   int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
  }
#else
#define REF_BY(fd, n, reason) ref_by(fd, n)
#define UNREF_BY(fd, n, reason) unref_by(fd, n)
static void ref_by(grpc_fd* fd, int n) {
#endif
  /* The refcount must already be positive: taking a ref on a dead fd is a bug
     (both #ifdef branches share this tail of the function body). */
  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
}
747
/* Decrements fd->refst by n; when the count drops to zero the grpc_fd is
   recycled onto the global freelist (see the freelist rationale comment
   above) rather than freed. */
#ifndef NDEBUG
static void unref_by(grpc_fd* fd, int n, const char* reason, const char* file,
                     int line) {
  if (grpc_trace_fd_refcount.enabled()) {
    gpr_log(GPR_DEBUG,
            "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]",
            fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst),
            gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
  }
#else
static void unref_by(grpc_fd* fd, int n) {
#endif
  gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n);
  if (old == n) {
    /* old == n means this unref brought the count to zero: recycle the fd.
       Add the fd to the freelist */
    gpr_mu_lock(&fd_freelist_mu);
    fd->freelist_next = fd_freelist;
    fd_freelist = fd;
    grpc_iomgr_unregister_object(&fd->iomgr_object);

    fd->read_closure->DestroyEvent();
    fd->write_closure->DestroyEvent();

    gpr_mu_unlock(&fd_freelist_mu);
  } else {
    /* Underflow (old < n) would indicate a ref/unref imbalance. */
    GPR_ASSERT(old > n);
  }
}
776
/* Increment refcount by two to avoid changing the orphan bit */
/* (refst's low bit doubles as the "not orphaned" flag, so public refs are
   always taken/dropped in units of 2 — see REF_BY/UNREF_BY usage.) */
#ifndef NDEBUG
static void fd_ref(grpc_fd* fd, const char* reason, const char* file,
                   int line) {
  ref_by(fd, 2, reason, file, line);
}

static void fd_unref(grpc_fd* fd, const char* reason, const char* file,
                     int line) {
  unref_by(fd, 2, reason, file, line);
}
#else
static void fd_ref(grpc_fd* fd) { ref_by(fd, 2); }
static void fd_unref(grpc_fd* fd) { unref_by(fd, 2); }
#endif
792
/* Initializes the mutex guarding the global grpc_fd freelist. */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
794
/* Drains and frees the global grpc_fd freelist, then destroys its mutex. */
static void fd_global_shutdown(void) {
  /* NOTE(review): the empty lock/unlock pair appears to act as a barrier so
     any in-flight unref_by finishes its freelist push before we drain below —
     TODO confirm; after this point the list is walked without the lock. */
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != nullptr) {
    grpc_fd* fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}
806
/* Wraps kernel fd 'fd' in a grpc_fd, reusing a recycled struct from the
   freelist when one is available. 'name' is used (with the fd number
   appended) to register the fd with the iomgr for tracking. */
static grpc_fd* fd_create(int fd, const char* name) {
  grpc_fd* new_fd = nullptr;

  /* Try to pop a recycled grpc_fd off the freelist first */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != nullptr) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == nullptr) {
    /* Freelist empty: allocate fresh and do the once-per-struct init */
    new_fd = static_cast<grpc_fd*>(gpr_malloc(sizeof(grpc_fd)));
    gpr_mu_init(&new_fd->po.mu);
    new_fd->read_closure.Init();
    new_fd->write_closure.Init();
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = nullptr;
#ifndef NDEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  /* Release store pairs the fully-initialized struct with the refcount */
  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  new_fd->read_closure->InitEvent();
  new_fd->write_closure->InitEvent();
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = nullptr;
  new_fd->on_done_closure = nullptr;

  gpr_mu_unlock(&new_fd->po.mu);

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
  gpr_free(fd_name);
  return new_fd;
}
851
Craig Tillerbaa14a92017-11-03 09:09:36 -0700852static int fd_wrapped_fd(grpc_fd* fd) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700853 int ret_fd = -1;
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800854 gpr_mu_lock(&fd->po.mu);
Sree Kuchibhotla79a62332016-06-04 14:01:03 -0700855 if (!fd->orphaned) {
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700856 ret_fd = fd->fd;
857 }
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -0800858 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700859
860 return ret_fd;
861}
862
/* Detaches 'fd' from its polling island and relinquishes the kernel fd
   (returned via *release_fd if non-NULL, otherwise close()d unless
   'already_closed'). 'on_done' is scheduled once the orphan completes. */
static void fd_orphan(grpc_fd* fd, grpc_closure* on_done, int* release_fd,
                      bool already_closed, const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* unref_pi = nullptr;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != nullptr) {
    polling_island* pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    /* Defer the island unref until after fd->po.mu is released (see below) */
    unref_pi = fd->po.pi;
    fd->po.pi = nullptr;
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != nullptr) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
  }

  fd->orphaned = true;

  GRPC_CLOSURE_SCHED(fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != nullptr) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(unref_pi, "fd_orphan");
  }
  if (error != GRPC_ERROR_NONE) {
    const char* msg = grpc_error_string(error);
    gpr_log(GPR_DEBUG, "fd_orphan: %s", msg);
  }
  GRPC_ERROR_UNREF(error);
}
919
Yash Tibrewal8cf14702017-12-06 09:47:54 -0800920static grpc_pollset* fd_get_read_notifier_pollset(grpc_fd* fd) {
Sree Kuchibhotlafb7ced62017-02-15 18:31:57 -0800921 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700922 return (grpc_pollset*)notifier;
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700923}
924
/* True once fd_shutdown has been (successfully) invoked on this fd; the
   read-side event object carries the shutdown state for the whole fd. */
static bool fd_is_shutdown(grpc_fd* fd) {
  return fd->read_closure->IsShutdown();
}
928
/* Might be called multiple times */
/* Shuts down both directions of the fd. SetShutdown on the read closure
   returns true only for the first caller, so the socket shutdown() and the
   write-side SetShutdown run exactly once. Each SetShutdown takes ownership
   of a ref on 'why'; the final unref drops this function's own ref. */
static void fd_shutdown(grpc_fd* fd, grpc_error* why) {
  if (fd->read_closure->SetShutdown(GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    fd->write_closure->SetShutdown(GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}
937
/* Registers 'closure' to run when the fd becomes readable (or immediately if
   it already is / has been shut down). */
static void fd_notify_on_read(grpc_fd* fd, grpc_closure* closure) {
  fd->read_closure->NotifyOn(closure);
}
941
/* Registers 'closure' to run when the fd becomes writable (or immediately if
   it already is / has been shut down). */
static void fd_notify_on_write(grpc_fd* fd, grpc_closure* closure) {
  fd->write_closure->NotifyOn(closure);
}
945
946/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -0700947 * Pollset Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700948 */
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -0700949GPR_TLS_DECL(g_current_thread_pollset);
950GPR_TLS_DECL(g_current_thread_worker);
Craig Tiller19196992016-06-27 18:45:56 -0700951static __thread bool g_initialized_sigmask;
952static __thread sigset_t g_orig_sigmask;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700953
/* Handler for grpc_wakeup_signal. The body is intentionally (almost) empty:
   mere delivery of the signal is enough to interrupt a blocking epoll_wait
   in the targeted thread (see pollset_worker_kick / pthread_kill).
   NOTE(review): gpr_log is likely not async-signal-safe; it is compiled in
   only under GRPC_EPOLL_DEBUG — confirm this is acceptable for debug builds. */
static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700959
/* Installs the (no-op) handler for grpc_wakeup_signal used to kick pollers. */
static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }
Sree Kuchibhotla5855c472016-06-08 12:56:56 -0700961
/* Global state management */
/* Initializes the thread-local slots that track which pollset/worker the
   current thread is running, and installs the wakeup-signal handler.
   Never fails; the grpc_error return exists to match the engine interface. */
static grpc_error* pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}
969
/* Tears down the thread-local state created by pollset_global_init(). */
static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}
974
/* Wakes 'worker' out of a blocking poll by sending grpc_wakeup_signal to its
   thread. The CAS on is_kicked makes kicks idempotent: only the first kick
   after the worker resets the flag actually delivers a signal. */
static grpc_error* pollset_worker_kick(grpc_pollset_worker* worker) {
  grpc_error* err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(&worker->is_kicked, static_cast<gpr_atm>(0),
                             static_cast<gpr_atm>(1))) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void*)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
991
/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
/* The worker list is a circular doubly-linked list with root_worker as the
   sentinel, so "next points back at the root" means the list is empty. */
static int pollset_has_workers(grpc_pollset* p) {
  return p->root_worker.next != &p->root_worker;
}
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700997
Craig Tillerbaa14a92017-11-03 09:09:36 -0700998static void remove_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -0700999 worker->prev->next = worker->next;
1000 worker->next->prev = worker->prev;
1001}
1002
Craig Tillerbaa14a92017-11-03 09:09:36 -07001003static grpc_pollset_worker* pop_front_worker(grpc_pollset* p) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001004 if (pollset_has_workers(p)) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001005 grpc_pollset_worker* w = p->root_worker.next;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001006 remove_worker(p, w);
1007 return w;
1008 } else {
Craig Tiller4782d922017-11-10 09:53:21 -08001009 return nullptr;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001010 }
1011}
1012
Craig Tillerbaa14a92017-11-03 09:09:36 -07001013static void push_back_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001014 worker->next = &p->root_worker;
1015 worker->prev = worker->next->prev;
1016 worker->prev->next = worker->next->prev = worker;
1017}
1018
Craig Tillerbaa14a92017-11-03 09:09:36 -07001019static void push_front_worker(grpc_pollset* p, grpc_pollset_worker* worker) {
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001020 worker->prev = &p->root_worker;
1021 worker->next = worker->prev->next;
1022 worker->prev->next = worker->next->prev = worker;
1023}
1024
/* p->mu must be held before calling this function */
/* Kicks workers on pollset 'p':
   - specific_worker == GRPC_POLLSET_KICK_BROADCAST: kick every worker except
     the calling thread's own
   - specific_worker != NULL: kick just that worker (skipped if it is the
     calling thread's own worker)
   - specific_worker == NULL: kick "any" one worker, rotating it to the back
     of the list for fairness; if none, or if no kick was deliverable, record
     kicked_without_pollers so the next poller returns immediately */
static grpc_error* pollset_kick(grpc_pollset* p,
                                grpc_pollset_worker* specific_worker) {
  GPR_TIMER_SCOPE("pollset_kick", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  GRPC_STATS_INC_POLLSET_KICK();
  const char* err_desc = "Kick Failure";
  grpc_pollset_worker* worker = specific_worker;
  if (worker != nullptr) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_SCOPE("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          /* Don't signal ourselves; the current thread absorbs its own kick */
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
      } else {
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in pollset_work() function), then there is no need
       to kick any other worker since the current thread can just absorb the
       kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != nullptr) {
      GPR_TIMER_MARK("finally_kick", 0);
      /* Rotate the kicked worker to the back for round-robin fairness */
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}
1075
/* Initializes 'pollset' to an empty, non-shutdown state and hands the caller
   its mutex through *mu (callers lock it before further pollset_* calls). */
static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = nullptr;
#ifndef NDEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  /* Empty circular worker list: the sentinel points at itself */
  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = nullptr;
}
1091
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001092static int poll_deadline_to_millis_timeout(grpc_millis millis) {
Craig Tiller20397792017-07-18 11:35:27 -07001093 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001094 grpc_millis delta = millis - grpc_core::ExecCtx::Get()->Now();
Craig Tiller20397792017-07-18 11:35:27 -07001095 if (delta > INT_MAX)
1096 return INT_MAX;
1097 else if (delta < 0)
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001098 return 0;
Craig Tiller20397792017-07-18 11:35:27 -07001099 else
Noah Eisenbe82e642018-02-09 09:16:55 -08001100 return static_cast<int>(delta);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001101}
1102
/* Signal that 'fd' became readable and record 'notifier' — the pollset whose
   epoll set reported the event — so fd_get_read_notifier() can retrieve it. */
static void fd_become_readable(grpc_fd* fd, grpc_pollset* notifier) {
  fd->read_closure->SetReady();

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifier is set as the read_notifier_pollset
     (They would both point to the same polling island anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
1114
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001115static void fd_become_writable(grpc_fd* fd) { fd->write_closure->SetReady(); }
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001116
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001117static void pollset_release_polling_island(grpc_pollset* ps,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001118 const char* reason) {
Craig Tiller4782d922017-11-10 09:53:21 -08001119 if (ps->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001120 PI_UNREF(ps->po.pi, reason);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001121 }
Craig Tiller4782d922017-11-10 09:53:21 -08001122 ps->po.pi = nullptr;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001123}
1124
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001125static void finish_shutdown_locked(grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001126 /* The pollset cannot have any workers if we are at this stage */
1127 GPR_ASSERT(!pollset_has_workers(pollset));
1128
1129 pollset->finish_shutdown_called = true;
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001130
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001131 /* Release the ref and set pollset->po.pi to NULL */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001132 pollset_release_polling_island(pollset, "ps_shutdown");
1133 GRPC_CLOSURE_SCHED(pollset->shutdown_done, GRPC_ERROR_NONE);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001134}
1135
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001136/* pollset->po.mu lock must be held by the caller before calling this */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001137static void pollset_shutdown(grpc_pollset* pollset, grpc_closure* closure) {
yang-gce1cfea2018-01-31 15:59:50 -08001138 GPR_TIMER_SCOPE("pollset_shutdown", 0);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001139 GPR_ASSERT(!pollset->shutting_down);
1140 pollset->shutting_down = true;
1141 pollset->shutdown_done = closure;
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001142 pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001143
1144 /* If the pollset has any workers, we cannot call finish_shutdown_locked()
1145 because it would release the underlying polling island. In such a case, we
1146 let the last worker call finish_shutdown_locked() from pollset_work() */
1147 if (!pollset_has_workers(pollset)) {
1148 GPR_ASSERT(!pollset->finish_shutdown_called);
1149 GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001150 finish_shutdown_locked(pollset);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001151 }
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001152}
1153
1154/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
1155 * than destroying the mutexes, there is nothing special that needs to be done
1156 * here */
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001157static void pollset_destroy(grpc_pollset* pollset) {
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001158 GPR_ASSERT(!pollset_has_workers(pollset));
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001159 gpr_mu_destroy(&pollset->po.mu);
Sree Kuchibhotla8e4926c2016-06-08 20:33:19 -07001160}
1161
#define GRPC_EPOLL_MAX_EVENTS 100
/* Core polling step: pick up the pollset's (latest) polling island, release
   pollset->po.mu, block in epoll_pwait() on the island's epoll fd for up to
   timeout_ms, and dispatch readable/writable notifications for any fds that
   fired. Returns with pollset->po.mu UNLOCKED; errors are appended to *error.
   Note: sig_mask contains the signal mask to use *during* epoll_wait() */
static void pollset_work_and_unlock(grpc_pollset* pollset,
                                    grpc_pollset_worker* worker, int timeout_ms,
                                    sigset_t* sig_mask, grpc_error** error) {
  GPR_TIMER_SCOPE("pollset_work_and_unlock", 0);
  struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
  int epoll_fd = -1;
  int ep_rv;
  polling_island* pi = nullptr;
  char* err_msg;
  const char* err_desc = "pollset_work_and_unlock";

  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
     latest polling island pointed by pollset->po.pi

     Since epoll_fd is immutable, we can read it without obtaining the polling
     island lock. There is however a possibility that the polling island (from
     which we got the epoll_fd) got merged with another island while we are
     in this function. This is still okay because in such a case, we will wakeup
     right-away from epoll_wait() and pick up the latest polling_island the next
     time this function (i.e pollset_work_and_unlock()) is called */

  if (pollset->po.pi == nullptr) {
    /* Lazily create a polling island the first time this pollset is polled */
    pollset->po.pi = polling_island_create(nullptr, error);
    if (pollset->po.pi == nullptr) {
      return; /* Fatal error. We cannot continue */
    }

    PI_ADD_REF(pollset->po.pi, "ps");
    GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
                       (void*)pollset, (void*)pollset->po.pi);
  }

  pi = polling_island_maybe_get_latest(pollset->po.pi);
  epoll_fd = pi->epoll_fd;

  /* Update the pollset->po.pi since the island being pointed by
     pollset->po.pi may be older than the one pointed by pi) */
  if (pollset->po.pi != pi) {
    /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
       polling island to be deleted */
    PI_ADD_REF(pi, "ps");
    PI_UNREF(pollset->po.pi, "ps");
    pollset->po.pi = pi;
  }

  /* Add an extra ref so that the island does not get destroyed (which means
     the epoll_fd won't be closed) while we are doing an epoll_wait() on the
     epoll_fd */
  PI_ADD_REF(pi, "ps_work");
  gpr_mu_unlock(&pollset->po.mu);

  /* Track this thread as a poller of 'pi' while blocked in epoll */
  gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
  g_current_thread_polling_island = pi;

  GRPC_SCHEDULING_START_BLOCKING_REGION;
  GRPC_STATS_INC_SYSCALL_POLL();
  /* sig_mask unblocks grpc_wakeup_signal only for the duration of the wait,
     so a kick (signal) delivered here interrupts epoll_pwait with EINTR */
  ep_rv =
      epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
  GRPC_SCHEDULING_END_BLOCKING_REGION;
  if (ep_rv < 0) {
    if (errno != EINTR) {
      gpr_asprintf(&err_msg,
                   "epoll_wait() epoll fd: %d failed with error: %d (%s)",
                   epoll_fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    } else {
      /* We were interrupted (kicked). Save an iteration by doing a zero
         timeout epoll_wait to see if there are any other events of interest */
      GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
                         (void*)pollset, (void*)worker);
      ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
    }
  }

#ifdef GRPC_TSAN
  /* See the definition of g_poll_sync for more details */
  gpr_atm_acq_load(&g_epoll_sync);
#endif /* defined(GRPC_TSAN) */

  /* Dispatch all events returned by epoll */
  for (int i = 0; i < ep_rv; ++i) {
    void* data_ptr = ep_ev[i].data.ptr;
    if (data_ptr == &polling_island_wakeup_fd) {
      GRPC_POLLING_TRACE(
          "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
          "%d) got merged",
          (void*)pollset, (void*)worker, epoll_fd);
      /* This means that our polling island is merged with a different
         island. We do not have to do anything here since the subsequent call
         to the function pollset_work_and_unlock() will pick up the correct
         epoll_fd */
    } else {
      grpc_fd* fd = static_cast<grpc_fd*>(data_ptr);
      int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
      int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
      int write_ev = ep_ev[i].events & EPOLLOUT;
      /* Error/hangup wakes up both readers and writers */
      if (read_ev || cancel) {
        fd_become_readable(fd, pollset);
      }
      if (write_ev || cancel) {
        fd_become_writable(fd);
      }
    }
  }

  g_current_thread_polling_island = nullptr;
  gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);

  GPR_ASSERT(pi != nullptr);

  /* Before leaving, release the extra ref we added to the polling island. It
     is important to use "pi" here (i.e our old copy of pollset->po.pi
     that we got before releasing the polling island lock). This is because
     pollset->po.pi pointer might get updated in other parts of the
     code when there is an island merge while we are doing epoll_wait() above */
  PI_UNREF(pi, "ps_work");
}
1280
/* Poll 'pollset' until 'deadline', registering the calling thread as a
   worker for the duration. *worker_hdl (if non-NULL) is set to the worker
   while polling and cleared before returning, so callers can kick this
   specific worker.

   pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns */
static grpc_error* pollset_work(grpc_pollset* pollset,
                                grpc_pollset_worker** worker_hdl,
                                grpc_millis deadline) {
  GPR_TIMER_SCOPE("pollset_work", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  int timeout_ms = poll_deadline_to_millis_timeout(deadline);

  sigset_t new_mask;

  /* Stack-allocated worker: lives only for the duration of this call */
  grpc_pollset_worker worker;
  worker.next = worker.prev = nullptr;
  worker.pt_id = pthread_self();
  gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      /* One-time (per thread) setup of the two signal masks */
      sigemptyset(&new_mask);
      sigaddset(&new_mask, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
         This is the mask used at all times *except during
         epoll_wait()*"
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    /* Releases pollset->po.mu while blocked in epoll */
    pollset_work_and_unlock(pollset, &worker, timeout_ms, &g_orig_sigmask,
                            &error);
    grpc_core::ExecCtx::Get()->Flush();

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(pollset);

    /* Drop the lock while flushing closures scheduled by the shutdown */
    gpr_mu_unlock(&pollset->po.mu);
    grpc_core::ExecCtx::Get()->Flush();
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = nullptr;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
1385
/* Link 'item' (an fd, pollset, or pollset_set) into 'bag' (a pollset or
   pollset_set) by unifying their polling islands, so that both end up
   pointing to the same (possibly newly created or merged) island.
   Takes bag->mu then item->mu; both are released before returning. */
static void add_poll_object(poll_obj* bag, poll_obj_type bag_type,
                            poll_obj* item, poll_obj_type item_type) {
  GPR_TIMER_SCOPE("add_poll_object", 0);

#ifndef NDEBUG
  GPR_ASSERT(item->obj_type == item_type);
  GPR_ASSERT(bag->obj_type == bag_type);
#endif

  grpc_error* error = GRPC_ERROR_NONE;
  polling_island* pi_new = nullptr;

  gpr_mu_lock(&bag->mu);
  gpr_mu_lock(&item->mu);

retry:
  /*
   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
   * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
   *    a refcount of 2) and point item->pi and bag->pi to the new island
   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
   *    the other's non-NULL pi
   * 4) Finally if item->pi and bag->pi are non-NULL and not-equal, merge the
   *    polling islands and update item->pi and bag->pi to point to the new
   *    island
   */

  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
   * orphaned */
  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
    gpr_mu_unlock(&item->mu);
    gpr_mu_unlock(&bag->mu);
    return;
  }

  if (item->pi == bag->pi) {
    pi_new = item->pi;
    if (pi_new == nullptr) {
      /* GPR_ASSERT(item->pi == bag->pi == NULL) */

      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
       * we need to do some extra work to make TSAN happy */
      if (item_type == POLL_OBJ_FD) {
        /* Unlock before creating a new polling island: the polling island will
           create a workqueue which creates a file descriptor, and holding an fd
           lock here can eventually cause a loop to appear to TSAN (making it
           unhappy). We don't think it's a real loop (there's an epoch point
           where that loop possibility disappears), but the advantages of
           keeping TSAN happy outweigh any performance advantage we might have
           by keeping the lock held. */
        gpr_mu_unlock(&item->mu);
        pi_new = polling_island_create(FD_FROM_PO(item), &error);
        gpr_mu_lock(&item->mu);

        /* Need to reverify any assumptions made between the initial lock and
           getting to this branch: if they've changed, we need to throw away our
           work and figure things out again. */
        if (item->pi != nullptr) {
          GRPC_POLLING_TRACE(
              "add_poll_object: Raced creating new polling island. pi_new: %p "
              "(fd: %d, %s: %p)",
              (void*)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
              (void*)bag);
          /* No need to lock 'pi_new' here since this is a new polling island
             and no one has a reference to it yet */
          polling_island_remove_all_fds_locked(pi_new, true, &error);

          /* Ref and unref so that the polling island gets deleted during unref
           */
          PI_ADD_REF(pi_new, "dance_of_destruction");
          PI_UNREF(pi_new, "dance_of_destruction");
          goto retry;
        }
      } else {
        pi_new = polling_island_create(nullptr, &error);
      }

      GRPC_POLLING_TRACE(
          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
          "%s: %p)",
          (void*)pi_new, poll_obj_string(item_type), (void*)item,
          poll_obj_string(bag_type), (void*)bag);
    } else {
      GRPC_POLLING_TRACE(
          "add_poll_object: Same polling island. pi: %p (%s, %s)",
          (void*)pi_new, poll_obj_string(item_type), poll_obj_string(bag_type));
    }
  } else if (item->pi == nullptr) {
    /* GPR_ASSERT(bag->pi != NULL) */
    /* Make pi_new point to latest pi*/
    pi_new = polling_island_lock(bag->pi);

    if (item_type == POLL_OBJ_FD) {
      grpc_fd* fd = FD_FROM_PO(item);
      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
    }

    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else if (bag->pi == nullptr) {
    /* GPR_ASSERT(item->pi != NULL) */
    /* Make pi_new to point to latest pi */
    pi_new = polling_island_lock(item->pi);
    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  } else {
    /* Both islands exist and differ: merge them into one */
    pi_new = polling_island_merge(item->pi, bag->pi, &error);
    GRPC_POLLING_TRACE(
        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void*)pi_new, poll_obj_string(item_type), (void*)item,
        poll_obj_string(bag_type), (void*)bag);
  }

  /* At this point, pi_new is the polling island that both item->pi and bag->pi
     MUST be pointing to */

  if (item->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(item_type));
    if (item->pi != nullptr) {
      PI_UNREF(item->pi, poll_obj_string(item_type));
    }
    item->pi = pi_new;
  }

  if (bag->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
    if (bag->pi != nullptr) {
      PI_UNREF(bag->pi, poll_obj_string(bag_type));
    }
    bag->pi = pi_new;
  }

  gpr_mu_unlock(&item->mu);
  gpr_mu_unlock(&bag->mu);

  GRPC_LOG_IF_ERROR("add_poll_object", error);
}
Craig Tiller57726ca2016-09-12 11:59:45 -07001532
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001533static void pollset_add_fd(grpc_pollset* pollset, grpc_fd* fd) {
1534 add_poll_object(&pollset->po, POLL_OBJ_POLLSET, &fd->po, POLL_OBJ_FD);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001535}
1536
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001537/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001538 * Pollset-set Definitions
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001539 */
1540
Craig Tillerbaa14a92017-11-03 09:09:36 -07001541static grpc_pollset_set* pollset_set_create(void) {
Noah Eisen4d20a662018-02-09 09:34:04 -08001542 grpc_pollset_set* pss =
1543 static_cast<grpc_pollset_set*>(gpr_malloc(sizeof(*pss)));
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001544 gpr_mu_init(&pss->po.mu);
Craig Tiller4782d922017-11-10 09:53:21 -08001545 pss->po.pi = nullptr;
ncteisene9cd8a82017-06-29 06:03:52 -04001546#ifndef NDEBUG
Sree Kuchibhotlaa0749a62016-11-18 20:22:09 -08001547 pss->po.obj_type = POLL_OBJ_POLLSET_SET;
1548#endif
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001549 return pss;
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001550}
1551
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001552static void pollset_set_destroy(grpc_pollset_set* pss) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001553 gpr_mu_destroy(&pss->po.mu);
1554
Craig Tiller4782d922017-11-10 09:53:21 -08001555 if (pss->po.pi != nullptr) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001556 PI_UNREF(pss->po.pi, "pss_destroy");
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001557 }
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001558
1559 gpr_free(pss);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001560}
1561
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001562static void pollset_set_add_fd(grpc_pollset_set* pss, grpc_fd* fd) {
1563 add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &fd->po, POLL_OBJ_FD);
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001564}
1565
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001566static void pollset_set_del_fd(grpc_pollset_set* pss, grpc_fd* fd) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001567 /* Nothing to do */
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001568}
1569
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001570static void pollset_set_add_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
1571 add_poll_object(&pss->po, POLL_OBJ_POLLSET_SET, &ps->po, POLL_OBJ_POLLSET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001572}
1573
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001574static void pollset_set_del_pollset(grpc_pollset_set* pss, grpc_pollset* ps) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001575 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001576}
1577
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001578static void pollset_set_add_pollset_set(grpc_pollset_set* bag,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001579 grpc_pollset_set* item) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001580 add_poll_object(&bag->po, POLL_OBJ_POLLSET_SET, &item->po,
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001581 POLL_OBJ_POLLSET_SET);
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001582}
1583
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001584static void pollset_set_del_pollset_set(grpc_pollset_set* bag,
Craig Tillerbaa14a92017-11-03 09:09:36 -07001585 grpc_pollset_set* item) {
Sree Kuchibhotla2385fd72016-11-18 16:30:41 -08001586 /* Nothing to do */
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001587}
1588
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001589/* Test helper functions
1590 * */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001591void* grpc_fd_get_polling_island(grpc_fd* fd) {
1592 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001593
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001594 gpr_mu_lock(&fd->po.mu);
1595 pi = fd->po.pi;
1596 gpr_mu_unlock(&fd->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001597
1598 return pi;
1599}
1600
Craig Tillerbaa14a92017-11-03 09:09:36 -07001601void* grpc_pollset_get_polling_island(grpc_pollset* ps) {
1602 polling_island* pi;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001603
Sree Kuchibhotlaf6f33d72016-11-18 11:38:52 -08001604 gpr_mu_lock(&ps->po.mu);
1605 pi = ps->po.pi;
1606 gpr_mu_unlock(&ps->po.mu);
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001607
1608 return pi;
1609}
1610
Craig Tillerbaa14a92017-11-03 09:09:36 -07001611bool grpc_are_polling_islands_equal(void* p, void* q) {
Noah Eisenbe82e642018-02-09 09:16:55 -08001612 polling_island* p1 = static_cast<polling_island*>(p);
1613 polling_island* p2 = static_cast<polling_island*>(q);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001614
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001615 /* Note: polling_island_lock_pair() may change p1 and p2 to point to the
1616 latest polling islands in their respective linked lists */
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001617 polling_island_lock_pair(&p1, &p2);
Sree Kuchibhotla20d0a162016-06-23 15:14:03 -07001618 polling_island_unlock_pair(p1, p2);
Sree Kuchibhotla2f8ade02016-06-17 13:28:38 -07001619
1620 return p1 == p2;
Sree Kuchibhotla2e12db92016-06-16 16:53:59 -07001621}
1622
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001623/*******************************************************************************
Sree Kuchibhotla0bcbd792016-06-01 15:43:03 -07001624 * Event engine binding
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001625 */
1626
/* Tears down the global state owned by this polling engine: fd bookkeeping,
   pollset globals, then polling-island globals. Counterpart of the
   *_global_init() calls made in grpc_init_epollsig_linux(). */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}
1632
/* The event-engine vtable for the epollsig engine. Entries are positional
   (the field layout is declared in ev_posix.h); do not reorder. */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset),

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    shutdown_engine,
};
1663
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001664/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
1665 * Create a dummy epoll_fd to make sure epoll support is available */
1666static bool is_epoll_available() {
1667 int fd = epoll_create1(EPOLL_CLOEXEC);
1668 if (fd < 0) {
1669 gpr_log(
1670 GPR_ERROR,
1671 "epoll_create1 failed with error: %d. Not using epoll polling engine",
1672 fd);
1673 return false;
1674 }
1675 close(fd);
1676 return true;
1677}
1678
Craig Tillerbaa14a92017-11-03 09:09:36 -07001679const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001680 bool explicit_request) {
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001681 /* If use of signals is disabled, we cannot use epoll engine*/
1682 if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
yang-g30101b02017-11-06 14:35:30 -08001683 gpr_log(GPR_ERROR, "Skipping epollsig because use of signals is disabled.");
Craig Tiller4782d922017-11-10 09:53:21 -08001684 return nullptr;
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001685 }
1686
Ken Paysoncd7d0472016-10-11 12:24:20 -07001687 if (!grpc_has_wakeup_fd()) {
yang-g30101b02017-11-06 14:35:30 -08001688 gpr_log(GPR_ERROR, "Skipping epollsig because of no wakeup fd.");
Craig Tiller4782d922017-11-10 09:53:21 -08001689 return nullptr;
Ken Paysonbc544be2016-10-06 19:23:47 -07001690 }
1691
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001692 if (!is_epoll_available()) {
yang-g30101b02017-11-06 14:35:30 -08001693 gpr_log(GPR_ERROR, "Skipping epollsig because epoll is unavailable.");
Craig Tiller4782d922017-11-10 09:53:21 -08001694 return nullptr;
Sree Kuchibhotla72744022016-06-09 09:42:06 -07001695 }
1696
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001697 if (!is_grpc_wakeup_signal_initialized) {
Sree Kuchibhotla0fda8802017-08-30 20:34:51 -07001698 if (explicit_request) {
Craig Tillerf8382b82017-04-27 15:09:48 -07001699 grpc_use_signal(SIGRTMIN + 6);
1700 } else {
yang-g30101b02017-11-06 14:35:30 -08001701 gpr_log(GPR_ERROR,
1702 "Skipping epollsig because uninitialized wakeup signal.");
Craig Tiller4782d922017-11-10 09:53:21 -08001703 return nullptr;
Craig Tillerf8382b82017-04-27 15:09:48 -07001704 }
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001705 }
1706
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001707 fd_global_init();
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001708
1709 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001710 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001711 }
1712
1713 if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
1714 polling_island_global_init())) {
Craig Tiller4782d922017-11-10 09:53:21 -08001715 return nullptr;
Sree Kuchibhotla3131c262016-06-21 17:28:28 -07001716 }
1717
Sree Kuchibhotlaf448c342016-05-19 10:51:24 -07001718 return &vtable;
1719}
1720
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001721#else /* defined(GRPC_LINUX_EPOLL_CREATE1) */
murgatroid99623dd4f2016-08-08 17:31:27 -07001722#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001723#include "src/core/lib/iomgr/ev_epollsig_linux.h"
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001724/* If GRPC_LINUX_EPOLL_CREATE1 is not defined, it means
1725 epoll_create1 is not available. Return NULL */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001726const grpc_event_engine_vtable* grpc_init_epollsig_linux(
Craig Tillerf8382b82017-04-27 15:09:48 -07001727 bool explicit_request) {
Yash Tibrewal8cf14702017-12-06 09:47:54 -08001728 return nullptr;
Craig Tillerf8382b82017-04-27 15:09:48 -07001729}
murgatroid99623dd4f2016-08-08 17:31:27 -07001730#endif /* defined(GRPC_POSIX_SOCKET) */
Sree Kuchibhotla5855c472016-06-08 12:56:56 -07001731
Sree Kuchibhotlac7be7c62016-06-09 17:08:50 -07001732void grpc_use_signal(int signum) {}
Mehrdad Afsharifb669002018-01-17 15:37:56 -08001733#endif /* !defined(GRPC_LINUX_EPOLL_CREATE1) */