blob: 9e3643fa28625ea9e08bb9bd15b576bbeaab78e9 [file] [log] [blame]
Craig Tillerc67cc992017-04-27 10:15:51 -07001/*
2 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02003 * Copyright 2017 gRPC authors.
Craig Tillerc67cc992017-04-27 10:15:51 -07004 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02005 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
Craig Tillerc67cc992017-04-27 10:15:51 -07008 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +02009 * http://www.apache.org/licenses/LICENSE-2.0
Craig Tillerc67cc992017-04-27 10:15:51 -070010 *
Jan Tattermusch7897ae92017-06-07 22:57:36 +020011 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
Craig Tillerc67cc992017-04-27 10:15:51 -070016 *
17 */
18
19#include "src/core/lib/iomgr/port.h"
20
21/* This polling engine is only relevant on linux kernels supporting epoll() */
22#ifdef GRPC_LINUX_EPOLL
Craig Tiller4509c472017-04-27 19:05:13 +000023#include "src/core/lib/iomgr/ev_epoll1_linux.h"
Craig Tillerc67cc992017-04-27 10:15:51 -070024
25#include <assert.h>
26#include <errno.h>
Craig Tiller20397792017-07-18 11:35:27 -070027#include <limits.h>
Craig Tillerc67cc992017-04-27 10:15:51 -070028#include <poll.h>
29#include <pthread.h>
30#include <string.h>
31#include <sys/epoll.h>
32#include <sys/socket.h>
33#include <unistd.h>
34
35#include <grpc/support/alloc.h>
Craig Tiller6de05932017-04-28 09:17:38 -070036#include <grpc/support/cpu.h>
Craig Tillerc67cc992017-04-27 10:15:51 -070037#include <grpc/support/log.h>
38#include <grpc/support/string_util.h>
39#include <grpc/support/tls.h>
40#include <grpc/support/useful.h>
41
Craig Tillerb4bb1cd2017-07-20 14:18:17 -070042#include "src/core/lib/debug/stats.h"
Craig Tiller6b7c1fb2017-07-19 15:45:03 -070043#include "src/core/lib/iomgr/block_annotate.h"
Craig Tillerc67cc992017-04-27 10:15:51 -070044#include "src/core/lib/iomgr/ev_posix.h"
45#include "src/core/lib/iomgr/iomgr_internal.h"
46#include "src/core/lib/iomgr/lockfree_event.h"
Craig Tillerc67cc992017-04-27 10:15:51 -070047#include "src/core/lib/iomgr/wakeup_fd_posix.h"
Craig Tillerc67cc992017-04-27 10:15:51 -070048#include "src/core/lib/profiling/timers.h"
Craig Tillerfbf61bb2017-11-08 11:50:14 -080049#include "src/core/lib/support/manual_constructor.h"
Craig Tillerb89bac02017-05-26 15:20:32 +000050#include "src/core/lib/support/string.h"
Craig Tillerc67cc992017-04-27 10:15:51 -070051
Craig Tillerc67cc992017-04-27 10:15:51 -070052static grpc_wakeup_fd global_wakeup_fd;
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -070053
54/*******************************************************************************
55 * Singleton epoll set related fields
56 */
57
58#define MAX_EPOLL_EVENTS 100
Sree Kuchibhotla19614522017-08-25 17:10:10 -070059#define MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION 1
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -070060
/* NOTE ON SYNCHRONIZATION:
 * - Fields in this struct are only modified by the designated poller. Hence
 * there is no need for any locks to protect the struct.
 * - num_events and cursor fields have to be of atomic type to provide memory
 * visibility guarantees only. i.e In case of multiple pollers, the designated
 * polling thread keeps changing; the thread that wrote these values may be
 * different from the thread reading the values
 */
typedef struct epoll_set {
  /* The epoll instance's file descriptor (from epoll_create1). */
  int epfd;

  /* The epoll_events after the last call to epoll_wait() */
  struct epoll_event events[MAX_EPOLL_EVENTS];

  /* The number of epoll_events after the last call to epoll_wait() */
  gpr_atm num_events;

  /* Index of the first event in epoll_events that has to be processed. This
   * field is only valid if num_events > 0 */
  gpr_atm cursor;
} epoll_set;

/* The global singleton epoll set */
static epoll_set g_epoll_set;
85
86/* Must be called *only* once */
87static bool epoll_set_init() {
88 g_epoll_set.epfd = epoll_create1(EPOLL_CLOEXEC);
89 if (g_epoll_set.epfd < 0) {
90 gpr_log(GPR_ERROR, "epoll unavailable");
91 return false;
92 }
93
Sree Kuchibhotlaa92a9cc2017-08-27 14:02:15 -070094 gpr_log(GPR_INFO, "grpc epoll fd: %d", g_epoll_set.epfd);
95 gpr_atm_no_barrier_store(&g_epoll_set.num_events, 0);
96 gpr_atm_no_barrier_store(&g_epoll_set.cursor, 0);
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -070097 return true;
98}
99
100/* epoll_set_init() MUST be called before calling this. */
101static void epoll_set_shutdown() {
102 if (g_epoll_set.epfd >= 0) {
103 close(g_epoll_set.epfd);
104 g_epoll_set.epfd = -1;
105 }
106}
Craig Tillerc67cc992017-04-27 10:15:51 -0700107
108/*******************************************************************************
109 * Fd Declarations
110 */
111
struct grpc_fd {
  /* The underlying OS file descriptor. */
  int fd;

  /* Lock-free event state used to deliver read/write readiness callbacks.
     ManualConstructor so construction/destruction is tied to the freelist
     lifecycle, not to malloc/free of the grpc_fd itself. */
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> read_closure;
  grpc_core::ManualConstructor<grpc_core::LockfreeEvent> write_closure;

  /* Intrusive link for the fd freelist (see fd_freelist). */
  struct grpc_fd* freelist_next;

  /* The pollset that last noticed that the fd is readable. The actual type
   * stored in this is (grpc_pollset *) */
  gpr_atm read_notifier_pollset;

  /* Registration handle for iomgr object tracking/leak detection. */
  grpc_iomgr_object iomgr_object;
};
126
127static void fd_global_init(void);
128static void fd_global_shutdown(void);
129
130/*******************************************************************************
131 * Pollset Declarations
132 */
133
/* Kick progression of a pollset worker: UNKICKED (still waiting), KICKED
   (told to wake up), DESIGNATED_POLLER (chosen to run epoll_wait). */
typedef enum { UNKICKED, KICKED, DESIGNATED_POLLER } kick_state;

/* Human-readable name for a kick_state value (tracing/log output only). */
static const char* kick_state_string(kick_state st) {
  switch (st) {
    case UNKICKED:
      return "UNKICKED";
    case KICKED:
      return "KICKED";
    case DESIGNATED_POLLER:
      return "DESIGNATED_POLLER";
  }
  GPR_UNREACHABLE_CODE(return "UNKNOWN");
}
147
struct grpc_pollset_worker {
  /* Current kick state; mutated only via SET_KICK_STATE. */
  kick_state state;
  int kick_state_mutator;  // which line of code last changed kick state
  /* True once 'cv' has been gpr_cv_init'ed (so kickers know to signal it). */
  bool initialized_cv;
  /* Links in the pollset's circular doubly-linked worker list. */
  grpc_pollset_worker* next;
  grpc_pollset_worker* prev;
  /* Condition variable a non-designated worker sleeps on. */
  gpr_cv cv;
  /* Closures to be scheduled when this worker finishes its poll cycle. */
  grpc_closure_list schedule_on_end_work;
};
157
/* Set a worker's kick state and record (via __LINE__) which call site last
   mutated it — a cheap debugging breadcrumb for kick-related races. */
#define SET_KICK_STATE(worker, kick_state)   \
  do {                                       \
    (worker)->state = (kick_state);          \
    (worker)->kick_state_mutator = __LINE__; \
  } while (false)
163
/* Upper bound on the number of pollset neighborhoods (actual count is
   clamped to the core count in pollset_global_init). */
#define MAX_NEIGHBORHOODS 1024

/* A "neighborhood" groups pollsets; each has its own mutex and list root.
   Padded with a cacheline to reduce false sharing between neighborhoods. */
typedef struct pollset_neighborhood {
  gpr_mu mu;
  /* Root of this neighborhood's circular list of active pollsets. */
  grpc_pollset* active_root;
  char pad[GPR_CACHELINE_SIZE];
} pollset_neighborhood;
Craig Tiller6de05932017-04-28 09:17:38 -0700171
struct grpc_pollset {
  gpr_mu mu;
  /* Neighborhood this pollset currently belongs to; may change while
     'reassigning_neighborhood' is true. */
  pollset_neighborhood* neighborhood;
  bool reassigning_neighborhood;
  /* Root of the circular list of workers attached to this pollset. */
  grpc_pollset_worker* root_worker;
  /* A kick arrived while no worker was present; consumed by the next worker. */
  bool kicked_without_poller;

  /* Set to true if the pollset is observed to have no workers available to
     poll */
  bool seen_inactive;
  bool shutting_down;             /* Is the pollset shutting down ? */
  grpc_closure* shutdown_closure; /* Called once shutdown is complete */

  /* Number of workers who are *about-to* attach themselves to the pollset
   * worker list */
  int begin_refs;

  /* Intrusive links in the neighborhood's circular pollset list. */
  grpc_pollset* next;
  grpc_pollset* prev;
};
192
193/*******************************************************************************
194 * Pollset-set Declarations
195 */
Craig Tiller6de05932017-04-28 09:17:38 -0700196
/* With a single global epoll set, pollset_sets need no state of their own;
   a dummy member keeps the struct non-empty for C compatibility. */
struct grpc_pollset_set {
  char unused;
};
Craig Tillerc67cc992017-04-27 10:15:51 -0700200
201/*******************************************************************************
202 * Common helpers
203 */
204
Craig Tillerbaa14a92017-11-03 09:09:36 -0700205static bool append_error(grpc_error** composite, grpc_error* error,
206 const char* desc) {
Craig Tillerc67cc992017-04-27 10:15:51 -0700207 if (error == GRPC_ERROR_NONE) return true;
208 if (*composite == GRPC_ERROR_NONE) {
209 *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
210 }
211 *composite = grpc_error_add_child(*composite, error);
212 return false;
213}
214
215/*******************************************************************************
216 * Fd Definitions
217 */
218
219/* We need to keep a freelist not because of any concerns of malloc performance
220 * but instead so that implementations with multiple threads in (for example)
221 * epoll_wait deal with the race between pollset removal and incoming poll
222 * notifications.
223 *
224 * The problem is that the poller ultimately holds a reference to this
225 * object, so it is very difficult to know when is safe to free it, at least
226 * without some expensive synchronization.
227 *
228 * If we keep the object freelisted, in the worst case losing this race just
229 * becomes a spurious read notification on a reused fd.
230 */
231
232/* The alarm system needs to be able to wakeup 'some poller' sometimes
233 * (specifically when a new alarm needs to be triggered earlier than the next
234 * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
235 * case occurs. */
236
/* Head of the grpc_fd freelist and the mutex protecting it (see the note
   above on why fds are freelisted rather than freed). */
static grpc_fd* fd_freelist = NULL;
static gpr_mu fd_freelist_mu;

/* Initialize the freelist lock; paired with fd_global_shutdown(). */
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
241
242static void fd_global_shutdown(void) {
243 gpr_mu_lock(&fd_freelist_mu);
244 gpr_mu_unlock(&fd_freelist_mu);
245 while (fd_freelist != NULL) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700246 grpc_fd* fd = fd_freelist;
Craig Tillerc67cc992017-04-27 10:15:51 -0700247 fd_freelist = fd_freelist->freelist_next;
Craig Tillerc67cc992017-04-27 10:15:51 -0700248 gpr_free(fd);
249 }
250 gpr_mu_destroy(&fd_freelist_mu);
251}
252
/* Wrap OS descriptor 'fd' in a grpc_fd (reusing a freelisted one when
   available) and register it edge-triggered with the global epoll set.
   'name' is used for iomgr object tracking only. */
static grpc_fd* fd_create(int fd, const char* name) {
  grpc_fd* new_fd = NULL;

  /* Try to recycle a previously orphaned grpc_fd first. */
  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = (grpc_fd*)gpr_malloc(sizeof(grpc_fd));
  }

  new_fd->fd = fd;
  /* (Re)construct the lock-free event state in place. */
  new_fd->read_closure.Init();
  new_fd->write_closure.Init();
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;

  char* fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifndef NDEBUG
  if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) {
    gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name);
  }
#endif
  gpr_free(fd_name);

  /* Register for read+write readiness, edge-triggered (EPOLLET). */
  struct epoll_event ev;
  ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
  ev.data.ptr = new_fd;
  /* NOTE(review): failure here is only logged, not propagated — the fd will
     simply never report readiness; presumably acceptable upstream. */
  if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, fd, &ev) != 0) {
    gpr_log(GPR_ERROR, "epoll_ctl failed: %s", strerror(errno));
  }

  return new_fd;
}
293
Craig Tillerbaa14a92017-11-03 09:09:36 -0700294static int fd_wrapped_fd(grpc_fd* fd) { return fd->fd; }
Craig Tillerc67cc992017-04-27 10:15:51 -0700295
/* if 'releasing_fd' is true, it means that we are going to detach the internal
 * fd from grpc_fd structure (i.e which means we should not be calling
 * shutdown() syscall on that fd) */
static void fd_shutdown_internal(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                                 grpc_error* why, bool releasing_fd) {
  /* SetShutdown returns true only for the first shutdown, making the
     shutdown() syscall and write-side shutdown once-only. */
  if (fd->read_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why))) {
    if (!releasing_fd) {
      shutdown(fd->fd, SHUT_RDWR);
    }
    fd->write_closure->SetShutdown(exec_ctx, GRPC_ERROR_REF(why));
  }
  /* Both SetShutdown calls took their own refs; drop the caller's ref. */
  GRPC_ERROR_UNREF(why);
}
309
/* Shut down both sides of the fd. Might be called multiple times; takes
   ownership of 'why'. */
static void fd_shutdown(grpc_exec_ctx* exec_ctx, grpc_fd* fd, grpc_error* why) {
  fd_shutdown_internal(exec_ctx, fd, why, false);
}
314
/* Release a grpc_fd: shut it down if needed, either hand the raw descriptor
   back through '*release_fd' or close it, schedule 'on_done', and return the
   grpc_fd struct to the freelist (see freelist note above). */
static void fd_orphan(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                      grpc_closure* on_done, int* release_fd,
                      bool already_closed, const char* reason) {
  grpc_error* error = GRPC_ERROR_NONE;
  bool is_release_fd = (release_fd != NULL);

  if (!fd->read_closure->IsShutdown()) {
    /* When releasing, skip the shutdown() syscall so the detached fd stays
       usable by its new owner. */
    fd_shutdown_internal(exec_ctx, fd,
                         GRPC_ERROR_CREATE_FROM_COPIED_STRING(reason),
                         is_release_fd);
  }

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (is_release_fd) {
    *release_fd = fd->fd;
  } else if (!already_closed) {
    close(fd->fd);
  }

  GRPC_CLOSURE_SCHED(exec_ctx, on_done, GRPC_ERROR_REF(error));

  grpc_iomgr_unregister_object(&fd->iomgr_object);
  /* Tear down the event state; re-Init'ed if this struct is recycled. */
  fd->read_closure.Destroy();
  fd->write_closure.Destroy();

  /* Push the struct onto the freelist rather than freeing it. */
  gpr_mu_lock(&fd_freelist_mu);
  fd->freelist_next = fd_freelist;
  fd_freelist = fd;
  gpr_mu_unlock(&fd_freelist_mu);
}
346
Craig Tillerbaa14a92017-11-03 09:09:36 -0700347static grpc_pollset* fd_get_read_notifier_pollset(grpc_exec_ctx* exec_ctx,
348 grpc_fd* fd) {
Craig Tillerc67cc992017-04-27 10:15:51 -0700349 gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700350 return (grpc_pollset*)notifier;
Craig Tillerc67cc992017-04-27 10:15:51 -0700351}
352
Craig Tillerbaa14a92017-11-03 09:09:36 -0700353static bool fd_is_shutdown(grpc_fd* fd) {
Craig Tillerfbf61bb2017-11-08 11:50:14 -0800354 return fd->read_closure->IsShutdown();
Craig Tillerc67cc992017-04-27 10:15:51 -0700355}
356
/* Arrange for 'closure' to run when the fd becomes readable (or is shut
   down); delegates to the lock-free event state. */
static void fd_notify_on_read(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                              grpc_closure* closure) {
  fd->read_closure->NotifyOn(exec_ctx, closure);
}
361
/* Arrange for 'closure' to run when the fd becomes writable (or is shut
   down); delegates to the lock-free event state. */
static void fd_notify_on_write(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                               grpc_closure* closure) {
  fd->write_closure->NotifyOn(exec_ctx, closure);
}
366
/* Mark the fd readable (firing any pending read closure) and remember which
   pollset noticed it. */
static void fd_become_readable(grpc_exec_ctx* exec_ctx, grpc_fd* fd,
                               grpc_pollset* notifier) {
  fd->read_closure->SetReady(exec_ctx);
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
373
/* Mark the fd writable, firing any pending write closure. */
static void fd_become_writable(grpc_exec_ctx* exec_ctx, grpc_fd* fd) {
  fd->write_closure->SetReady(exec_ctx);
}
377
378/*******************************************************************************
379 * Pollset Definitions
380 */
381
/* Thread-local: which pollset/worker the current thread is servicing. */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);

/* The designated poller */
static gpr_atm g_active_poller;

/* Array of neighborhoods and its length (set in pollset_global_init). */
static pollset_neighborhood* g_neighborhoods;
static size_t g_num_neighborhoods;
Craig Tiller6de05932017-04-28 09:17:38 -0700390
Craig Tillerc67cc992017-04-27 10:15:51 -0700391/* Return true if first in list */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700392static bool worker_insert(grpc_pollset* pollset, grpc_pollset_worker* worker) {
Craig Tiller32f90ee2017-04-28 12:46:41 -0700393 if (pollset->root_worker == NULL) {
394 pollset->root_worker = worker;
395 worker->next = worker->prev = worker;
Craig Tillerc67cc992017-04-27 10:15:51 -0700396 return true;
397 } else {
Craig Tiller32f90ee2017-04-28 12:46:41 -0700398 worker->next = pollset->root_worker;
399 worker->prev = worker->next->prev;
400 worker->next->prev = worker;
401 worker->prev->next = worker;
Craig Tillerc67cc992017-04-27 10:15:51 -0700402 return false;
403 }
404}
405
/* Result of removing a worker: list became empty, the root moved to the
   next worker, or a non-root worker was unlinked. */
typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result;

/* Unlink 'worker' from the pollset's circular worker list. */
static worker_remove_result worker_remove(grpc_pollset* pollset,
                                          grpc_pollset_worker* worker) {
  if (worker == pollset->root_worker) {
    if (worker == worker->next) {
      /* Sole element: list is now empty. */
      pollset->root_worker = NULL;
      return EMPTIED;
    } else {
      /* Root with siblings: promote the next worker to root. */
      pollset->root_worker = worker->next;
      worker->prev->next = worker->next;
      worker->next->prev = worker->prev;
      return NEW_ROOT;
    }
  } else {
    worker->prev->next = worker->next;
    worker->next->prev = worker->prev;
    return REMOVED;
  }
}
427
Vijay Pai4b7ef4d2017-09-11 23:09:22 -0700428static size_t choose_neighborhood(void) {
429 return (size_t)gpr_cpu_current_cpu() % g_num_neighborhoods;
Craig Tillerba550da2017-05-01 14:26:31 +0000430}
431
/* One-time pollset subsystem init: thread-locals, the global wakeup fd
   (registered edge-triggered with the epoll set), and the neighborhoods. */
static grpc_error* pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  gpr_atm_no_barrier_store(&g_active_poller, 0);
  /* -1 lets pollset_global_shutdown know init never completed. */
  global_wakeup_fd.read_fd = -1;
  grpc_error* err = grpc_wakeup_fd_init(&global_wakeup_fd);
  if (err != GRPC_ERROR_NONE) return err;
  struct epoll_event ev;
  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
  /* data.ptr distinguishes wakeups from real fds in process_epoll_events. */
  ev.data.ptr = &global_wakeup_fd;
  if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, global_wakeup_fd.read_fd,
                &ev) != 0) {
    return GRPC_OS_ERROR(errno, "epoll_ctl");
  }
  /* One neighborhood per core, clamped to [1, MAX_NEIGHBORHOODS]. */
  g_num_neighborhoods = GPR_CLAMP(gpr_cpu_num_cores(), 1, MAX_NEIGHBORHOODS);
  g_neighborhoods = (pollset_neighborhood*)gpr_zalloc(sizeof(*g_neighborhoods) *
                                                      g_num_neighborhoods);
  for (size_t i = 0; i < g_num_neighborhoods; i++) {
    gpr_mu_init(&g_neighborhoods[i].mu);
  }
  return GRPC_ERROR_NONE;
}
454
455static void pollset_global_shutdown(void) {
Craig Tiller4509c472017-04-27 19:05:13 +0000456 gpr_tls_destroy(&g_current_thread_pollset);
457 gpr_tls_destroy(&g_current_thread_worker);
Craig Tiller375eb252017-04-27 23:29:12 +0000458 if (global_wakeup_fd.read_fd != -1) grpc_wakeup_fd_destroy(&global_wakeup_fd);
Vijay Pai4b7ef4d2017-09-11 23:09:22 -0700459 for (size_t i = 0; i < g_num_neighborhoods; i++) {
460 gpr_mu_destroy(&g_neighborhoods[i].mu);
Craig Tiller32f90ee2017-04-28 12:46:41 -0700461 }
Vijay Pai4b7ef4d2017-09-11 23:09:22 -0700462 gpr_free(g_neighborhoods);
Craig Tiller4509c472017-04-27 19:05:13 +0000463}
464
Craig Tillerbaa14a92017-11-03 09:09:36 -0700465static void pollset_init(grpc_pollset* pollset, gpr_mu** mu) {
Craig Tiller6de05932017-04-28 09:17:38 -0700466 gpr_mu_init(&pollset->mu);
467 *mu = &pollset->mu;
Vijay Pai4b7ef4d2017-09-11 23:09:22 -0700468 pollset->neighborhood = &g_neighborhoods[choose_neighborhood()];
469 pollset->reassigning_neighborhood = false;
Sree Kuchibhotla30882302017-08-16 13:46:52 -0700470 pollset->root_worker = NULL;
471 pollset->kicked_without_poller = false;
Craig Tiller6de05932017-04-28 09:17:38 -0700472 pollset->seen_inactive = true;
Sree Kuchibhotla30882302017-08-16 13:46:52 -0700473 pollset->shutting_down = false;
474 pollset->shutdown_closure = NULL;
475 pollset->begin_refs = 0;
476 pollset->next = pollset->prev = NULL;
Craig Tiller6de05932017-04-28 09:17:38 -0700477}
478
/* Destroy a pollset, first unlinking it from its neighborhood's active list
   if it is still there. Lock order is neighborhood->mu then pollset->mu, so
   we must drop pollset->mu, take both, and re-check — retrying if the
   pollset migrated neighborhoods in the window. */
static void pollset_destroy(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset) {
  gpr_mu_lock(&pollset->mu);
  if (!pollset->seen_inactive) {
    pollset_neighborhood* neighborhood = pollset->neighborhood;
    gpr_mu_unlock(&pollset->mu);
  retry_lock_neighborhood:
    gpr_mu_lock(&neighborhood->mu);
    gpr_mu_lock(&pollset->mu);
    if (!pollset->seen_inactive) {
      if (pollset->neighborhood != neighborhood) {
        /* Pollset moved while we were unlocked; chase the new neighborhood. */
        gpr_mu_unlock(&neighborhood->mu);
        neighborhood = pollset->neighborhood;
        gpr_mu_unlock(&pollset->mu);
        goto retry_lock_neighborhood;
      }
      /* Unlink from the neighborhood's circular list, fixing the root if
         this pollset was it. */
      pollset->prev->next = pollset->next;
      pollset->next->prev = pollset->prev;
      if (pollset == pollset->neighborhood->active_root) {
        pollset->neighborhood->active_root =
            pollset->next == pollset ? NULL : pollset->next;
      }
    }
    gpr_mu_unlock(&pollset->neighborhood->mu);
  }
  gpr_mu_unlock(&pollset->mu);
  gpr_mu_destroy(&pollset->mu);
}
506
/* Kick every worker attached to 'pollset': wake CV-sleepers via their
   condition variable and the designated poller via the global wakeup fd.
   Caller must hold pollset->mu. */
static grpc_error* pollset_kick_all(grpc_exec_ctx* exec_ctx,
                                    grpc_pollset* pollset) {
  GPR_TIMER_BEGIN("pollset_kick_all", 0);
  grpc_error* error = GRPC_ERROR_NONE;
  if (pollset->root_worker != NULL) {
    grpc_pollset_worker* worker = pollset->root_worker;
    do {
      GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
      switch (worker->state) {
        case KICKED:
          /* Already kicked; nothing to do. */
          GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx);
          break;
        case UNKICKED:
          SET_KICK_STATE(worker, KICKED);
          if (worker->initialized_cv) {
            GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
            gpr_cv_signal(&worker->cv);
          }
          break;
        case DESIGNATED_POLLER:
          /* The poller is blocked in epoll_wait; poke the wakeup fd. */
          GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx);
          SET_KICK_STATE(worker, KICKED);
          append_error(&error, grpc_wakeup_fd_wakeup(&global_wakeup_fd),
                       "pollset_kick_all");
          break;
      }

      worker = worker->next;
    } while (worker != pollset->root_worker);
  }
  // TODO: sreek. Check if we need to set 'kicked_without_poller' to true here
  // in the else case
  GPR_TIMER_END("pollset_kick_all", 0);
  return error;
}
542
/* If shutdown was requested and no workers remain (attached or about to
   attach), fire the shutdown closure exactly once. */
static void pollset_maybe_finish_shutdown(grpc_exec_ctx* exec_ctx,
                                          grpc_pollset* pollset) {
  if (pollset->shutdown_closure != NULL && pollset->root_worker == NULL &&
      pollset->begin_refs == 0) {
    GPR_TIMER_MARK("pollset_finish_shutdown", 0);
    GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE);
    /* NULLing the closure makes this idempotent. */
    pollset->shutdown_closure = NULL;
  }
}
552
/* Begin shutting down the pollset: kick all workers out and schedule
   'closure' once the last one leaves. Must be called at most once. */
static void pollset_shutdown(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                             grpc_closure* closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(pollset->shutdown_closure == NULL);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutdown_closure = closure;
  pollset->shutting_down = true;
  GRPC_LOG_IF_ERROR("pollset_shutdown", pollset_kick_all(exec_ctx, pollset));
  /* May complete immediately if no workers are attached. */
  pollset_maybe_finish_shutdown(exec_ctx, pollset);
  GPR_TIMER_END("pollset_shutdown", 0);
}
564
Craig Tillerbaa14a92017-11-03 09:09:36 -0700565static int poll_deadline_to_millis_timeout(grpc_exec_ctx* exec_ctx,
Craig Tiller20397792017-07-18 11:35:27 -0700566 grpc_millis millis) {
567 if (millis == GRPC_MILLIS_INF_FUTURE) return -1;
568 grpc_millis delta = millis - grpc_exec_ctx_now(exec_ctx);
Craig Tillerd9b82bd2017-08-29 12:16:56 -0700569 if (delta > INT_MAX) {
Craig Tiller20397792017-07-18 11:35:27 -0700570 return INT_MAX;
Craig Tillerd9b82bd2017-08-29 12:16:56 -0700571 } else if (delta < 0) {
Craig Tiller4509c472017-04-27 19:05:13 +0000572 return 0;
Craig Tillerd9b82bd2017-08-29 12:16:56 -0700573 } else {
Craig Tiller20397792017-07-18 11:35:27 -0700574 return (int)delta;
Craig Tiller4509c472017-04-27 19:05:13 +0000575 }
Craig Tiller4509c472017-04-27 19:05:13 +0000576}
577
/* Process the epoll events found by do_epoll_wait() function.
   - g_epoll_set.cursor points to the index of the first event to be processed
   - This function then processes up-to MAX_EPOLL_EVENTS_PER_ITERATION and
     updates the g_epoll_set.cursor

   NOTE ON SYNCRHONIZATION: Similar to do_epoll_wait(), this function is only
   called by g_active_poller thread. So there is no need for synchronization
   when accessing fields in g_epoll_set */
static grpc_error* process_epoll_events(grpc_exec_ctx* exec_ctx,
                                        grpc_pollset* pollset) {
  static const char* err_desc = "process_events";
  grpc_error* error = GRPC_ERROR_NONE;

  GPR_TIMER_BEGIN("process_epoll_events", 0);
  /* Acquire loads pair with the release stores made by whichever thread was
     previously the designated poller. */
  long num_events = gpr_atm_acq_load(&g_epoll_set.num_events);
  long cursor = gpr_atm_acq_load(&g_epoll_set.cursor);
  for (int idx = 0;
       (idx < MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION) && cursor != num_events;
       idx++) {
    long c = cursor++;
    struct epoll_event* ev = &g_epoll_set.events[c];
    void* data_ptr = ev->data.ptr;

    if (data_ptr == &global_wakeup_fd) {
      /* A kick, not a real fd: just drain the wakeup fd. */
      append_error(&error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd),
                   err_desc);
    } else {
      grpc_fd* fd = (grpc_fd*)(data_ptr);
      /* Error/hangup cancels both directions so pending closures fire. */
      bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0;
      bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0;
      bool write_ev = (ev->events & EPOLLOUT) != 0;

      if (read_ev || cancel) {
        fd_become_readable(exec_ctx, fd, pollset);
      }

      if (write_ev || cancel) {
        fd_become_writable(exec_ctx, fd);
      }
    }
  }
  /* Publish the new cursor for the next designated poller. */
  gpr_atm_rel_store(&g_epoll_set.cursor, cursor);
  GPR_TIMER_END("process_epoll_events", 0);
  return error;
}
623
/* Do epoll_wait and store the events in g_epoll_set.events field. This does not
   "process" any of the events yet; that is done in process_epoll_events().
   *See process_epoll_events() function for more details.

   Returns GRPC_ERROR_NONE on success, or an OS error if epoll_wait() failed
   for a reason other than EINTR.

   NOTE ON SYNCHRONIZATION: At any point of time, only the g_active_poller
   (i.e the designated poller thread) will be calling this function. So there is
   no need for any synchronization when accessing fields in g_epoll_set */
static grpc_error* do_epoll_wait(grpc_exec_ctx* exec_ctx, grpc_pollset* ps,
                                 grpc_millis deadline) {
  GPR_TIMER_BEGIN("do_epoll_wait", 0);

  int r;
  int timeout = poll_deadline_to_millis_timeout(exec_ctx, deadline);
  /* Only mark a blocking region if we may actually block (timeout != 0). */
  if (timeout != 0) {
    GRPC_SCHEDULING_START_BLOCKING_REGION;
  }
  do {
    GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
    r = epoll_wait(g_epoll_set.epfd, g_epoll_set.events, MAX_EPOLL_EVENTS,
                   timeout);
  } while (r < 0 && errno == EINTR); /* retry if interrupted by a signal */
  if (timeout != 0) {
    GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
  }

  if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait");

  GRPC_STATS_INC_POLL_EVENTS_RETURNED(exec_ctx, r);

  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_log(GPR_DEBUG, "ps: %p poll got %d events", ps, r);
  }

  /* Release-stores publish the freshly filled events array to whichever
     thread later runs process_epoll_events() (which acquire-loads these). */
  gpr_atm_rel_store(&g_epoll_set.num_events, r);
  gpr_atm_rel_store(&g_epoll_set.cursor, 0);

  GPR_TIMER_END("do_epoll_wait", 0);
  return GRPC_ERROR_NONE;
}
663
/* Register 'worker' with 'pollset' and decide whether this thread should poll.
   Must be called with pollset->mu held; may temporarily drop and re-acquire
   both pollset->mu and the neighborhood mutex.

   Returns true iff this worker ended up as the DESIGNATED_POLLER (and the
   pollset is not shutting down) — i.e. the caller should go on to call
   do_epoll_wait()/process_epoll_events(). Otherwise the worker either blocks
   in gpr_cv_wait() until kicked/deadline, or returns false immediately
   (e.g. kicked_without_poller). */
static bool begin_worker(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                         grpc_pollset_worker* worker,
                         grpc_pollset_worker** worker_hdl,
                         grpc_millis deadline) {
  GPR_TIMER_BEGIN("begin_worker", 0);
  if (worker_hdl != NULL) *worker_hdl = worker;
  worker->initialized_cv = false;
  SET_KICK_STATE(worker, UNKICKED);
  worker->schedule_on_end_work = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT;
  pollset->begin_refs++;

  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_log(GPR_ERROR, "PS:%p BEGIN_STARTS:%p", pollset, worker);
  }

  if (pollset->seen_inactive) {
    // pollset has been observed to be inactive, we need to move back to the
    // active list
    bool is_reassigning = false;
    /* Only one worker at a time performs the neighborhood reassignment; the
       flag is checked/set under pollset->mu. */
    if (!pollset->reassigning_neighborhood) {
      is_reassigning = true;
      pollset->reassigning_neighborhood = true;
      pollset->neighborhood = &g_neighborhoods[choose_neighborhood()];
    }
    pollset_neighborhood* neighborhood = pollset->neighborhood;
    gpr_mu_unlock(&pollset->mu);
  // pollset unlocked: state may change (even worker->kick_state)
  retry_lock_neighborhood:
    /* Lock order is neighborhood->mu THEN pollset->mu, which is why
       pollset->mu was dropped above. */
    gpr_mu_lock(&neighborhood->mu);
    gpr_mu_lock(&pollset->mu);
    if (GRPC_TRACER_ON(grpc_polling_trace)) {
      gpr_log(GPR_ERROR, "PS:%p BEGIN_REORG:%p kick_state=%s is_reassigning=%d",
              pollset, worker, kick_state_string(worker->state),
              is_reassigning);
    }
    if (pollset->seen_inactive) {
      /* Another thread may have reassigned the pollset to a different
         neighborhood while we were unlocked; chase it and retry. */
      if (neighborhood != pollset->neighborhood) {
        gpr_mu_unlock(&neighborhood->mu);
        neighborhood = pollset->neighborhood;
        gpr_mu_unlock(&pollset->mu);
        goto retry_lock_neighborhood;
      }

      /* In the brief time we released the pollset locks above, the worker MAY
         have been kicked. In this case, the worker should get out of this
         pollset ASAP and hence this should neither add the pollset to
         neighborhood nor mark the pollset as active.

         On a side note, the only way a worker's kick state could have changed
         at this point is if it were "kicked specifically". Since the worker has
         not added itself to the pollset yet (by calling worker_insert()), it is
         not visible in the "kick any" path yet */
      if (worker->state == UNKICKED) {
        pollset->seen_inactive = false;
        if (neighborhood->active_root == NULL) {
          /* First active pollset in this neighborhood: becomes the sole node
             of the circular doubly-linked active list. */
          neighborhood->active_root = pollset->next = pollset->prev = pollset;
          /* Make this the designated poller if there isn't one already */
          if (worker->state == UNKICKED &&
              gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) {
            SET_KICK_STATE(worker, DESIGNATED_POLLER);
          }
        } else {
          /* Splice the pollset into the existing circular active list. */
          pollset->next = neighborhood->active_root;
          pollset->prev = pollset->next->prev;
          pollset->next->prev = pollset->prev->next = pollset;
        }
      }
    }
    if (is_reassigning) {
      GPR_ASSERT(pollset->reassigning_neighborhood);
      pollset->reassigning_neighborhood = false;
    }
    gpr_mu_unlock(&neighborhood->mu);
  }

  worker_insert(pollset, worker);
  pollset->begin_refs--;
  if (worker->state == UNKICKED && !pollset->kicked_without_poller) {
    GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker);
    worker->initialized_cv = true;
    gpr_cv_init(&worker->cv);
    /* Not the designated poller: sleep until kicked, shutdown, or deadline. */
    while (worker->state == UNKICKED && !pollset->shutting_down) {
      if (GRPC_TRACER_ON(grpc_polling_trace)) {
        gpr_log(GPR_ERROR, "PS:%p BEGIN_WAIT:%p kick_state=%s shutdown=%d",
                pollset, worker, kick_state_string(worker->state),
                pollset->shutting_down);
      }

      if (gpr_cv_wait(&worker->cv, &pollset->mu,
                      grpc_millis_to_timespec(deadline, GPR_CLOCK_REALTIME)) &&
          worker->state == UNKICKED) {
        /* If gpr_cv_wait returns true (i.e a timeout), pretend that the worker
           received a kick */
        SET_KICK_STATE(worker, KICKED);
      }
    }
    /* We may have slept for a while; force a fresh clock read. */
    grpc_exec_ctx_invalidate_now(exec_ctx);
  }

  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_log(GPR_ERROR,
            "PS:%p BEGIN_DONE:%p kick_state=%s shutdown=%d "
            "kicked_without_poller: %d",
            pollset, worker, kick_state_string(worker->state),
            pollset->shutting_down, pollset->kicked_without_poller);
  }

  /* We release pollset lock in this function at a couple of places:
   * 1. Briefly when assigning pollset to a neighborhood
   * 2. When doing gpr_cv_wait()
   * It is possible that 'kicked_without_poller' was set to true during (1) and
   * 'shutting_down' is set to true during (1) or (2). If either of them is
   * true, this worker cannot do polling */
  /* TODO(sreek): Perhaps there is a better way to handle kicked_without_poller
   * case; especially when the worker is the DESIGNATED_POLLER */

  if (pollset->kicked_without_poller) {
    pollset->kicked_without_poller = false;
    GPR_TIMER_END("begin_worker", 0);
    return false;
  }

  GPR_TIMER_END("begin_worker", 0);
  return worker->state == DESIGNATED_POLLER && !pollset->shutting_down;
}
789
/* Scan 'neighborhood' for a worker that can take over as the designated
   poller. Must be called with neighborhood->mu held.

   Walks the neighborhood's circular list of active pollsets; for each, walks
   its workers looking for an UNKICKED one to promote (via CAS on
   g_active_poller). Pollsets found with no promotable worker are marked
   inactive and unlinked from the active list.

   Returns true iff a designated poller was found (either promoted here, or
   observed to have been designated by someone else concurrently). */
static bool check_neighborhood_for_available_poller(
    grpc_exec_ctx* exec_ctx, pollset_neighborhood* neighborhood) {
  GPR_TIMER_BEGIN("check_neighborhood_for_available_poller", 0);
  bool found_worker = false;
  do {
    grpc_pollset* inspect = neighborhood->active_root;
    if (inspect == NULL) {
      /* No active pollsets left in this neighborhood. */
      break;
    }
    gpr_mu_lock(&inspect->mu);
    GPR_ASSERT(!inspect->seen_inactive);
    grpc_pollset_worker* inspect_worker = inspect->root_worker;
    if (inspect_worker != NULL) {
      do {
        switch (inspect_worker->state) {
          case UNKICKED:
            /* CAS from 0: only succeeds if there is currently no designated
               poller; the winner promotes this worker. */
            if (gpr_atm_no_barrier_cas(&g_active_poller, 0,
                                       (gpr_atm)inspect_worker)) {
              if (GRPC_TRACER_ON(grpc_polling_trace)) {
                gpr_log(GPR_DEBUG, " .. choose next poller to be %p",
                        inspect_worker);
              }
              SET_KICK_STATE(inspect_worker, DESIGNATED_POLLER);
              if (inspect_worker->initialized_cv) {
                GPR_TIMER_MARK("signal worker", 0);
                GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
                gpr_cv_signal(&inspect_worker->cv);
              }
            } else {
              if (GRPC_TRACER_ON(grpc_polling_trace)) {
                gpr_log(GPR_DEBUG, " .. beaten to choose next poller");
              }
            }
            // even if we didn't win the cas, there's a worker, we can stop
            found_worker = true;
            break;
          case KICKED:
            /* Already kicked: not promotable, keep scanning. */
            break;
          case DESIGNATED_POLLER:
            found_worker = true;  // ok, so someone else found the worker, but
                                  // we'll accept that
            break;
        }
        inspect_worker = inspect_worker->next;
      } while (!found_worker && inspect_worker != inspect->root_worker);
    }
    if (!found_worker) {
      if (GRPC_TRACER_ON(grpc_polling_trace)) {
        gpr_log(GPR_DEBUG, " .. mark pollset %p inactive", inspect);
      }
      /* No promotable worker here: retire this pollset from the active list
         so future scans skip it (begin_worker() re-activates it later). */
      inspect->seen_inactive = true;
      if (inspect == neighborhood->active_root) {
        neighborhood->active_root =
            inspect->next == inspect ? NULL : inspect->next;
      }
      inspect->next->prev = inspect->prev;
      inspect->prev->next = inspect->next;
      inspect->next = inspect->prev = NULL;
    }
    gpr_mu_unlock(&inspect->mu);
  } while (!found_worker);
  GPR_TIMER_END("check_neighborhood_for_available_poller", 0);
  return found_worker;
}
854
/* Deregister 'worker' from 'pollset'. Must be called with pollset->mu held;
   may temporarily drop and re-acquire it (to flush the exec_ctx and to scan
   neighborhoods for a successor poller).

   If this worker was the designated poller, responsibility is handed off:
   first to the next worker on this pollset if it is UNKICKED, otherwise by
   scanning all neighborhoods for any promotable worker. Also flushes closures
   accumulated on the exec_ctx and destroys the worker's condvar if one was
   initialized. */
static void end_worker(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
                       grpc_pollset_worker* worker,
                       grpc_pollset_worker** worker_hdl) {
  GPR_TIMER_BEGIN("end_worker", 0);
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_log(GPR_DEBUG, "PS:%p END_WORKER:%p", pollset, worker);
  }
  if (worker_hdl != NULL) *worker_hdl = NULL;
  /* Make sure we appear kicked */
  SET_KICK_STATE(worker, KICKED);
  grpc_closure_list_move(&worker->schedule_on_end_work,
                         &exec_ctx->closure_list);
  if (gpr_atm_no_barrier_load(&g_active_poller) == (gpr_atm)worker) {
    /* We were the designated poller: pick a successor. */
    if (worker->next != worker && worker->next->state == UNKICKED) {
      /* Cheap path: promote our peer on the same pollset. */
      if (GRPC_TRACER_ON(grpc_polling_trace)) {
        gpr_log(GPR_DEBUG, " .. choose next poller to be peer %p", worker);
      }
      GPR_ASSERT(worker->next->initialized_cv);
      gpr_atm_no_barrier_store(&g_active_poller, (gpr_atm)worker->next);
      SET_KICK_STATE(worker->next, DESIGNATED_POLLER);
      GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
      gpr_cv_signal(&worker->next->cv);
      if (grpc_exec_ctx_has_work(exec_ctx)) {
        /* Drop the lock while running closures to avoid holding it across
           arbitrary callbacks. */
        gpr_mu_unlock(&pollset->mu);
        grpc_exec_ctx_flush(exec_ctx);
        gpr_mu_lock(&pollset->mu);
      }
    } else {
      /* No local successor: clear the designation and search every
         neighborhood, starting with our own. */
      gpr_atm_no_barrier_store(&g_active_poller, 0);
      size_t poller_neighborhood_idx =
          (size_t)(pollset->neighborhood - g_neighborhoods);
      gpr_mu_unlock(&pollset->mu);
      bool found_worker = false;
      bool scan_state[MAX_NEIGHBORHOODS];
      /* First pass: trylock only, to avoid blocking behind busy
         neighborhoods; remember which ones were covered. */
      for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) {
        pollset_neighborhood* neighborhood =
            &g_neighborhoods[(poller_neighborhood_idx + i) %
                             g_num_neighborhoods];
        if (gpr_mu_trylock(&neighborhood->mu)) {
          found_worker =
              check_neighborhood_for_available_poller(exec_ctx, neighborhood);
          gpr_mu_unlock(&neighborhood->mu);
          scan_state[i] = true;
        } else {
          scan_state[i] = false;
        }
      }
      /* Second pass: blocking-lock the neighborhoods skipped above. */
      for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) {
        if (scan_state[i]) continue;
        pollset_neighborhood* neighborhood =
            &g_neighborhoods[(poller_neighborhood_idx + i) %
                             g_num_neighborhoods];
        gpr_mu_lock(&neighborhood->mu);
        found_worker =
            check_neighborhood_for_available_poller(exec_ctx, neighborhood);
        gpr_mu_unlock(&neighborhood->mu);
      }
      grpc_exec_ctx_flush(exec_ctx);
      gpr_mu_lock(&pollset->mu);
    }
  } else if (grpc_exec_ctx_has_work(exec_ctx)) {
    gpr_mu_unlock(&pollset->mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->mu);
  }
  if (worker->initialized_cv) {
    gpr_cv_destroy(&worker->cv);
  }
  if (GRPC_TRACER_ON(grpc_polling_trace)) {
    gpr_log(GPR_DEBUG, " .. remove worker");
  }
  if (EMPTIED == worker_remove(pollset, worker)) {
    pollset_maybe_finish_shutdown(exec_ctx, pollset);
  }
  GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker);
  GPR_TIMER_END("end_worker", 0);
}
932
/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns.

   Polls 'ps' until 'deadline' (or until kicked): registers a worker via
   begin_worker(); if that worker becomes the designated poller, it runs
   do_epoll_wait()/process_epoll_events() with the pollset lock released;
   finally end_worker() hands off the poller role and flushes queued work.
   Returns an aggregate error from the epoll steps, or GRPC_ERROR_NONE. */
static grpc_error* pollset_work(grpc_exec_ctx* exec_ctx, grpc_pollset* ps,
                                grpc_pollset_worker** worker_hdl,
                                grpc_millis deadline) {
  grpc_pollset_worker worker;
  grpc_error* error = GRPC_ERROR_NONE;
  static const char* err_desc = "pollset_work";
  GPR_TIMER_BEGIN("pollset_work", 0);
  /* A kick that arrived while no worker was present is consumed here by
     returning immediately without polling. */
  if (ps->kicked_without_poller) {
    ps->kicked_without_poller = false;
    GPR_TIMER_END("pollset_work", 0);
    return GRPC_ERROR_NONE;
  }

  if (begin_worker(exec_ctx, ps, &worker, worker_hdl, deadline)) {
    gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps);
    gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
    GPR_ASSERT(!ps->shutting_down);
    GPR_ASSERT(!ps->seen_inactive);

    gpr_mu_unlock(&ps->mu); /* unlock */
    /* This is the designated polling thread at this point and should ideally do
       polling. However, if there are unprocessed events left from a previous
       call to do_epoll_wait(), skip calling epoll_wait() in this iteration and
       process the pending epoll events.

       The reason for decoupling do_epoll_wait and process_epoll_events is to
       better distribute the work (i.e handling epoll events) across multiple
       threads

       process_epoll_events() returns very quickly: It just queues the work on
       exec_ctx but does not execute it (the actual execution or more
       accurately grpc_exec_ctx_flush() happens in end_worker() AFTER selecting
       a designated poller). So we are not waiting long periods without a
       designated poller */
    if (gpr_atm_acq_load(&g_epoll_set.cursor) ==
        gpr_atm_acq_load(&g_epoll_set.num_events)) {
      /* cursor == num_events means the previous batch is fully consumed. */
      append_error(&error, do_epoll_wait(exec_ctx, ps, deadline), err_desc);
    }
    append_error(&error, process_epoll_events(exec_ctx, ps), err_desc);

    gpr_mu_lock(&ps->mu); /* lock */

    gpr_tls_set(&g_current_thread_worker, 0);
  } else {
    gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps);
  }
  end_worker(exec_ctx, ps, &worker, worker_hdl);

  gpr_tls_set(&g_current_thread_pollset, 0);
  GPR_TIMER_END("pollset_work", 0);
  return error;
}
989
Craig Tillerbaa14a92017-11-03 09:09:36 -0700990static grpc_error* pollset_kick(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
991 grpc_pollset_worker* specific_worker) {
yang-gdf92a642017-08-21 22:38:45 -0700992 GPR_TIMER_BEGIN("pollset_kick", 0);
Craig Tiller0ff222a2017-09-01 09:41:43 -0700993 GRPC_STATS_INC_POLLSET_KICK(exec_ctx);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700994 grpc_error* ret_err = GRPC_ERROR_NONE;
Craig Tillerb89bac02017-05-26 15:20:32 +0000995 if (GRPC_TRACER_ON(grpc_polling_trace)) {
996 gpr_strvec log;
997 gpr_strvec_init(&log);
Craig Tillerbaa14a92017-11-03 09:09:36 -0700998 char* tmp;
999 gpr_asprintf(&tmp, "PS:%p KICK:%p curps=%p curworker=%p root=%p", pollset,
1000 specific_worker, (void*)gpr_tls_get(&g_current_thread_pollset),
1001 (void*)gpr_tls_get(&g_current_thread_worker),
1002 pollset->root_worker);
Craig Tillerb89bac02017-05-26 15:20:32 +00001003 gpr_strvec_add(&log, tmp);
1004 if (pollset->root_worker != NULL) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001005 gpr_asprintf(&tmp, " {kick_state=%s next=%p {kick_state=%s}}",
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001006 kick_state_string(pollset->root_worker->state),
Craig Tiller830e82a2017-05-31 16:26:27 -07001007 pollset->root_worker->next,
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001008 kick_state_string(pollset->root_worker->next->state));
Craig Tillerb89bac02017-05-26 15:20:32 +00001009 gpr_strvec_add(&log, tmp);
1010 }
1011 if (specific_worker != NULL) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001012 gpr_asprintf(&tmp, " worker_kick_state=%s",
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001013 kick_state_string(specific_worker->state));
Craig Tillerb89bac02017-05-26 15:20:32 +00001014 gpr_strvec_add(&log, tmp);
1015 }
1016 tmp = gpr_strvec_flatten(&log, NULL);
1017 gpr_strvec_destroy(&log);
Craig Tiller830e82a2017-05-31 16:26:27 -07001018 gpr_log(GPR_ERROR, "%s", tmp);
Craig Tillerb89bac02017-05-26 15:20:32 +00001019 gpr_free(tmp);
1020 }
Sree Kuchibhotlafb349402017-09-06 10:58:06 -07001021
Craig Tiller4509c472017-04-27 19:05:13 +00001022 if (specific_worker == NULL) {
1023 if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) {
Craig Tillerbaa14a92017-11-03 09:09:36 -07001024 grpc_pollset_worker* root_worker = pollset->root_worker;
Craig Tiller375eb252017-04-27 23:29:12 +00001025 if (root_worker == NULL) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001026 GRPC_STATS_INC_POLLSET_KICKED_WITHOUT_POLLER(exec_ctx);
Craig Tiller4509c472017-04-27 19:05:13 +00001027 pollset->kicked_without_poller = true;
Craig Tiller75aef7f2017-05-26 08:26:08 -07001028 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001029 gpr_log(GPR_ERROR, " .. kicked_without_poller");
Craig Tiller75aef7f2017-05-26 08:26:08 -07001030 }
yang-gdf92a642017-08-21 22:38:45 -07001031 goto done;
Craig Tiller375eb252017-04-27 23:29:12 +00001032 }
Craig Tillerbaa14a92017-11-03 09:09:36 -07001033 grpc_pollset_worker* next_worker = root_worker->next;
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001034 if (root_worker->state == KICKED) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001035 GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001036 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001037 gpr_log(GPR_ERROR, " .. already kicked %p", root_worker);
1038 }
1039 SET_KICK_STATE(root_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001040 goto done;
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001041 } else if (next_worker->state == KICKED) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001042 GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx);
Craig Tiller830e82a2017-05-31 16:26:27 -07001043 if (GRPC_TRACER_ON(grpc_polling_trace)) {
1044 gpr_log(GPR_ERROR, " .. already kicked %p", next_worker);
1045 }
1046 SET_KICK_STATE(next_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001047 goto done;
Craig Tiller830e82a2017-05-31 16:26:27 -07001048 } else if (root_worker ==
1049 next_worker && // only try and wake up a poller if
1050 // there is no next worker
Craig Tillerbaa14a92017-11-03 09:09:36 -07001051 root_worker == (grpc_pollset_worker*)gpr_atm_no_barrier_load(
Craig Tiller830e82a2017-05-31 16:26:27 -07001052 &g_active_poller)) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001053 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx);
Craig Tiller830e82a2017-05-31 16:26:27 -07001054 if (GRPC_TRACER_ON(grpc_polling_trace)) {
1055 gpr_log(GPR_ERROR, " .. kicked %p", root_worker);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001056 }
Craig Tiller55624a32017-05-26 08:14:44 -07001057 SET_KICK_STATE(root_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001058 ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd);
1059 goto done;
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001060 } else if (next_worker->state == UNKICKED) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001061 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001062 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001063 gpr_log(GPR_ERROR, " .. kicked %p", next_worker);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001064 }
Craig Tiller8502ecb2017-04-28 14:22:01 -07001065 GPR_ASSERT(next_worker->initialized_cv);
Craig Tiller55624a32017-05-26 08:14:44 -07001066 SET_KICK_STATE(next_worker, KICKED);
Craig Tiller375eb252017-04-27 23:29:12 +00001067 gpr_cv_signal(&next_worker->cv);
yang-gdf92a642017-08-21 22:38:45 -07001068 goto done;
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001069 } else if (next_worker->state == DESIGNATED_POLLER) {
1070 if (root_worker->state != DESIGNATED_POLLER) {
Craig Tiller75aef7f2017-05-26 08:26:08 -07001071 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001072 gpr_log(
1073 GPR_ERROR,
1074 " .. kicked root non-poller %p (initialized_cv=%d) (poller=%p)",
1075 root_worker, root_worker->initialized_cv, next_worker);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001076 }
Craig Tiller55624a32017-05-26 08:14:44 -07001077 SET_KICK_STATE(root_worker, KICKED);
1078 if (root_worker->initialized_cv) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001079 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
Craig Tiller55624a32017-05-26 08:14:44 -07001080 gpr_cv_signal(&root_worker->cv);
1081 }
yang-gdf92a642017-08-21 22:38:45 -07001082 goto done;
Craig Tiller55624a32017-05-26 08:14:44 -07001083 } else {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001084 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001085 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001086 gpr_log(GPR_ERROR, " .. non-root poller %p (root=%p)", next_worker,
Craig Tiller75aef7f2017-05-26 08:26:08 -07001087 root_worker);
1088 }
Craig Tiller55624a32017-05-26 08:14:44 -07001089 SET_KICK_STATE(next_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001090 ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd);
1091 goto done;
Craig Tiller55624a32017-05-26 08:14:44 -07001092 }
Craig Tiller8502ecb2017-04-28 14:22:01 -07001093 } else {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001094 GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx);
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001095 GPR_ASSERT(next_worker->state == KICKED);
Craig Tiller55624a32017-05-26 08:14:44 -07001096 SET_KICK_STATE(next_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001097 goto done;
Craig Tiller4509c472017-04-27 19:05:13 +00001098 }
1099 } else {
Craig Tiller1a012bb2017-09-13 14:29:00 -07001100 GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx);
Craig Tiller830e82a2017-05-31 16:26:27 -07001101 if (GRPC_TRACER_ON(grpc_polling_trace)) {
1102 gpr_log(GPR_ERROR, " .. kicked while waking up");
1103 }
yang-gdf92a642017-08-21 22:38:45 -07001104 goto done;
Craig Tiller4509c472017-04-27 19:05:13 +00001105 }
Sree Kuchibhotlafb349402017-09-06 10:58:06 -07001106
1107 GPR_UNREACHABLE_CODE(goto done);
1108 }
1109
Yash Tibrewalb2a54ac2017-09-13 10:18:07 -07001110 if (specific_worker->state == KICKED) {
Craig Tiller75aef7f2017-05-26 08:26:08 -07001111 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001112 gpr_log(GPR_ERROR, " .. specific worker already kicked");
Craig Tiller75aef7f2017-05-26 08:26:08 -07001113 }
yang-gdf92a642017-08-21 22:38:45 -07001114 goto done;
Craig Tiller4509c472017-04-27 19:05:13 +00001115 } else if (gpr_tls_get(&g_current_thread_worker) ==
1116 (intptr_t)specific_worker) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001117 GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001118 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001119 gpr_log(GPR_ERROR, " .. mark %p kicked", specific_worker);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001120 }
Craig Tiller55624a32017-05-26 08:14:44 -07001121 SET_KICK_STATE(specific_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001122 goto done;
Craig Tiller32f90ee2017-04-28 12:46:41 -07001123 } else if (specific_worker ==
Craig Tillerbaa14a92017-11-03 09:09:36 -07001124 (grpc_pollset_worker*)gpr_atm_no_barrier_load(&g_active_poller)) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001125 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001126 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001127 gpr_log(GPR_ERROR, " .. kick active poller");
Craig Tiller75aef7f2017-05-26 08:26:08 -07001128 }
Craig Tiller55624a32017-05-26 08:14:44 -07001129 SET_KICK_STATE(specific_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001130 ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd);
1131 goto done;
Craig Tiller8502ecb2017-04-28 14:22:01 -07001132 } else if (specific_worker->initialized_cv) {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001133 GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001134 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001135 gpr_log(GPR_ERROR, " .. kick waiting worker");
Craig Tiller75aef7f2017-05-26 08:26:08 -07001136 }
Craig Tiller55624a32017-05-26 08:14:44 -07001137 SET_KICK_STATE(specific_worker, KICKED);
Craig Tiller4509c472017-04-27 19:05:13 +00001138 gpr_cv_signal(&specific_worker->cv);
yang-gdf92a642017-08-21 22:38:45 -07001139 goto done;
Craig Tiller8502ecb2017-04-28 14:22:01 -07001140 } else {
Craig Tiller0ff222a2017-09-01 09:41:43 -07001141 GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx);
Craig Tiller75aef7f2017-05-26 08:26:08 -07001142 if (GRPC_TRACER_ON(grpc_polling_trace)) {
Craig Tiller830e82a2017-05-31 16:26:27 -07001143 gpr_log(GPR_ERROR, " .. kick non-waiting worker");
Craig Tiller75aef7f2017-05-26 08:26:08 -07001144 }
Craig Tiller55624a32017-05-26 08:14:44 -07001145 SET_KICK_STATE(specific_worker, KICKED);
yang-gdf92a642017-08-21 22:38:45 -07001146 goto done;
Craig Tiller4509c472017-04-27 19:05:13 +00001147 }
yang-gdf92a642017-08-21 22:38:45 -07001148done:
1149 GPR_TIMER_END("pollset_kick", 0);
1150 return ret_err;
Craig Tiller4509c472017-04-27 19:05:13 +00001151}
1152
Craig Tillerbaa14a92017-11-03 09:09:36 -07001153static void pollset_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset* pollset,
1154 grpc_fd* fd) {}
Craig Tiller4509c472017-04-27 19:05:13 +00001155
Craig Tiller4509c472017-04-27 19:05:13 +00001156/*******************************************************************************
Craig Tillerc67cc992017-04-27 10:15:51 -07001157 * Pollset-set Definitions
1158 */
1159
Craig Tillerbaa14a92017-11-03 09:09:36 -07001160static grpc_pollset_set* pollset_set_create(void) {
1161 return (grpc_pollset_set*)((intptr_t)0xdeafbeef);
Craig Tillerc67cc992017-04-27 10:15:51 -07001162}
1163
Craig Tillerbaa14a92017-11-03 09:09:36 -07001164static void pollset_set_destroy(grpc_exec_ctx* exec_ctx,
1165 grpc_pollset_set* pss) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001166
Craig Tillerbaa14a92017-11-03 09:09:36 -07001167static void pollset_set_add_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1168 grpc_fd* fd) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001169
Craig Tillerbaa14a92017-11-03 09:09:36 -07001170static void pollset_set_del_fd(grpc_exec_ctx* exec_ctx, grpc_pollset_set* pss,
1171 grpc_fd* fd) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001172
Craig Tillerbaa14a92017-11-03 09:09:36 -07001173static void pollset_set_add_pollset(grpc_exec_ctx* exec_ctx,
1174 grpc_pollset_set* pss, grpc_pollset* ps) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001175
Craig Tillerbaa14a92017-11-03 09:09:36 -07001176static void pollset_set_del_pollset(grpc_exec_ctx* exec_ctx,
1177 grpc_pollset_set* pss, grpc_pollset* ps) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001178
Craig Tillerbaa14a92017-11-03 09:09:36 -07001179static void pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx,
1180 grpc_pollset_set* bag,
1181 grpc_pollset_set* item) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001182
Craig Tillerbaa14a92017-11-03 09:09:36 -07001183static void pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx,
1184 grpc_pollset_set* bag,
1185 grpc_pollset_set* item) {}
Craig Tillerc67cc992017-04-27 10:15:51 -07001186
1187/*******************************************************************************
1188 * Event engine binding
1189 */
1190
/* Release all engine-wide state created by grpc_init_epoll1_linux().
   NOTE(review): the teardown order (fd, pollset, epoll set) is preserved
   as-is; it does not simply mirror init order, so presumably the ordering is
   deliberate -- do not reorder without confirming. */
static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  epoll_set_shutdown();
}
1196
/* The epoll1 engine's entry points. This is a positional initializer: the
   entries must stay in exactly the order of the fields declared in
   grpc_event_engine_vtable. */
static const grpc_event_engine_vtable vtable = {
    sizeof(grpc_pollset), /* per-pollset allocation size for callers */

    /* fd operations */
    fd_create,
    fd_wrapped_fd,
    fd_orphan,
    fd_shutdown,
    fd_notify_on_read,
    fd_notify_on_write,
    fd_is_shutdown,
    fd_get_read_notifier_pollset,

    /* pollset operations */
    pollset_init,
    pollset_shutdown,
    pollset_destroy,
    pollset_work,
    pollset_kick,
    pollset_add_fd,

    /* pollset_set operations (all no-ops in this engine; see above) */
    pollset_set_create,
    pollset_set_destroy,
    pollset_set_add_pollset,
    pollset_set_del_pollset,
    pollset_set_add_pollset_set,
    pollset_set_del_pollset_set,
    pollset_set_add_fd,
    pollset_set_del_fd,

    /* engine teardown */
    shutdown_engine,
};
1227
1228/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -07001229 * Create epoll_fd (epoll_set_init() takes care of that) to make sure epoll
1230 * support is available */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001231const grpc_event_engine_vtable* grpc_init_epoll1_linux(bool explicit_request) {
Craig Tillerc67cc992017-04-27 10:15:51 -07001232 if (!grpc_has_wakeup_fd()) {
1233 return NULL;
1234 }
1235
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -07001236 if (!epoll_set_init()) {
Craig Tillerc67cc992017-04-27 10:15:51 -07001237 return NULL;
1238 }
1239
Craig Tillerc67cc992017-04-27 10:15:51 -07001240 fd_global_init();
1241
1242 if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
Craig Tiller4509c472017-04-27 19:05:13 +00001243 fd_global_shutdown();
Sree Kuchibhotla5efc9132017-08-17 14:10:38 -07001244 epoll_set_shutdown();
Craig Tillerc67cc992017-04-27 10:15:51 -07001245 return NULL;
1246 }
1247
1248 return &vtable;
1249}
1250
1251#else /* defined(GRPC_LINUX_EPOLL) */
1252#if defined(GRPC_POSIX_SOCKET)
Yash Tibrewal1cac2232017-09-26 11:31:11 -07001253#include "src/core/lib/iomgr/ev_epoll1_linux.h"
Craig Tillerc67cc992017-04-27 10:15:51 -07001254/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
1255 * NULL */
Craig Tillerbaa14a92017-11-03 09:09:36 -07001256const grpc_event_engine_vtable* grpc_init_epoll1_linux(bool explicit_request) {
Craig Tiller9ddb3152017-04-27 21:32:56 +00001257 return NULL;
1258}
Craig Tillerc67cc992017-04-27 10:15:51 -07001259#endif /* defined(GRPC_POSIX_SOCKET) */
1260#endif /* !defined(GRPC_LINUX_EPOLL) */