// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows

package net

#include "runtime.h"
#include "defs.h"
#include "arch.h"
#include "malloc.h"

// Map gccgo field names to gc field names.
// Eface aka __go_empty_interface.
#define type __type_descriptor
#define data __object

// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue) must define the following functions:
// void runtime_netpollinit(void);			// to initialize the poller
// int32 runtime_netpollopen(uintptr fd, PollDesc *pd);	// to arm edge-triggered notifications
//							// and associate fd with pd.
// An implementation must call the following function to denote that the pd is ready.
// void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode);

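// The net package drives these entry points roughly as follows:
//	runtime_pollServerInit();	// once, before first use
//	pd = runtime_pollOpen(fd);	// when a descriptor is created
//	runtime_pollReset(pd, 'r');	// before each wait
//	runtime_pollWait(pd, 'r');	// park until readable, closed or timed out
//	runtime_pollUnblock(pd);	// on close: wake all waiters
//	runtime_pollClose(pd);		// release the PollDesc
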
// PollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
// READY - io readiness notification is pending;
//         a goroutine consumes the notification by changing the state to nil.
// WAIT - a goroutine prepares to park on the semaphore, but is not yet parked;
//        the goroutine commits to park by changing the state to G pointer,
//        or, alternatively, concurrent io notification changes the state to READY,
//        or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
//             io notification or timeout/close changes the state to READY or nil respectively
//             and unparks the goroutine.
// nil - none of the above.
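//
// Informal transition summary of the rules above:
//	nil    -> WAIT	(netpollblock prepares to park)
//	WAIT   -> G	(the goroutine commits to park)
//	WAIT   -> nil	(concurrent timeout/close)
//	nil, WAIT or G -> READY	(io notification; a parked G is unparked)
//	G      -> nil	(timeout/close; the goroutine is unparked)
//	READY  -> nil	(notification consumed by netpollblock)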
#define READY ((G*)1)
#define WAIT ((G*)2)

enum
{
	PollBlockSize = 4*1024,
};

struct PollDesc
{
	PollDesc* link;	// in pollcache, protected by pollcache.Lock

	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
	// This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
	// pollReset, pollWait, pollWaitCanceled and runtime_netpollready (IO readiness notification)
	// proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated
	// in a lock-free way by all operations.
	Lock	lock;	// protects the following fields
	uintptr	fd;
	bool	closing;
	uintptr	seq;	// protects from stale timers and ready notifications
	G*	rg;	// READY, WAIT, G waiting for read or nil
	Timer	rt;	// read deadline timer (set if rt.fv != nil)
	int64	rd;	// read deadline
	G*	wg;	// READY, WAIT, G waiting for write or nil
	Timer	wt;	// write deadline timer
	int64	wd;	// write deadline
	void*	user;	// user settable cookie
};

static struct
{
	Lock		lock;
	PollDesc*	first;
	// PollDesc objects must be type-stable,
	// because we can get a ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using the seq variable,
	// which is incremented when deadlines are changed or a descriptor is reused.
} pollcache;

static bool	netpollblock(PollDesc*, int32, bool);
static G*	netpollunblock(PollDesc*, int32, bool);
static void	deadline(Eface, uintptr);
static void	readDeadline(Eface, uintptr);
static void	writeDeadline(Eface, uintptr);
static PollDesc*	allocPollDesc(void);
static intgo	checkerr(PollDesc *pd, int32 mode);

static FuncVal deadlineFn = {(void(*)(void))deadline};
static FuncVal readDeadlineFn = {(void(*)(void))readDeadline};
static FuncVal writeDeadlineFn = {(void(*)(void))writeDeadline};

// runtimeNano returns the current value of the runtime clock in nanoseconds.
func runtimeNano() (ns int64) {
	ns = runtime_nanotime();
}

func runtime_pollServerInit() {
	runtime_netpollinit();
}

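// runtime_pollOpen takes a PollDesc from the cache, resets its state for fd
// and registers fd with the platform poller.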
func runtime_pollOpen(fd uintptr) (pd *PollDesc, errno int) {
	pd = allocPollDesc();
	runtime_lock(&pd->lock);
	if(pd->wg != nil && pd->wg != READY)
		runtime_throw("runtime_pollOpen: blocked write on free descriptor");
	if(pd->rg != nil && pd->rg != READY)
		runtime_throw("runtime_pollOpen: blocked read on free descriptor");
	pd->fd = fd;
	pd->closing = false;
	pd->seq++;
	pd->rg = nil;
	pd->rd = 0;
	pd->wg = nil;
	pd->wd = 0;
	runtime_unlock(&pd->lock);

	errno = runtime_netpollopen(fd, pd);
}

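// runtime_pollClose unregisters fd from the platform poller and returns the
// PollDesc to the cache. The descriptor must already be unblocked (closing
// set, no goroutine parked on rg/wg).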
func runtime_pollClose(pd *PollDesc) {
	if(!pd->closing)
		runtime_throw("runtime_pollClose: close w/o unblock");
	if(pd->wg != nil && pd->wg != READY)
		runtime_throw("runtime_pollClose: blocked write on closing descriptor");
	if(pd->rg != nil && pd->rg != READY)
		runtime_throw("runtime_pollClose: blocked read on closing descriptor");
	runtime_netpollclose(pd->fd);
	runtime_lock(&pollcache.lock);
	pd->link = pollcache.first;
	pollcache.first = pd;
	runtime_unlock(&pollcache.lock);
}

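// runtime_pollReset prepares pd for the next read ('r') or write ('w') by
// clearing the corresponding semaphore; err is non-zero if the descriptor
// is closing or its deadline has already expired (see checkerr).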
func runtime_pollReset(pd *PollDesc, mode int) (err int) {
	err = checkerr(pd, mode);
	if(err)
		goto ret;
	if(mode == 'r')
		pd->rg = nil;
	else if(mode == 'w')
		pd->wg = nil;
ret:
}

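// runtime_pollWait parks the current goroutine until fd becomes ready for
// the given mode, the descriptor is closed, or its deadline expires;
// err uses the same codes as checkerr.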
func runtime_pollWait(pd *PollDesc, mode int) (err int) {
	err = checkerr(pd, mode);
	if(err == 0) {
		// For now only Solaris uses level-triggered IO.
		if(Solaris)
			runtime_netpollarm(pd, mode);
		while(!netpollblock(pd, mode, false)) {
			err = checkerr(pd, mode);
			if(err != 0)
				break;
			// Can happen if a timeout has fired and unblocked us,
			// but before we had a chance to run, the timeout was reset.
			// Pretend it has not happened and retry.
		}
	}
}

func runtime_pollWaitCanceled(pd *PollDesc, mode int) {
	// This function is used only on Windows after a failed attempt to cancel
	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
	while(!netpollblock(pd, mode, true))
		;
}

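// runtime_pollSetDeadline sets the read ('r'), write ('w') or combined
// ('r'+'w') deadline to the absolute time d (0 means no deadline) and
// re-arms the deadline timers. A deadline already in the past immediately
// unblocks any pending IO.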
func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
	G *rg, *wg;

	runtime_lock(&pd->lock);
	if(pd->closing) {
		runtime_unlock(&pd->lock);
		return;
	}
	pd->seq++;  // invalidate current timers
	// Reset current timers.
	if(pd->rt.fv) {
		runtime_deltimer(&pd->rt);
		pd->rt.fv = nil;
	}
	if(pd->wt.fv) {
		runtime_deltimer(&pd->wt);
		pd->wt.fv = nil;
	}
	// Set up new timers.
	if(d != 0 && d <= runtime_nanotime())
		d = -1;
	if(mode == 'r' || mode == 'r'+'w')
		pd->rd = d;
	if(mode == 'w' || mode == 'r'+'w')
		pd->wd = d;
	if(pd->rd > 0 && pd->rd == pd->wd) {
		pd->rt.fv = &deadlineFn;
		pd->rt.when = pd->rd;
		// Copy current seq into the timer arg.
		// Timer func will check the seq against current descriptor seq;
		// if they differ, the descriptor was reused or the timers were reset.
		pd->rt.arg.type = nil;  // should be *pollDesc type descriptor.
		pd->rt.arg.data = pd;
		pd->rt.seq = pd->seq;
		runtime_addtimer(&pd->rt);
	} else {
		if(pd->rd > 0) {
			pd->rt.fv = &readDeadlineFn;
			pd->rt.when = pd->rd;
			pd->rt.arg.type = nil;  // should be *pollDesc type descriptor.
			pd->rt.arg.data = pd;
			pd->rt.seq = pd->seq;
			runtime_addtimer(&pd->rt);
		}
		if(pd->wd > 0) {
			pd->wt.fv = &writeDeadlineFn;
			pd->wt.when = pd->wd;
			pd->wt.arg.type = nil;  // should be *pollDesc type descriptor.
			pd->wt.arg.data = pd;
			pd->wt.seq = pd->seq;
			runtime_addtimer(&pd->wt);
		}
	}
	// If we set a deadline in the past, unblock currently pending IO, if any.
	rg = nil;
	runtime_atomicstorep(&wg, nil);  // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
	if(pd->rd < 0)
		rg = netpollunblock(pd, 'r', false);
	if(pd->wd < 0)
		wg = netpollunblock(pd, 'w', false);
	runtime_unlock(&pd->lock);
	if(rg)
		runtime_ready(rg);
	if(wg)
		runtime_ready(wg);
}

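// runtime_pollUnblock marks pd as closing, stops its deadline timers and
// wakes any goroutines parked on rg/wg; subsequent operations on pd fail
// with errClosing.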
func runtime_pollUnblock(pd *PollDesc) {
	G *rg, *wg;

	runtime_lock(&pd->lock);
	if(pd->closing)
		runtime_throw("runtime_pollUnblock: already closing");
	pd->closing = true;
	pd->seq++;
	runtime_atomicstorep(&rg, nil);  // full memory barrier between store to closing and read of rg/wg in netpollunblock
	rg = netpollunblock(pd, 'r', false);
	wg = netpollunblock(pd, 'w', false);
	if(pd->rt.fv) {
		runtime_deltimer(&pd->rt);
		pd->rt.fv = nil;
	}
	if(pd->wt.fv) {
		runtime_deltimer(&pd->wt);
		pd->wt.fv = nil;
	}
	runtime_unlock(&pd->lock);
	if(rg)
		runtime_ready(rg);
	if(wg)
		runtime_ready(wg);
}

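// The following accessors are used by the platform-specific poller
// implementations (epoll/kqueue).
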
uintptr
runtime_netpollfd(PollDesc *pd)
{
	return pd->fd;
}

void**
runtime_netpolluser(PollDesc *pd)
{
	return &pd->user;
}

bool
runtime_netpollclosing(PollDesc *pd)
{
	return pd->closing;
}

void
runtime_netpolllock(PollDesc *pd)
{
	runtime_lock(&pd->lock);
}

void
runtime_netpollunlock(PollDesc *pd)
{
	runtime_unlock(&pd->lock);
}

// Make pd ready; newly runnable goroutines (if any) are enqueued into the gpp list.
void
runtime_netpollready(G **gpp, PollDesc *pd, int32 mode)
{
	G *rg, *wg;

	rg = wg = nil;
	if(mode == 'r' || mode == 'r'+'w')
		rg = netpollunblock(pd, 'r', true);
	if(mode == 'w' || mode == 'r'+'w')
		wg = netpollunblock(pd, 'w', true);
	if(rg) {
		rg->schedlink = *gpp;
		*gpp = rg;
	}
	if(wg) {
		wg->schedlink = *gpp;
		*gpp = wg;
	}
}

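// checkerr reports whether IO in the given mode can proceed:
// 1 (errClosing) if the descriptor is closing, 2 (errTimeout) if its
// deadline has expired, 0 otherwise.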
static intgo
checkerr(PollDesc *pd, int32 mode)
{
	if(pd->closing)
		return 1;  // errClosing
	if((mode == 'r' && pd->rd < 0) || (mode == 'w' && pd->wd < 0))
		return 2;  // errTimeout
	return 0;
}

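// blockcommit is the runtime_park callback: it commits the goroutine to
// parking by swinging the semaphore from WAIT to the G pointer. If the CAS
// fails, a concurrent notification arrived and the goroutine is not parked.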
static bool
blockcommit(G *gp, G **gpp)
{
	return runtime_casp(gpp, WAIT, gp);
}

// Returns true if IO is ready, or false if it timed out or was closed.
// waitio - wait only for completed IO, ignore errors.
static bool
netpollblock(PollDesc *pd, int32 mode, bool waitio)
{
	G **gpp, *old;

	gpp = &pd->rg;
	if(mode == 'w')
		gpp = &pd->wg;

	// Set the gpp semaphore to WAIT.
	for(;;) {
		old = *gpp;
		if(old == READY) {
			*gpp = nil;
			return true;
		}
		if(old != nil)
			runtime_throw("netpollblock: double wait");
		if(runtime_casp(gpp, nil, WAIT))
			break;
	}

	// Need to recheck error states after setting gpp to WAIT.
	// This is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg.
	if(waitio || checkerr(pd, mode) == 0)
		runtime_park((bool(*)(G*, void*))blockcommit, gpp, "IO wait");
	// Be careful not to lose a concurrent READY notification.
	old = runtime_xchgp(gpp, nil);
	if(old > WAIT)
		runtime_throw("netpollblock: corrupted state");
	return old == READY;
}

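// netpollunblock takes the semaphore for the given mode out of the blocked
// state and returns the goroutine to wake, if any. With ioready it leaves a
// READY notification behind; otherwise (timeout/close) it resets the state
// to nil.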
static G*
netpollunblock(PollDesc *pd, int32 mode, bool ioready)
{
	G **gpp, *old, *new;

	gpp = &pd->rg;
	if(mode == 'w')
		gpp = &pd->wg;

	for(;;) {
		old = *gpp;
		if(old == READY)
			return nil;
		if(old == nil && !ioready) {
			// Only set READY for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil;
		}
		new = nil;
		if(ioready)
			new = READY;
		if(runtime_casp(gpp, old, new))
			break;
	}
	if(old > WAIT)
		return old;  // must be G*
	return nil;
}

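// deadlineimpl is the timer callback shared by the deadline timers. It
// ignores stale timers (seq mismatch), marks the expired deadline(s) with
// -1 and wakes any goroutine parked on the corresponding semaphore.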
static void
deadlineimpl(Eface arg, uintptr seq, bool read, bool write)
{
	PollDesc *pd;
	G *rg, *wg;

	pd = (PollDesc*)arg.data;
	rg = wg = nil;
	runtime_lock(&pd->lock);
	// The seq arg is the descriptor's seq at the time the timer was set.
	// If it's stale, ignore the timer event.
	if(seq != pd->seq) {
		// The descriptor was reused or the timers were reset.
		runtime_unlock(&pd->lock);
		return;
	}
	if(read) {
		if(pd->rd <= 0 || pd->rt.fv == nil)
			runtime_throw("deadlineimpl: inconsistent read deadline");
		pd->rd = -1;
		runtime_atomicstorep(&pd->rt.fv, nil);  // full memory barrier between store to rd and load of rg in netpollunblock
		rg = netpollunblock(pd, 'r', false);
	}
	if(write) {
		if(pd->wd <= 0 || (pd->wt.fv == nil && !read))
			runtime_throw("deadlineimpl: inconsistent write deadline");
		pd->wd = -1;
		runtime_atomicstorep(&pd->wt.fv, nil);  // full memory barrier between store to wd and load of wg in netpollunblock
		wg = netpollunblock(pd, 'w', false);
	}
	runtime_unlock(&pd->lock);
	if(rg)
		runtime_ready(rg);
	if(wg)
		runtime_ready(wg);
}

static void
deadline(Eface arg, uintptr seq)
{
	deadlineimpl(arg, seq, true, true);
}

static void
readDeadline(Eface arg, uintptr seq)
{
	deadlineimpl(arg, seq, true, false);
}

static void
writeDeadline(Eface arg, uintptr seq)
{
	deadlineimpl(arg, seq, false, true);
}

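// allocPollDesc returns a PollDesc from pollcache, carving a new block of
// descriptors out of persistent (non-GC) memory when the cache is empty.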
static PollDesc*
allocPollDesc(void)
{
	PollDesc *pd;
	uint32 i, n;

	runtime_lock(&pollcache.lock);
	if(pollcache.first == nil) {
		n = PollBlockSize/sizeof(*pd);
		if(n == 0)
			n = 1;
		// Must be in non-GC memory because it can be referenced
		// only from epoll/kqueue internals.
		pd = runtime_persistentalloc(n*sizeof(*pd), 0, &mstats.other_sys);
		for(i = 0; i < n; i++) {
			pd[i].link = pollcache.first;
			pollcache.first = &pd[i];
		}
	}
	pd = pollcache.first;
	pollcache.first = pd->link;
	runtime_unlock(&pollcache.lock);
	return pd;
}