blob: 7965390ee66be91217b90de9bf0c6ce30538e4ff [file] [log] [blame]
Christopher Wileye8679812015-07-01 13:36:18 -07001/*
2 * Copyright (c) 2007-2012 Niels Provos and Nick Mathewson
3 * Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30#include <limits.h>
31#include <string.h>
32#include <stdlib.h>
33
34#include "event2/event.h"
35#include "event2/event_struct.h"
36#include "event2/util.h"
37#include "event2/bufferevent.h"
38#include "event2/bufferevent_struct.h"
39#include "event2/buffer.h"
40
41#include "ratelim-internal.h"
42
43#include "bufferevent-internal.h"
44#include "mm-internal.h"
45#include "util-internal.h"
46#include "event-internal.h"
47
48int
49ev_token_bucket_init(struct ev_token_bucket *bucket,
50 const struct ev_token_bucket_cfg *cfg,
51 ev_uint32_t current_tick,
52 int reinitialize)
53{
54 if (reinitialize) {
55 /* on reinitialization, we only clip downwards, since we've
56 already used who-knows-how-much bandwidth this tick. We
57 leave "last_updated" as it is; the next update will add the
58 appropriate amount of bandwidth to the bucket.
59 */
60 if (bucket->read_limit > (ev_int64_t) cfg->read_maximum)
61 bucket->read_limit = cfg->read_maximum;
62 if (bucket->write_limit > (ev_int64_t) cfg->write_maximum)
63 bucket->write_limit = cfg->write_maximum;
64 } else {
65 bucket->read_limit = cfg->read_rate;
66 bucket->write_limit = cfg->write_rate;
67 bucket->last_updated = current_tick;
68 }
69 return 0;
70}
71
/** Refill 'bucket' with the bandwidth accumulated since its last update,
 * according to 'cfg', saturating the read and write limits at their
 * configured maxima.
 *
 * Returns 1 if the bucket was updated, or 0 if no whole tick has elapsed
 * (or if 'current_tick' appears to have rolled back by more than INT_MAX
 * ticks).
 */
int
ev_token_bucket_update(struct ev_token_bucket *bucket,
    const struct ev_token_bucket_cfg *cfg,
    ev_uint32_t current_tick)
{
	/* It's okay if the tick number overflows, since we'll just
	 * wrap around when we do the unsigned subtraction. */
	unsigned n_ticks = current_tick - bucket->last_updated;

	/* Make sure some ticks actually happened, and that time didn't
	 * roll back. */
	if (n_ticks == 0 || n_ticks > INT_MAX)
		return 0;

	/* Naively, we would say
		bucket->limit += n_ticks * cfg->rate;

		if (bucket->limit > cfg->maximum)
			bucket->limit = cfg->maximum;

	   But we're worried about overflow, so we do it like this:
	*/

	/* If the per-tick headroom (distance from the current limit to the
	 * maximum, divided by n_ticks) is less than the rate, the naive
	 * addition would exceed the maximum — saturate instead. */
	if ((cfg->read_maximum - bucket->read_limit) / n_ticks < cfg->read_rate)
		bucket->read_limit = cfg->read_maximum;
	else
		bucket->read_limit += n_ticks * cfg->read_rate;


	if ((cfg->write_maximum - bucket->write_limit) / n_ticks < cfg->write_rate)
		bucket->write_limit = cfg->write_maximum;
	else
		bucket->write_limit += n_ticks * cfg->write_rate;


	bucket->last_updated = current_tick;

	return 1;
}
111
/** Helper: bring the per-bufferevent token bucket of 'bev' up to date
 * with the event base's cached notion of "now".
 *
 * Caller must hold the lock on 'bev'.  bev->rate_limiting must be
 * non-NULL (callers in this file check rate_limiting->cfg before
 * calling).
 */
static inline void
bufferevent_update_buckets(struct bufferevent_private *bev)
{
	/* Must hold lock on bev. */
	struct timeval now;
	unsigned tick;
	event_base_gettimeofday_cached(bev->bev.ev_base, &now);
	tick = ev_token_bucket_get_tick(&now, bev->rate_limiting->cfg);
	/* Skip the update entirely when no new tick has started. */
	if (tick != bev->rate_limiting->limit.last_updated)
		ev_token_bucket_update(&bev->rate_limiting->limit,
		    bev->rate_limiting->cfg, tick);
}
124
125ev_uint32_t
126ev_token_bucket_get_tick(const struct timeval *tv,
127 const struct ev_token_bucket_cfg *cfg)
128{
129 /* This computation uses two multiplies and a divide. We could do
130 * fewer if we knew that the tick length was an integer number of
131 * seconds, or if we knew it divided evenly into a second. We should
132 * investigate that more.
133 */
134
135 /* We cast to an ev_uint64_t first, since we don't want to overflow
136 * before we do the final divide. */
137 ev_uint64_t msec = (ev_uint64_t)tv->tv_sec * 1000 + tv->tv_usec / 1000;
138 return (unsigned)(msec / cfg->msec_per_tick);
139}
140
141struct ev_token_bucket_cfg *
142ev_token_bucket_cfg_new(size_t read_rate, size_t read_burst,
143 size_t write_rate, size_t write_burst,
144 const struct timeval *tick_len)
145{
146 struct ev_token_bucket_cfg *r;
147 struct timeval g;
148 if (! tick_len) {
149 g.tv_sec = 1;
150 g.tv_usec = 0;
151 tick_len = &g;
152 }
153 if (read_rate > read_burst || write_rate > write_burst ||
154 read_rate < 1 || write_rate < 1)
155 return NULL;
156 if (read_rate > EV_RATE_LIMIT_MAX ||
157 write_rate > EV_RATE_LIMIT_MAX ||
158 read_burst > EV_RATE_LIMIT_MAX ||
159 write_burst > EV_RATE_LIMIT_MAX)
160 return NULL;
161 r = mm_calloc(1, sizeof(struct ev_token_bucket_cfg));
162 if (!r)
163 return NULL;
164 r->read_rate = read_rate;
165 r->write_rate = write_rate;
166 r->read_maximum = read_burst;
167 r->write_maximum = write_burst;
168 memcpy(&r->tick_timeout, tick_len, sizeof(struct timeval));
169 r->msec_per_tick = (tick_len->tv_sec * 1000) +
170 (tick_len->tv_usec & COMMON_TIMEOUT_MICROSECONDS_MASK)/1000;
171 return r;
172}
173
/** Release a configuration returned by ev_token_bucket_cfg_new().
 *
 * NOTE(review): configs are not reference-counted (see the XXX in
 * bufferevent_set_rate_limit), so the caller is responsible for not
 * freeing a config that a bufferevent or group still uses.
 */
void
ev_token_bucket_cfg_free(struct ev_token_bucket_cfg *cfg)
{
	mm_free(cfg);
}
179
/* No matter how big our bucket gets, don't try to read more than this
 * much in a single read operation. */
#define MAX_TO_READ_EVER 16384
/* No matter how big our bucket gets, don't try to write more than this
 * much in a single write operation. */
#define MAX_TO_WRITE_EVER 16384

/* Acquire/release the lock shared by every member of a rate-limit
 * group.  (This lock nests inside the individual bufferevent locks;
 * see the deadlock notes in the suspend helpers below.) */
#define LOCK_GROUP(g) EVLOCK_LOCK((g)->lock, 0)
#define UNLOCK_GROUP(g) EVLOCK_UNLOCK((g)->lock, 0)

/* Forward declarations for the group-wide suspend/unsuspend helpers. */
static int _bev_group_suspend_reading(struct bufferevent_rate_limit_group *g);
static int _bev_group_suspend_writing(struct bufferevent_rate_limit_group *g);
static void _bev_group_unsuspend_reading(struct bufferevent_rate_limit_group *g);
static void _bev_group_unsuspend_writing(struct bufferevent_rate_limit_group *g);
194
195/** Helper: figure out the maximum amount we should write if is_write, or
196 the maximum amount we should read if is_read. Return that maximum, or
197 0 if our bucket is wholly exhausted.
198 */
199static inline ev_ssize_t
200_bufferevent_get_rlim_max(struct bufferevent_private *bev, int is_write)
201{
202 /* needs lock on bev. */
203 ev_ssize_t max_so_far = is_write?MAX_TO_WRITE_EVER:MAX_TO_READ_EVER;
204
205#define LIM(x) \
206 (is_write ? (x).write_limit : (x).read_limit)
207
208#define GROUP_SUSPENDED(g) \
209 (is_write ? (g)->write_suspended : (g)->read_suspended)
210
211 /* Sets max_so_far to MIN(x, max_so_far) */
212#define CLAMPTO(x) \
213 do { \
214 if (max_so_far > (x)) \
215 max_so_far = (x); \
216 } while (0);
217
218 if (!bev->rate_limiting)
219 return max_so_far;
220
221 /* If rate-limiting is enabled at all, update the appropriate
222 bucket, and take the smaller of our rate limit and the group
223 rate limit.
224 */
225
226 if (bev->rate_limiting->cfg) {
227 bufferevent_update_buckets(bev);
228 max_so_far = LIM(bev->rate_limiting->limit);
229 }
230 if (bev->rate_limiting->group) {
231 struct bufferevent_rate_limit_group *g =
232 bev->rate_limiting->group;
233 ev_ssize_t share;
234 LOCK_GROUP(g);
235 if (GROUP_SUSPENDED(g)) {
236 /* We can get here if we failed to lock this
237 * particular bufferevent while suspending the whole
238 * group. */
239 if (is_write)
240 bufferevent_suspend_write(&bev->bev,
241 BEV_SUSPEND_BW_GROUP);
242 else
243 bufferevent_suspend_read(&bev->bev,
244 BEV_SUSPEND_BW_GROUP);
245 share = 0;
246 } else {
247 /* XXXX probably we should divide among the active
248 * members, not the total members. */
249 share = LIM(g->rate_limit) / g->n_members;
250 if (share < g->min_share)
251 share = g->min_share;
252 }
253 UNLOCK_GROUP(g);
254 CLAMPTO(share);
255 }
256
257 if (max_so_far < 0)
258 max_so_far = 0;
259 return max_so_far;
260}
261
/** Return the largest number of bytes 'bev' should read right now.
 * Caller must hold the lock on 'bev'. */
ev_ssize_t
_bufferevent_get_read_max(struct bufferevent_private *bev)
{
	return _bufferevent_get_rlim_max(bev, 0);
}
267
/** Return the largest number of bytes 'bev' should write right now.
 * Caller must hold the lock on 'bev'. */
ev_ssize_t
_bufferevent_get_write_max(struct bufferevent_private *bev)
{
	return _bufferevent_get_rlim_max(bev, 1);
}
273
/** Charge 'bytes' of reading against bev's token buckets (both the
 * per-bufferevent bucket and the group bucket, when configured),
 * suspending or unsuspending reads as the buckets cross zero.
 *
 * Caller must hold the lock on 'bev'.  Returns 0 on success, -1 if the
 * refill timer could not be scheduled.
 */
int
_bufferevent_decrement_read_buckets(struct bufferevent_private *bev, ev_ssize_t bytes)
{
	/* XXXXX Make sure all users of this function check its return value */
	int r = 0;
	/* need to hold lock on bev */
	if (!bev->rate_limiting)
		return 0;

	if (bev->rate_limiting->cfg) {
		bev->rate_limiting->limit.read_limit -= bytes;
		if (bev->rate_limiting->limit.read_limit <= 0) {
			/* Bucket exhausted: stop reading and wake up when
			 * the next tick refills it. */
			bufferevent_suspend_read(&bev->bev, BEV_SUSPEND_BW);
			if (event_add(&bev->rate_limiting->refill_bucket_event,
				&bev->rate_limiting->cfg->tick_timeout) < 0)
				r = -1;
		} else if (bev->read_suspended & BEV_SUSPEND_BW) {
			/* Bucket is positive again (e.g. a negative 'bytes'
			 * added tokens): drop the shared refill timer unless
			 * the write side still needs it, then resume. */
			if (!(bev->write_suspended & BEV_SUSPEND_BW))
				event_del(&bev->rate_limiting->refill_bucket_event);
			bufferevent_unsuspend_read(&bev->bev, BEV_SUSPEND_BW);
		}
	}

	if (bev->rate_limiting->group) {
		LOCK_GROUP(bev->rate_limiting->group);
		bev->rate_limiting->group->rate_limit.read_limit -= bytes;
		bev->rate_limiting->group->total_read += bytes;
		if (bev->rate_limiting->group->rate_limit.read_limit <= 0) {
			_bev_group_suspend_reading(bev->rate_limiting->group);
		} else if (bev->rate_limiting->group->read_suspended) {
			_bev_group_unsuspend_reading(bev->rate_limiting->group);
		}
		UNLOCK_GROUP(bev->rate_limiting->group);
	}

	return r;
}
311
/** Charge 'bytes' of writing against bev's token buckets (both the
 * per-bufferevent bucket and the group bucket, when configured),
 * suspending or unsuspending writes as the buckets cross zero.
 *
 * Mirror image of _bufferevent_decrement_read_buckets(); caller must
 * hold the lock on 'bev'.  Returns 0 on success, -1 if the refill timer
 * could not be scheduled.
 */
int
_bufferevent_decrement_write_buckets(struct bufferevent_private *bev, ev_ssize_t bytes)
{
	/* XXXXX Make sure all users of this function check its return value */
	int r = 0;
	/* need to hold lock */
	if (!bev->rate_limiting)
		return 0;

	if (bev->rate_limiting->cfg) {
		bev->rate_limiting->limit.write_limit -= bytes;
		if (bev->rate_limiting->limit.write_limit <= 0) {
			/* Bucket exhausted: stop writing and wake up when
			 * the next tick refills it. */
			bufferevent_suspend_write(&bev->bev, BEV_SUSPEND_BW);
			if (event_add(&bev->rate_limiting->refill_bucket_event,
				&bev->rate_limiting->cfg->tick_timeout) < 0)
				r = -1;
		} else if (bev->write_suspended & BEV_SUSPEND_BW) {
			/* Bucket positive again: drop the shared refill
			 * timer unless the read side still needs it. */
			if (!(bev->read_suspended & BEV_SUSPEND_BW))
				event_del(&bev->rate_limiting->refill_bucket_event);
			bufferevent_unsuspend_write(&bev->bev, BEV_SUSPEND_BW);
		}
	}

	if (bev->rate_limiting->group) {
		LOCK_GROUP(bev->rate_limiting->group);
		bev->rate_limiting->group->rate_limit.write_limit -= bytes;
		bev->rate_limiting->group->total_written += bytes;
		if (bev->rate_limiting->group->rate_limit.write_limit <= 0) {
			_bev_group_suspend_writing(bev->rate_limiting->group);
		} else if (bev->rate_limiting->group->write_suspended) {
			_bev_group_unsuspend_writing(bev->rate_limiting->group);
		}
		UNLOCK_GROUP(bev->rate_limiting->group);
	}

	return r;
}
349
/** Stop reading on every bufferevent in <b>g</b>.
 * Always returns 0.  Needs the group lock held. */
static int
_bev_group_suspend_reading(struct bufferevent_rate_limit_group *g)
{
	/* Needs group lock */
	struct bufferevent_private *bev;
	g->read_suspended = 1;
	g->pending_unsuspend_read = 0;

	/* Note that in this loop we call EVLOCK_TRY_LOCK instead of BEV_LOCK,
	   to prevent a deadlock.  (Ordinarily, the group lock nests inside
	   the bufferevent locks.  If we are unable to lock any individual
	   bufferevent, it will find out later when it looks at its limit
	   and sees that its group is suspended.)
	*/
	TAILQ_FOREACH(bev, &g->members, rate_limiting->next_in_group) {
		if (EVLOCK_TRY_LOCK(bev->lock)) {
			bufferevent_suspend_read(&bev->bev,
			    BEV_SUSPEND_BW_GROUP);
			EVLOCK_UNLOCK(bev->lock, 0);
		}
	}
	return 0;
}
374
/** Stop writing on every bufferevent in <b>g</b>.
 * Always returns 0.  Needs the group lock held; uses EVLOCK_TRY_LOCK for
 * the same deadlock-avoidance reason as _bev_group_suspend_reading(). */
static int
_bev_group_suspend_writing(struct bufferevent_rate_limit_group *g)
{
	/* Needs group lock */
	struct bufferevent_private *bev;
	g->write_suspended = 1;
	g->pending_unsuspend_write = 0;
	TAILQ_FOREACH(bev, &g->members, rate_limiting->next_in_group) {
		if (EVLOCK_TRY_LOCK(bev->lock)) {
			bufferevent_suspend_write(&bev->bev,
			    BEV_SUSPEND_BW_GROUP);
			EVLOCK_UNLOCK(bev->lock, 0);
		}
	}
	return 0;
}
392
/** Timer callback invoked on a single bufferevent with one or more exhausted
    buckets when they are ready to refill.

    'arg' is the struct bufferevent_private whose refill_bucket_event
    fired; 'fd' and 'what' are unused. */
static void
_bev_refill_callback(evutil_socket_t fd, short what, void *arg)
{
	unsigned tick;
	struct timeval now;
	struct bufferevent_private *bev = arg;
	int again = 0;
	BEV_LOCK(&bev->bev);
	/* Rate limiting may have been torn down between scheduling and
	 * firing; bail out if so. */
	if (!bev->rate_limiting || !bev->rate_limiting->cfg) {
		BEV_UNLOCK(&bev->bev);
		return;
	}

	/* First, update the bucket */
	event_base_gettimeofday_cached(bev->bev.ev_base, &now);
	tick = ev_token_bucket_get_tick(&now,
	    bev->rate_limiting->cfg);
	ev_token_bucket_update(&bev->rate_limiting->limit,
	    bev->rate_limiting->cfg,
	    tick);

	/* Now unsuspend any read/write operations as appropriate. */
	if ((bev->read_suspended & BEV_SUSPEND_BW)) {
		if (bev->rate_limiting->limit.read_limit > 0)
			bufferevent_unsuspend_read(&bev->bev, BEV_SUSPEND_BW);
		else
			again = 1;
	}
	if ((bev->write_suspended & BEV_SUSPEND_BW)) {
		if (bev->rate_limiting->limit.write_limit > 0)
			bufferevent_unsuspend_write(&bev->bev, BEV_SUSPEND_BW);
		else
			again = 1;
	}
	if (again) {
		/* One or more of the buckets may need another refill if they
		   started negative.

		   XXXX if we need to be quiet for more ticks, we should
		   maybe figure out what timeout we really want.
		*/
		/* XXXX Handle event_add failure somehow */
		event_add(&bev->rate_limiting->refill_bucket_event,
		    &bev->rate_limiting->cfg->tick_timeout);
	}
	BEV_UNLOCK(&bev->bev);
}
442
443/** Helper: grab a random element from a bufferevent group. */
444static struct bufferevent_private *
445_bev_group_random_element(struct bufferevent_rate_limit_group *group)
446{
447 int which;
448 struct bufferevent_private *bev;
449
450 /* requires group lock */
451
452 if (!group->n_members)
453 return NULL;
454
455 EVUTIL_ASSERT(! TAILQ_EMPTY(&group->members));
456
457 which = _evutil_weakrand() % group->n_members;
458
459 bev = TAILQ_FIRST(&group->members);
460 while (which--)
461 bev = TAILQ_NEXT(bev, rate_limiting->next_in_group);
462
463 return bev;
464}
465
/** Iterate over the elements of a rate-limiting group 'g' with a random
    starting point, assigning each to the variable 'bev', and executing the
    block 'block'.

    The loop runs in two legs: from the random starting member to the end
    of the list, then from the head back up to (but not including) the
    starting member.  'first' and 'bev' must be declared by the caller.

    We do this in a half-baked effort to get fairness among group members.
    XXX Round-robin or some kind of priority queue would be even more fair.
 */
#define FOREACH_RANDOM_ORDER(block)			\
	do {						\
		first = _bev_group_random_element(g);	\
		for (bev = first; bev != TAILQ_END(&g->members); \
		    bev = TAILQ_NEXT(bev, rate_limiting->next_in_group)) { \
			block ;				 \
		}					 \
		for (bev = TAILQ_FIRST(&g->members); bev && bev != first; \
		    bev = TAILQ_NEXT(bev, rate_limiting->next_in_group)) { \
			block ;				 \
		}					 \
	} while (0)
485
/** Resume reading on the members of <b>g</b>.  Requires the group lock.
 *
 * Members whose individual lock cannot be taken right now are left
 * suspended; pending_unsuspend_read is set so the next group refill tick
 * retries them. */
static void
_bev_group_unsuspend_reading(struct bufferevent_rate_limit_group *g)
{
	int again = 0;
	struct bufferevent_private *bev, *first;

	g->read_suspended = 0;
	FOREACH_RANDOM_ORDER({
		if (EVLOCK_TRY_LOCK(bev->lock)) {
			bufferevent_unsuspend_read(&bev->bev,
			    BEV_SUSPEND_BW_GROUP);
			EVLOCK_UNLOCK(bev->lock, 0);
		} else {
			again = 1;
		}
	});
	g->pending_unsuspend_read = again;
}
504
/** Resume writing on the members of <b>g</b>.  Requires the group lock.
 *
 * Mirror of _bev_group_unsuspend_reading(): members we cannot lock are
 * retried on the next group refill tick via pending_unsuspend_write. */
static void
_bev_group_unsuspend_writing(struct bufferevent_rate_limit_group *g)
{
	int again = 0;
	struct bufferevent_private *bev, *first;
	g->write_suspended = 0;

	FOREACH_RANDOM_ORDER({
		if (EVLOCK_TRY_LOCK(bev->lock)) {
			bufferevent_unsuspend_write(&bev->bev,
			    BEV_SUSPEND_BW_GROUP);
			EVLOCK_UNLOCK(bev->lock, 0);
		} else {
			again = 1;
		}
	});
	g->pending_unsuspend_write = again;
}
523
/** Callback invoked every tick to add more elements to the group bucket
    and unsuspend group members as needed.

    'arg' is the struct bufferevent_rate_limit_group; 'fd' and 'what' are
    unused (the event is a persistent timer). */
static void
_bev_group_refill_callback(evutil_socket_t fd, short what, void *arg)
{
	struct bufferevent_rate_limit_group *g = arg;
	unsigned tick;
	struct timeval now;

	event_base_gettimeofday_cached(event_get_base(&g->master_refill_event), &now);

	LOCK_GROUP(g);

	tick = ev_token_bucket_get_tick(&now, &g->rate_limit_cfg);
	ev_token_bucket_update(&g->rate_limit, &g->rate_limit_cfg, tick);

	/* Unsuspend when there is enough in the bucket for at least one
	 * member's minimum share, or when a previous unsuspend pass left
	 * members behind (pending_unsuspend_*). */
	if (g->pending_unsuspend_read ||
	    (g->read_suspended && (g->rate_limit.read_limit >= g->min_share))) {
		_bev_group_unsuspend_reading(g);
	}
	if (g->pending_unsuspend_write ||
	    (g->write_suspended && (g->rate_limit.write_limit >= g->min_share))){
		_bev_group_unsuspend_writing(g);
	}

	/* XXXX Rather than waiting to the next tick to unsuspend stuff
	 * with pending_unsuspend_write/read, we should do it on the
	 * next iteration of the mainloop.
	 */

	UNLOCK_GROUP(g);
}
557
/** Set (or clear, when 'cfg' is NULL) the individual rate limit on
 * 'bev'.
 *
 * With a NULL cfg, any existing bandwidth suspension is lifted and the
 * refill timer is cancelled; the rate_limiting structure itself is kept
 * (it may still carry group membership).  With a non-NULL cfg, the
 * bucket is (re)initialized for the current tick, and reads/writes are
 * suspended immediately if the corresponding bucket is not positive.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int
bufferevent_set_rate_limit(struct bufferevent *bev,
    struct ev_token_bucket_cfg *cfg)
{
	struct bufferevent_private *bevp =
	    EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
	int r = -1;
	struct bufferevent_rate_limit *rlim;
	struct timeval now;
	ev_uint32_t tick;
	int reinit = 0, suspended = 0;
	/* XXX reference-count cfg */

	BEV_LOCK(bev);

	if (cfg == NULL) {
		/* Clearing the limit: unsuspend and cancel the refill
		 * timer, but keep the rate_limiting struct around. */
		if (bevp->rate_limiting) {
			rlim = bevp->rate_limiting;
			rlim->cfg = NULL;
			bufferevent_unsuspend_read(bev, BEV_SUSPEND_BW);
			bufferevent_unsuspend_write(bev, BEV_SUSPEND_BW);
			if (event_initialized(&rlim->refill_bucket_event))
				event_del(&rlim->refill_bucket_event);
		}
		r = 0;
		goto done;
	}

	event_base_gettimeofday_cached(bev->ev_base, &now);
	tick = ev_token_bucket_get_tick(&now, cfg);

	if (bevp->rate_limiting && bevp->rate_limiting->cfg == cfg) {
		/* no-op */
		r = 0;
		goto done;
	}
	if (bevp->rate_limiting == NULL) {
		rlim = mm_calloc(1, sizeof(struct bufferevent_rate_limit));
		if (!rlim)
			goto done;
		bevp->rate_limiting = rlim;
	} else {
		rlim = bevp->rate_limiting;
	}
	/* Replacing an existing cfg counts as reinitialization: the
	 * bucket limits are only clipped downward. */
	reinit = rlim->cfg != NULL;

	rlim->cfg = cfg;
	ev_token_bucket_init(&rlim->limit, cfg, tick, reinit);

	if (reinit) {
		EVUTIL_ASSERT(event_initialized(&rlim->refill_bucket_event));
		event_del(&rlim->refill_bucket_event);
	}
	evtimer_assign(&rlim->refill_bucket_event, bev->ev_base,
	    _bev_refill_callback, bevp);

	/* Apply the new limits immediately: suspend whichever directions
	 * have no tokens, and schedule a refill if anything is suspended. */
	if (rlim->limit.read_limit > 0) {
		bufferevent_unsuspend_read(bev, BEV_SUSPEND_BW);
	} else {
		bufferevent_suspend_read(bev, BEV_SUSPEND_BW);
		suspended=1;
	}
	if (rlim->limit.write_limit > 0) {
		bufferevent_unsuspend_write(bev, BEV_SUSPEND_BW);
	} else {
		bufferevent_suspend_write(bev, BEV_SUSPEND_BW);
		suspended = 1;
	}

	if (suspended)
		event_add(&rlim->refill_bucket_event, &cfg->tick_timeout);

	r = 0;

done:
	BEV_UNLOCK(bev);
	return r;
}
636
/** Allocate a new rate-limit group on 'base', shared-limited by 'cfg'.
 *
 * The configuration is copied into the group, so the caller keeps
 * ownership of 'cfg'.  A persistent timer refills the group bucket once
 * per tick.  The default minimum per-member share is 64 bytes.
 *
 * Returns the new group, or NULL on allocation failure.
 */
struct bufferevent_rate_limit_group *
bufferevent_rate_limit_group_new(struct event_base *base,
    const struct ev_token_bucket_cfg *cfg)
{
	struct bufferevent_rate_limit_group *g;
	struct timeval now;
	ev_uint32_t tick;

	event_base_gettimeofday_cached(base, &now);
	tick = ev_token_bucket_get_tick(&now, cfg);

	g = mm_calloc(1, sizeof(struct bufferevent_rate_limit_group));
	if (!g)
		return NULL;
	memcpy(&g->rate_limit_cfg, cfg, sizeof(g->rate_limit_cfg));
	TAILQ_INIT(&g->members);

	ev_token_bucket_init(&g->rate_limit, cfg, tick, 0);

	event_assign(&g->master_refill_event, base, -1, EV_PERSIST,
	    _bev_group_refill_callback, g);
	/*XXXX handle event_add failure */
	event_add(&g->master_refill_event, &cfg->tick_timeout);

	EVTHREAD_ALLOC_LOCK(g->lock, EVTHREAD_LOCKTYPE_RECURSIVE);

	bufferevent_rate_limit_group_set_min_share(g, 64);

	return g;
}
667
/** Replace the configuration of rate-limit group 'g' with a copy of
 * 'cfg'.
 *
 * The current bucket contents are only clipped downward to the new
 * maxima.  If the tick length changed, the refill timer is rescheduled
 * (which can cause a hiccup in the refill schedule).  Returns 0 on
 * success, -1 if 'g' or 'cfg' is NULL.
 */
int
bufferevent_rate_limit_group_set_cfg(
	struct bufferevent_rate_limit_group *g,
	const struct ev_token_bucket_cfg *cfg)
{
	int same_tick;
	if (!g || !cfg)
		return -1;

	LOCK_GROUP(g);
	same_tick = evutil_timercmp(
		&g->rate_limit_cfg.tick_timeout, &cfg->tick_timeout, ==);
	memcpy(&g->rate_limit_cfg, cfg, sizeof(g->rate_limit_cfg));

	/* Clip the in-flight bucket to the new maxima. */
	if (g->rate_limit.read_limit > (ev_ssize_t)cfg->read_maximum)
		g->rate_limit.read_limit = cfg->read_maximum;
	if (g->rate_limit.write_limit > (ev_ssize_t)cfg->write_maximum)
		g->rate_limit.write_limit = cfg->write_maximum;

	if (!same_tick) {
		/* This can cause a hiccup in the schedule */
		event_add(&g->master_refill_event, &cfg->tick_timeout);
	}

	/* The new limits might force us to adjust min_share differently. */
	bufferevent_rate_limit_group_set_min_share(g, g->configured_min_share);

	UNLOCK_GROUP(g);
	return 0;
}
698
699int
700bufferevent_rate_limit_group_set_min_share(
701 struct bufferevent_rate_limit_group *g,
702 size_t share)
703{
704 if (share > EV_SSIZE_MAX)
705 return -1;
706
707 g->configured_min_share = share;
708
709 /* Can't set share to less than the one-tick maximum. IOW, at steady
710 * state, at least one connection can go per tick. */
711 if (share > g->rate_limit_cfg.read_rate)
712 share = g->rate_limit_cfg.read_rate;
713 if (share > g->rate_limit_cfg.write_rate)
714 share = g->rate_limit_cfg.write_rate;
715
716 g->min_share = share;
717 return 0;
718}
719
/** Free rate-limit group 'g'.  The group must have no members left
 * (asserted); its refill timer is cancelled and its lock released. */
void
bufferevent_rate_limit_group_free(struct bufferevent_rate_limit_group *g)
{
	LOCK_GROUP(g);
	EVUTIL_ASSERT(0 == g->n_members);
	event_del(&g->master_refill_event);
	UNLOCK_GROUP(g);
	EVTHREAD_FREE_LOCK(g->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	mm_free(g);
}
730
/** Add 'bev' to rate-limit group 'g', allocating its rate_limiting
 * structure if it has none.
 *
 * If the bufferevent already belongs to 'g' this is a no-op; if it
 * belongs to another group it is removed from that group first.  When
 * the group is currently suspended in either direction, the suspension
 * is applied to the new member immediately.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int
bufferevent_add_to_rate_limit_group(struct bufferevent *bev,
    struct bufferevent_rate_limit_group *g)
{
	int wsuspend, rsuspend;
	struct bufferevent_private *bevp =
	    EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
	BEV_LOCK(bev);

	if (!bevp->rate_limiting) {
		struct bufferevent_rate_limit *rlim;
		rlim = mm_calloc(1, sizeof(struct bufferevent_rate_limit));
		if (!rlim) {
			BEV_UNLOCK(bev);
			return -1;
		}
		evtimer_assign(&rlim->refill_bucket_event, bev->ev_base,
		    _bev_refill_callback, bevp);
		bevp->rate_limiting = rlim;
	}

	if (bevp->rate_limiting->group == g) {
		BEV_UNLOCK(bev);
		return 0;
	}
	if (bevp->rate_limiting->group)
		bufferevent_remove_from_rate_limit_group(bev);

	LOCK_GROUP(g);
	bevp->rate_limiting->group = g;
	++g->n_members;
	TAILQ_INSERT_TAIL(&g->members, bevp, rate_limiting->next_in_group);

	/* Capture the suspension flags under the group lock, but apply
	 * them after releasing it (we already hold the bev lock). */
	rsuspend = g->read_suspended;
	wsuspend = g->write_suspended;

	UNLOCK_GROUP(g);

	if (rsuspend)
		bufferevent_suspend_read(bev, BEV_SUSPEND_BW_GROUP);
	if (wsuspend)
		bufferevent_suspend_write(bev, BEV_SUSPEND_BW_GROUP);

	BEV_UNLOCK(bev);
	return 0;
}
777
/** Remove 'bev' from its rate-limit group (if any), lifting any
 * group-level bandwidth suspension.  Returns 0. */
int
bufferevent_remove_from_rate_limit_group(struct bufferevent *bev)
{
	return bufferevent_remove_from_rate_limit_group_internal(bev, 1);
}
783
/** Remove 'bev' from its rate-limit group, if it has one.  When
 * 'unsuspend' is nonzero, also lift any BEV_SUSPEND_BW_GROUP suspension
 * on both directions.  Returns 0.
 */
int
bufferevent_remove_from_rate_limit_group_internal(struct bufferevent *bev,
    int unsuspend)
{
	struct bufferevent_private *bevp =
	    EVUTIL_UPCAST(bev, struct bufferevent_private, bev);
	BEV_LOCK(bev);
	if (bevp->rate_limiting && bevp->rate_limiting->group) {
		struct bufferevent_rate_limit_group *g =
		    bevp->rate_limiting->group;
		LOCK_GROUP(g);
		bevp->rate_limiting->group = NULL;
		--g->n_members;
		TAILQ_REMOVE(&g->members, bevp, rate_limiting->next_in_group);
		UNLOCK_GROUP(g);
	}
	if (unsuspend) {
		bufferevent_unsuspend_read(bev, BEV_SUSPEND_BW_GROUP);
		bufferevent_unsuspend_write(bev, BEV_SUSPEND_BW_GROUP);
	}
	BEV_UNLOCK(bev);
	return 0;
}
807
808/* ===
809 * API functions to expose rate limits.
810 *
811 * Don't use these from inside Libevent; they're meant to be for use by
812 * the program.
813 * === */
814
815/* Mostly you don't want to use this function from inside libevent;
816 * _bufferevent_get_read_max() is more likely what you want*/
817ev_ssize_t
818bufferevent_get_read_limit(struct bufferevent *bev)
819{
820 ev_ssize_t r;
821 struct bufferevent_private *bevp;
822 BEV_LOCK(bev);
823 bevp = BEV_UPCAST(bev);
824 if (bevp->rate_limiting && bevp->rate_limiting->cfg) {
825 bufferevent_update_buckets(bevp);
826 r = bevp->rate_limiting->limit.read_limit;
827 } else {
828 r = EV_SSIZE_MAX;
829 }
830 BEV_UNLOCK(bev);
831 return r;
832}
833
834/* Mostly you don't want to use this function from inside libevent;
835 * _bufferevent_get_write_max() is more likely what you want*/
836ev_ssize_t
837bufferevent_get_write_limit(struct bufferevent *bev)
838{
839 ev_ssize_t r;
840 struct bufferevent_private *bevp;
841 BEV_LOCK(bev);
842 bevp = BEV_UPCAST(bev);
843 if (bevp->rate_limiting && bevp->rate_limiting->cfg) {
844 bufferevent_update_buckets(bevp);
845 r = bevp->rate_limiting->limit.write_limit;
846 } else {
847 r = EV_SSIZE_MAX;
848 }
849 BEV_UNLOCK(bev);
850 return r;
851}
852
853ev_ssize_t
854bufferevent_get_max_to_read(struct bufferevent *bev)
855{
856 ev_ssize_t r;
857 BEV_LOCK(bev);
858 r = _bufferevent_get_read_max(BEV_UPCAST(bev));
859 BEV_UNLOCK(bev);
860 return r;
861}
862
863ev_ssize_t
864bufferevent_get_max_to_write(struct bufferevent *bev)
865{
866 ev_ssize_t r;
867 BEV_LOCK(bev);
868 r = _bufferevent_get_write_max(BEV_UPCAST(bev));
869 BEV_UNLOCK(bev);
870 return r;
871}
872
873
874/* Mostly you don't want to use this function from inside libevent;
875 * _bufferevent_get_read_max() is more likely what you want*/
876ev_ssize_t
877bufferevent_rate_limit_group_get_read_limit(
878 struct bufferevent_rate_limit_group *grp)
879{
880 ev_ssize_t r;
881 LOCK_GROUP(grp);
882 r = grp->rate_limit.read_limit;
883 UNLOCK_GROUP(grp);
884 return r;
885}
886
887/* Mostly you don't want to use this function from inside libevent;
888 * _bufferevent_get_write_max() is more likely what you want. */
889ev_ssize_t
890bufferevent_rate_limit_group_get_write_limit(
891 struct bufferevent_rate_limit_group *grp)
892{
893 ev_ssize_t r;
894 LOCK_GROUP(grp);
895 r = grp->rate_limit.write_limit;
896 UNLOCK_GROUP(grp);
897 return r;
898}
899
/** Subtract 'decr' from bev's individual read bucket (a negative 'decr'
 * adds tokens), suspending or resuming reads when the bucket crosses
 * zero.  The bufferevent must already have a rate-limit config
 * (asserted).  Returns 0 on success, -1 if the refill timer could not be
 * scheduled.
 */
int
bufferevent_decrement_read_limit(struct bufferevent *bev, ev_ssize_t decr)
{
	int r = 0;
	ev_ssize_t old_limit, new_limit;
	struct bufferevent_private *bevp;
	BEV_LOCK(bev);
	bevp = BEV_UPCAST(bev);
	EVUTIL_ASSERT(bevp->rate_limiting && bevp->rate_limiting->cfg);
	old_limit = bevp->rate_limiting->limit.read_limit;

	new_limit = (bevp->rate_limiting->limit.read_limit -= decr);
	if (old_limit > 0 && new_limit <= 0) {
		/* Crossed into exhaustion: stop reading, wake at next tick. */
		bufferevent_suspend_read(bev, BEV_SUSPEND_BW);
		if (event_add(&bevp->rate_limiting->refill_bucket_event,
			&bevp->rate_limiting->cfg->tick_timeout) < 0)
			r = -1;
	} else if (old_limit <= 0 && new_limit > 0) {
		/* Crossed back to positive: drop the shared refill timer
		 * unless the write side still needs it, then resume. */
		if (!(bevp->write_suspended & BEV_SUSPEND_BW))
			event_del(&bevp->rate_limiting->refill_bucket_event);
		bufferevent_unsuspend_read(bev, BEV_SUSPEND_BW);
	}

	BEV_UNLOCK(bev);
	return r;
}
926
/** Subtract 'decr' from bev's individual write bucket (a negative 'decr'
 * adds tokens), suspending or resuming writes when the bucket crosses
 * zero.  The bufferevent must already have a rate-limit config
 * (asserted).  Returns 0 on success, -1 if the refill timer could not be
 * scheduled.
 */
int
bufferevent_decrement_write_limit(struct bufferevent *bev, ev_ssize_t decr)
{
	/* XXXX this is mostly copy-and-paste from
	 * bufferevent_decrement_read_limit */
	int r = 0;
	ev_ssize_t old_limit, new_limit;
	struct bufferevent_private *bevp;
	BEV_LOCK(bev);
	bevp = BEV_UPCAST(bev);
	EVUTIL_ASSERT(bevp->rate_limiting && bevp->rate_limiting->cfg);
	old_limit = bevp->rate_limiting->limit.write_limit;

	new_limit = (bevp->rate_limiting->limit.write_limit -= decr);
	if (old_limit > 0 && new_limit <= 0) {
		/* Crossed into exhaustion: stop writing, wake at next tick. */
		bufferevent_suspend_write(bev, BEV_SUSPEND_BW);
		if (event_add(&bevp->rate_limiting->refill_bucket_event,
			&bevp->rate_limiting->cfg->tick_timeout) < 0)
			r = -1;
	} else if (old_limit <= 0 && new_limit > 0) {
		/* Crossed back to positive: drop the shared refill timer
		 * unless the read side still needs it, then resume. */
		if (!(bevp->read_suspended & BEV_SUSPEND_BW))
			event_del(&bevp->rate_limiting->refill_bucket_event);
		bufferevent_unsuspend_write(bev, BEV_SUSPEND_BW);
	}

	BEV_UNLOCK(bev);
	return r;
}
955
956int
957bufferevent_rate_limit_group_decrement_read(
958 struct bufferevent_rate_limit_group *grp, ev_ssize_t decr)
959{
960 int r = 0;
961 ev_ssize_t old_limit, new_limit;
962 LOCK_GROUP(grp);
963 old_limit = grp->rate_limit.read_limit;
964 new_limit = (grp->rate_limit.read_limit -= decr);
965
966 if (old_limit > 0 && new_limit <= 0) {
967 _bev_group_suspend_reading(grp);
968 } else if (old_limit <= 0 && new_limit > 0) {
969 _bev_group_unsuspend_reading(grp);
970 }
971
972 UNLOCK_GROUP(grp);
973 return r;
974}
975
976int
977bufferevent_rate_limit_group_decrement_write(
978 struct bufferevent_rate_limit_group *grp, ev_ssize_t decr)
979{
980 int r = 0;
981 ev_ssize_t old_limit, new_limit;
982 LOCK_GROUP(grp);
983 old_limit = grp->rate_limit.write_limit;
984 new_limit = (grp->rate_limit.write_limit -= decr);
985
986 if (old_limit > 0 && new_limit <= 0) {
987 _bev_group_suspend_writing(grp);
988 } else if (old_limit <= 0 && new_limit > 0) {
989 _bev_group_unsuspend_writing(grp);
990 }
991
992 UNLOCK_GROUP(grp);
993 return r;
994}
995
996void
997bufferevent_rate_limit_group_get_totals(struct bufferevent_rate_limit_group *grp,
998 ev_uint64_t *total_read_out, ev_uint64_t *total_written_out)
999{
1000 EVUTIL_ASSERT(grp != NULL);
1001 if (total_read_out)
1002 *total_read_out = grp->total_read;
1003 if (total_written_out)
1004 *total_written_out = grp->total_written;
1005}
1006
1007void
1008bufferevent_rate_limit_group_reset_totals(struct bufferevent_rate_limit_group *grp)
1009{
1010 grp->total_read = grp->total_written = 0;
1011}