/*
 * BFQ: Hierarchical B-WF2Q+ scheduler.
 *
 * Based on ideas and code from CFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
 */

#ifdef CONFIG_CGROUP_BFQIO
#define for_each_entity(entity)	\
	for (; entity != NULL; entity = entity->parent)

#define for_each_entity_safe(entity, parent) \
	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)

static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 int extract,
						 struct bfq_data *bfqd);

static inline void bfq_update_budget(struct bfq_entity *next_in_service)
{
	struct bfq_entity *bfqg_entity;
	struct bfq_group *bfqg;
	struct bfq_sched_data *group_sd;

	BUG_ON(next_in_service == NULL);

	group_sd = next_in_service->sched_data;

	bfqg = container_of(group_sd, struct bfq_group, sched_data);
	/*
	 * bfq_group's my_entity field is not NULL only if the group
	 * is not the root group. We must not touch the root entity
	 * as it must never become an in-service entity.
	 */
	bfqg_entity = bfqg->my_entity;
	if (bfqg_entity != NULL)
		bfqg_entity->budget = next_in_service->budget;
}

static int bfq_update_next_in_service(struct bfq_sched_data *sd)
{
	struct bfq_entity *next_in_service;

	if (sd->in_service_entity != NULL)
		/* will update/requeue at the end of service */
		return 0;

	/*
	 * NOTE: this can be improved in many ways, such as returning
	 * 1 (and thus propagating upwards the update) only when the
	 * budget changes, or caching the bfqq that will be scheduled
	 * next from this subtree. For now we worry more about
	 * correctness than about performance...
	 */
	next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
	sd->next_in_service = next_in_service;

	if (next_in_service != NULL)
		bfq_update_budget(next_in_service);

	return 1;
}

static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
					     struct bfq_entity *entity)
{
	BUG_ON(sd->next_in_service != entity);
}
#else
#define for_each_entity(entity)	\
	for (; entity != NULL; entity = NULL)

#define for_each_entity_safe(entity, parent) \
	for (parent = NULL; entity != NULL; entity = parent)

static inline int bfq_update_next_in_service(struct bfq_sched_data *sd)
{
	return 0;
}

static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
					     struct bfq_entity *entity)
{
}

static inline void bfq_update_budget(struct bfq_entity *next_in_service)
{
}
#endif
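
/*
 * Illustrative note (not from the original source): with
 * CONFIG_CGROUP_BFQIO set, for_each_entity() walks from a queue's
 * entity up through its ancestor group entities (the root group has
 * no entity of its own, so the walk stops below it); without cgroup
 * support the hierarchy is flat and the loop body runs exactly once:
 *
 *	struct bfq_entity *entity = &bfqq->entity;
 *
 *	for_each_entity(entity) {
 *		// one iteration per level: queue, group, ..., top group
 *	}
 */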

/*
 * Shift for timestamp calculations.  This actually limits the maximum
 * service allowed in one timestamp delta (small shift values increase it),
 * the maximum total weight that can be used for the queues in the system
 * (big shift values increase it), and the period of virtual time
 * wraparounds.
 */
#define WFQ_SERVICE_SHIFT	22
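
/*
 * Illustrative arithmetic (an added note, not from the original
 * source): with a shift of 22, a service of S maps to a virtual-time
 * delta of (S << 22) / weight in bfq_delta() below.  On a 64-bit
 * timestamp this leaves 64 - 22 = 42 bits for the service amount
 * before the shift overflows, while the 22 fractional bits bound the
 * total weight for which timestamps keep full resolution.
 */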

/**
 * bfq_gt - compare two timestamps.
 * @a: first ts.
 * @b: second ts.
 *
 * Return @a > @b, dealing with wrapping correctly.
 */
static inline int bfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}
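
/*
 * Example (added for illustration): the comparison stays correct
 * across wraparound because the subtraction is done modulo 2^64 and
 * then read as signed:
 *
 *	bfq_gt(1, ULLONG_MAX) == 1	(1 is "after" a wrapped maximum)
 *	bfq_gt(1, 2) == 0
 *
 * It is only meaningful while the two timestamps are less than 2^63
 * apart.
 */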

static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = NULL;

	BUG_ON(entity == NULL);

	if (entity->my_sched_data == NULL)
		bfqq = container_of(entity, struct bfq_queue, entity);

	return bfqq;
}


/**
 * bfq_delta - map service into the virtual time domain.
 * @service: amount of service.
 * @weight: scale factor (weight of an entity or weight sum).
 */
static inline u64 bfq_delta(unsigned long service,
			    unsigned long weight)
{
	u64 d = (u64)service << WFQ_SERVICE_SHIFT;

	do_div(d, weight);
	return d;
}
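
/*
 * Worked example (added for illustration): for service = 1000 and
 * weight = 10, bfq_delta() returns (1000 << 22) / 10 = 419430400.
 * Virtual time consumed is proportional to service and inversely
 * proportional to weight, so an entity with twice the weight advances
 * its timestamps half as fast for the same amount of service.
 */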

/**
 * bfq_calc_finish - assign the finish time to an entity.
 * @entity: the entity to act upon.
 * @service: the service to be charged to the entity.
 */
static inline void bfq_calc_finish(struct bfq_entity *entity,
				   unsigned long service)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	BUG_ON(entity->weight == 0);

	entity->finish = entity->start +
		bfq_delta(service, entity->weight);

	if (bfqq != NULL) {
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: serv %lu, w %d",
			service, entity->weight);
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: start %llu, finish %llu, delta %llu",
			entity->start, entity->finish,
			bfq_delta(service, entity->weight));
	}
}

/**
 * bfq_entity_of - get an entity from a node.
 * @node: the node field of the entity.
 *
 * Convert a node pointer to the corresponding entity.  This is used only
 * to simplify the logic of some functions and not as the generic
 * conversion mechanism because, e.g., in the tree walking functions,
 * the check for a %NULL value would be redundant.
 */
static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
{
	struct bfq_entity *entity = NULL;

	if (node != NULL)
		entity = rb_entry(node, struct bfq_entity, rb_node);

	return entity;
}

/**
 * bfq_extract - remove an entity from a tree.
 * @root: the tree root.
 * @entity: the entity to remove.
 */
static inline void bfq_extract(struct rb_root *root,
			       struct bfq_entity *entity)
{
	BUG_ON(entity->tree != root);

	entity->tree = NULL;
	rb_erase(&entity->rb_node, root);
}

/**
 * bfq_idle_extract - extract an entity from the idle tree.
 * @st: the service tree of the owning @entity.
 * @entity: the entity being removed.
 */
static void bfq_idle_extract(struct bfq_service_tree *st,
			     struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *next;

	BUG_ON(entity->tree != &st->idle);

	if (entity == st->first_idle) {
		next = rb_next(&entity->rb_node);
		st->first_idle = bfq_entity_of(next);
	}

	if (entity == st->last_idle) {
		next = rb_prev(&entity->rb_node);
		st->last_idle = bfq_entity_of(next);
	}

	bfq_extract(&st->idle, entity);

	if (bfqq != NULL)
		list_del(&bfqq->bfqq_list);
}

/**
 * bfq_insert - generic tree insertion.
 * @root: tree root.
 * @entity: entity to insert.
 *
 * This is used for the idle and the active tree, since they are both
 * ordered by finish time.
 */
static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
{
	struct bfq_entity *entry;
	struct rb_node **node = &root->rb_node;
	struct rb_node *parent = NULL;

	BUG_ON(entity->tree != NULL);

	while (*node != NULL) {
		parent = *node;
		entry = rb_entry(parent, struct bfq_entity, rb_node);

		if (bfq_gt(entry->finish, entity->finish))
			node = &parent->rb_left;
		else
			node = &parent->rb_right;
	}

	rb_link_node(&entity->rb_node, parent, node);
	rb_insert_color(&entity->rb_node, root);

	entity->tree = root;
}

/**
 * bfq_update_min - update the min_start field of an entity.
 * @entity: the entity to update.
 * @node: one of its children.
 *
 * This function is called when @entity may store an invalid value for
 * min_start due to updates to the active tree.  The function assumes
 * that the subtree rooted at @node (which may be its left or its right
 * child) has a valid min_start value.
 */
static inline void bfq_update_min(struct bfq_entity *entity,
				  struct rb_node *node)
{
	struct bfq_entity *child;

	if (node != NULL) {
		child = rb_entry(node, struct bfq_entity, rb_node);
		if (bfq_gt(entity->min_start, child->min_start))
			entity->min_start = child->min_start;
	}
}

/**
 * bfq_update_active_node - recalculate min_start.
 * @node: the node to update.
 *
 * @node may have changed position or one of its children may have moved;
 * this function updates its min_start value.  The left and right subtrees
 * are assumed to hold a correct min_start value.
 */
static inline void bfq_update_active_node(struct rb_node *node)
{
	struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);

	entity->min_start = entity->start;
	bfq_update_min(entity, node->rb_right);
	bfq_update_min(entity, node->rb_left);
}

/**
 * bfq_update_active_tree - update min_start for the whole active tree.
 * @node: the starting node.
 *
 * @node must be the deepest modified node after an update.  This function
 * updates its min_start using the values held by its children, assuming
 * that they did not change, and then updates all the nodes that may have
 * changed in the path to the root.  The only nodes that may have changed
 * are the ones in the path or their siblings.
 */
static void bfq_update_active_tree(struct rb_node *node)
{
	struct rb_node *parent;

up:
	bfq_update_active_node(node);

	parent = rb_parent(node);
	if (parent == NULL)
		return;

	if (node == parent->rb_left && parent->rb_right != NULL)
		bfq_update_active_node(parent->rb_right);
	else if (parent->rb_left != NULL)
		bfq_update_active_node(parent->rb_left);

	node = parent;
	goto up;
}
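
/*
 * Illustrative sketch (added, not from the original source): the
 * active tree is an rb_tree ordered by finish time F, augmented so
 * that each node caches the minimum start time S in its subtree:
 *
 *	            (F=30, S=12, min_start=5)
 *	            /                      \
 *	(F=20, S=5, min_start=5)    (F=40, S=16, min_start=16)
 *
 * After an insertion or rotation, bfq_update_active_tree() walks from
 * the deepest modified node up to the root recomputing min_start, so
 * an eligible entity (start <= vtime) with the smallest finish time
 * can still be located in O(log N).
 */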

static void bfq_weights_tree_add(struct bfq_data *bfqd,
				 struct bfq_entity *entity,
				 struct rb_root *root);

static void bfq_weights_tree_remove(struct bfq_data *bfqd,
				    struct bfq_entity *entity,
				    struct rb_root *root);


/**
 * bfq_active_insert - insert an entity in the active tree of its
 *                     group/device.
 * @st: the service tree of the entity.
 * @entity: the entity being inserted.
 *
 * The active tree is ordered by finish time, but an extra key is kept
 * in each node, containing the minimum value for the start times of
 * its children (and the node itself), so it's possible to search for
 * the eligible node with the lowest finish time in logarithmic time.
 */
static void bfq_active_insert(struct bfq_service_tree *st,
			      struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node = &entity->rb_node;
#ifdef CONFIG_CGROUP_BFQIO
	struct bfq_sched_data *sd = NULL;
	struct bfq_group *bfqg = NULL;
	struct bfq_data *bfqd = NULL;
#endif

	bfq_insert(&st->active, entity);

	if (node->rb_left != NULL)
		node = node->rb_left;
	else if (node->rb_right != NULL)
		node = node->rb_right;

	bfq_update_active_tree(node);

#ifdef CONFIG_CGROUP_BFQIO
	sd = entity->sched_data;
	bfqg = container_of(sd, struct bfq_group, sched_data);
	BUG_ON(!bfqg);
	bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
	if (bfqq != NULL)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
#ifdef CONFIG_CGROUP_BFQIO
	else { /* bfq_group */
		BUG_ON(!bfqd);
		bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
	}
	if (bfqg != bfqd->root_group) {
		BUG_ON(!bfqg);
		BUG_ON(!bfqd);
		bfqg->active_entities++;
		if (bfqg->active_entities == 2)
			bfqd->active_numerous_groups++;
	}
#endif
}

/**
 * bfq_ioprio_to_weight - calc a weight from an ioprio.
 * @ioprio: the ioprio value to convert.
 */
static inline unsigned short bfq_ioprio_to_weight(int ioprio)
{
	BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
	return IOPRIO_BE_NR - ioprio;
}

/**
 * bfq_weight_to_ioprio - calc an ioprio from a weight.
 * @weight: the weight value to convert.
 *
 * To preserve as much as possible the old only-ioprio user interface,
 * 0 is used as an escape ioprio value for weights (numerically) equal to
 * or larger than IOPRIO_BE_NR.
 */
static inline unsigned short bfq_weight_to_ioprio(int weight)
{
	BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
	return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}
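
/*
 * Illustrative mapping (assuming IOPRIO_BE_NR == 8, as in mainline):
 *
 *	ioprio 0 (highest) -> weight 8	weight 8 -> ioprio 0
 *	ioprio 4           -> weight 4	weight 4 -> ioprio 4
 *	ioprio 7 (lowest)  -> weight 1	weight 1 -> ioprio 7
 *					weight > 8 -> ioprio 0 (escape)
 *
 * So the two conversions are inverses of each other only for weights
 * in [1, IOPRIO_BE_NR].
 */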

static inline void bfq_get_entity(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	if (bfqq != NULL) {
		atomic_inc(&bfqq->ref);
		bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
			     bfqq, atomic_read(&bfqq->ref));
	}
}

/**
 * bfq_find_deepest - find the deepest node that an extraction can modify.
 * @node: the node being removed.
 *
 * Do the first step of an extraction in an rb tree, looking for the
 * node that will replace @node, and returning the deepest node that
 * the following modifications to the tree can touch.  If @node is the
 * last node in the tree return %NULL.
 */
static struct rb_node *bfq_find_deepest(struct rb_node *node)
{
	struct rb_node *deepest;

	if (node->rb_right == NULL && node->rb_left == NULL)
		deepest = rb_parent(node);
	else if (node->rb_right == NULL)
		deepest = node->rb_left;
	else if (node->rb_left == NULL)
		deepest = node->rb_right;
	else {
		deepest = rb_next(node);
		if (deepest->rb_right != NULL)
			deepest = deepest->rb_right;
		else if (rb_parent(deepest) != node)
			deepest = rb_parent(deepest);
	}

	return deepest;
}

/**
 * bfq_active_extract - remove an entity from the active tree.
 * @st: the service_tree containing the tree.
 * @entity: the entity being removed.
 */
static void bfq_active_extract(struct bfq_service_tree *st,
			       struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node;
#ifdef CONFIG_CGROUP_BFQIO
	struct bfq_sched_data *sd = NULL;
	struct bfq_group *bfqg = NULL;
	struct bfq_data *bfqd = NULL;
#endif

	node = bfq_find_deepest(&entity->rb_node);
	bfq_extract(&st->active, entity);

	if (node != NULL)
		bfq_update_active_tree(node);

#ifdef CONFIG_CGROUP_BFQIO
	sd = entity->sched_data;
	bfqg = container_of(sd, struct bfq_group, sched_data);
	BUG_ON(!bfqg);
	bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
	if (bfqq != NULL)
		list_del(&bfqq->bfqq_list);
#ifdef CONFIG_CGROUP_BFQIO
	else { /* bfq_group */
		BUG_ON(!bfqd);
		bfq_weights_tree_remove(bfqd, entity,
					&bfqd->group_weights_tree);
	}
	if (bfqg != bfqd->root_group) {
		BUG_ON(!bfqg);
		BUG_ON(!bfqd);
		BUG_ON(!bfqg->active_entities);
		bfqg->active_entities--;
		if (bfqg->active_entities == 1) {
			BUG_ON(!bfqd->active_numerous_groups);
			bfqd->active_numerous_groups--;
		}
	}
#endif
}

/**
 * bfq_idle_insert - insert an entity into the idle tree.
 * @st: the service tree containing the tree.
 * @entity: the entity to insert.
 */
static void bfq_idle_insert(struct bfq_service_tree *st,
			    struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
		st->first_idle = entity;
	if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
		st->last_idle = entity;

	bfq_insert(&st->idle, entity);

	if (bfqq != NULL)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
}

/**
 * bfq_forget_entity - remove an entity from the wfq trees.
 * @st: the service tree.
 * @entity: the entity being removed.
 *
 * Update the device status and forget everything about @entity, putting
 * the device reference to it, if it is a queue.  Entities belonging to
 * groups are not refcounted.
 */
static void bfq_forget_entity(struct bfq_service_tree *st,
			      struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct bfq_sched_data *sd;

	BUG_ON(!entity->on_st);

	entity->on_st = 0;
	st->wsum -= entity->weight;
	if (bfqq != NULL) {
		sd = entity->sched_data;
		bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
			     bfqq, atomic_read(&bfqq->ref));
		bfq_put_queue(bfqq);
	}
}

/**
 * bfq_put_idle_entity - release the idle tree ref of an entity.
 * @st: service tree for the entity.
 * @entity: the entity being released.
 */
static void bfq_put_idle_entity(struct bfq_service_tree *st,
				struct bfq_entity *entity)
{
	bfq_idle_extract(st, entity);
	bfq_forget_entity(st, entity);
}

/**
 * bfq_forget_idle - update the idle tree if necessary.
 * @st: the service tree to act upon.
 *
 * To preserve the global O(log N) complexity we only remove one entry here;
 * as the idle tree will not grow indefinitely this can be done safely.
 */
static void bfq_forget_idle(struct bfq_service_tree *st)
{
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
	    !bfq_gt(last_idle->finish, st->vtime)) {
		/*
		 * Forget the whole idle tree, increasing the vtime past
		 * the last finish time of idle entities.
		 */
		st->vtime = last_idle->finish;
	}

	if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
		bfq_put_idle_entity(st, first_idle);
}

static struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
				struct bfq_entity *entity)
{
	struct bfq_service_tree *new_st = old_st;

	if (entity->ioprio_changed) {
		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
		unsigned short prev_weight, new_weight;
		struct bfq_data *bfqd = NULL;
		struct rb_root *root;
#ifdef CONFIG_CGROUP_BFQIO
		struct bfq_sched_data *sd;
		struct bfq_group *bfqg;
#endif

		if (bfqq != NULL)
			bfqd = bfqq->bfqd;
#ifdef CONFIG_CGROUP_BFQIO
		else {
			sd = entity->my_sched_data;
			bfqg = container_of(sd, struct bfq_group, sched_data);
			BUG_ON(!bfqg);
			bfqd = (struct bfq_data *)bfqg->bfqd;
			BUG_ON(!bfqd);
		}
#endif

		BUG_ON(old_st->wsum < entity->weight);
		old_st->wsum -= entity->weight;

		if (entity->new_weight != entity->orig_weight) {
			if (entity->new_weight < BFQ_MIN_WEIGHT ||
			    entity->new_weight > BFQ_MAX_WEIGHT) {
				printk(KERN_CRIT "update_weight_prio: "
						 "new_weight %d\n",
					entity->new_weight);
				BUG();
			}
			entity->orig_weight = entity->new_weight;
			entity->ioprio =
				bfq_weight_to_ioprio(entity->orig_weight);
		}

		entity->ioprio_class = entity->new_ioprio_class;
		entity->ioprio_changed = 0;

		/*
		 * NOTE: here we may be changing the weight too early,
		 * which will cause unfairness.  The correct approach
		 * would have required additional complexity to defer
		 * weight changes to the proper time instants (i.e.,
		 * when entity->finish <= old_st->vtime).
		 */
		new_st = bfq_entity_service_tree(entity);

		prev_weight = entity->weight;
		new_weight = entity->orig_weight *
			     (bfqq != NULL ? bfqq->wr_coeff : 1);
		/*
		 * If the weight of the entity changes, remove the entity
		 * from its old weight counter (if there is a counter
		 * associated with the entity), and add it to the counter
		 * associated with its new weight.
		 */
		if (prev_weight != new_weight) {
			root = bfqq ? &bfqd->queue_weights_tree :
				      &bfqd->group_weights_tree;
			bfq_weights_tree_remove(bfqd, entity, root);
		}
		entity->weight = new_weight;
		/*
		 * Add the entity to its weights tree only if it is
		 * not associated with a weight-raised queue.
		 */
		if (prev_weight != new_weight &&
		    (bfqq ? bfqq->wr_coeff == 1 : 1))
			/* If we get here, root has been initialized. */
			bfq_weights_tree_add(bfqd, entity, root);

		new_st->wsum += entity->weight;

		if (new_st != old_st)
			entity->start = new_st->vtime;
	}

	return new_st;
}
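
/*
 * Illustrative note (added, not from the original source): the
 * effective weight of a queue is its base weight multiplied by the
 * current weight-raising coefficient, e.g. with orig_weight = 4 and a
 * hypothetical raised wr_coeff = 30:
 *
 *	new_weight = 4 * 30 = 120
 *
 * This is also why raised queues are kept out of queue_weights_tree
 * above: their temporarily inflated weights would distort the checks
 * based on that tree.
 */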

/**
 * bfq_bfqq_served - update the scheduler status after selection for
 *                   service.
 * @bfqq: the queue being served.
 * @served: bytes to transfer.
 *
 * NOTE: this can be optimized, as the timestamps of upper level entities
 * are synchronized every time a new bfqq is selected for service.  For now,
 * we keep it to better check consistency.
 */
static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
{
	struct bfq_entity *entity = &bfqq->entity;
	struct bfq_service_tree *st;

	for_each_entity(entity) {
		st = bfq_entity_service_tree(entity);

		entity->service += served;
		BUG_ON(entity->service > entity->budget);
		BUG_ON(st->wsum == 0);

		st->vtime += bfq_delta(served, st->wsum);
		bfq_forget_idle(st);
	}
	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
}
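
/*
 * Illustrative vtime arithmetic (added): if two backlogged queues have
 * weights 2 and 1 (so st->wsum == 3) and the first queue is served 300
 * sectors, the tree's virtual time advances by bfq_delta(300, 3), i.e.
 * in proportion to service over the total weight, while only the
 * served entity's own finish time moves by bfq_delta(300, 2) via
 * bfq_calc_finish().
 */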

/**
 * bfq_bfqq_charge_full_budget - set the service to the entity budget.
 * @bfqq: the queue that needs a service update.
 *
 * When it's not possible to be fair in the service domain, because
 * a queue is not consuming its budget fast enough (the meaning of
 * fast depends on the timeout parameter), we charge it a full
 * budget.  In this way we should obtain a sort of time-domain
 * fairness among all the seeky/slow queues.
 */
static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");

	bfq_bfqq_served(bfqq, entity->budget - entity->service);
}

/**
 * __bfq_activate_entity - activate an entity.
 * @entity: the entity being activated.
 *
 * Called whenever an entity is activated, i.e., it is not active and one
 * of its children receives a new request, or has to be reactivated due to
 * budget exhaustion.  It uses the current budget of the entity (and the
 * service received, if @entity is active) to calculate its timestamps.
 */
static void __bfq_activate_entity(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);

	if (entity == sd->in_service_entity) {
		BUG_ON(entity->tree != NULL);
		/*
		 * If we are requeueing the current entity we have
		 * to take care not to charge it for service it has
		 * not received.
		 */
		bfq_calc_finish(entity, entity->service);
		entity->start = entity->finish;
		sd->in_service_entity = NULL;
	} else if (entity->tree == &st->active) {
		/*
		 * Requeueing an entity due to a change of some
		 * next_in_service entity below it.  We reuse the
		 * old start time.
		 */
		bfq_active_extract(st, entity);
	} else if (entity->tree == &st->idle) {
		/*
		 * Must be on the idle tree, bfq_idle_extract() will
		 * check for that.
		 */
		bfq_idle_extract(st, entity);
		entity->start = bfq_gt(st->vtime, entity->finish) ?
				st->vtime : entity->finish;
	} else {
		/*
		 * The finish time of the entity may be invalid, and
		 * it is in the past for sure, otherwise the queue
		 * would have been on the idle tree.
		 */
		entity->start = st->vtime;
		st->wsum += entity->weight;
		bfq_get_entity(entity);

		BUG_ON(entity->on_st);
		entity->on_st = 1;
	}

	st = __bfq_entity_update_weight_prio(st, entity);
	bfq_calc_finish(entity, entity->budget);
	bfq_active_insert(st, entity);
}
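
/*
 * Recap of the activation cases handled above: on activation an entity
 * may be (a) the in-service entity being requeued, restarting from its
 * recomputed finish time; (b) already on the active tree, keeping its
 * old start time; (c) on the idle tree, restarting from
 * max(vtime, old finish); or (d) brand new, starting at the current
 * vtime.  In every case the finish time is then recomputed from the
 * full budget and the entity is (re)inserted into the active tree.
 */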

/**
 * bfq_activate_entity - activate an entity and its ancestors if necessary.
 * @entity: the entity to activate.
 *
 * Activate @entity and all the entities on the path from it to the root.
 */
static void bfq_activate_entity(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd;

	for_each_entity(entity) {
		__bfq_activate_entity(entity);

		sd = entity->sched_data;
		if (!bfq_update_next_in_service(sd))
			/*
			 * No need to propagate the activation to the
			 * upper entities, as they will be updated when
			 * the in-service entity is rescheduled.
			 */
			break;
	}
}

/**
 * __bfq_deactivate_entity - deactivate an entity from its service tree.
 * @entity: the entity to deactivate.
 * @requeue: if false, the entity will not be put into the idle tree.
 *
 * Deactivate an entity, independently of its previous state.  If the
 * entity was not on a service tree just return, otherwise if it is on
 * any scheduler tree, extract it from that tree, and if necessary
 * and if the caller specified @requeue, put it on the idle tree.
 *
 * Return %1 if the caller should update the entity hierarchy, i.e.,
 * if the entity was in service or if it was the next_in_service for
 * its sched_data; return %0 otherwise.
 */
static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
	int was_in_service = entity == sd->in_service_entity;
	int ret = 0;

	if (!entity->on_st)
		return 0;

	BUG_ON(was_in_service && entity->tree != NULL);

	if (was_in_service) {
		bfq_calc_finish(entity, entity->service);
		sd->in_service_entity = NULL;
	} else if (entity->tree == &st->active)
		bfq_active_extract(st, entity);
	else if (entity->tree == &st->idle)
		bfq_idle_extract(st, entity);
	else if (entity->tree != NULL)
		BUG();

	if (was_in_service || sd->next_in_service == entity)
		ret = bfq_update_next_in_service(sd);

	if (!requeue || !bfq_gt(entity->finish, st->vtime))
		bfq_forget_entity(st, entity);
	else
		bfq_idle_insert(st, entity);

	BUG_ON(sd->in_service_entity == entity);
	BUG_ON(sd->next_in_service == entity);

	return ret;
}

/**
 * bfq_deactivate_entity - deactivate an entity.
 * @entity: the entity to deactivate.
 * @requeue: true if the entity can be put on the idle tree.
 */
static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
{
	struct bfq_sched_data *sd;
	struct bfq_entity *parent;

	for_each_entity_safe(entity, parent) {
		sd = entity->sched_data;

		if (!__bfq_deactivate_entity(entity, requeue))
			/*
			 * The parent entity is still backlogged, and
			 * we don't need to update it as it is still
			 * in service.
			 */
			break;

		if (sd->next_in_service != NULL)
			/*
			 * The parent entity is still backlogged and
			 * the budgets on the path towards the root
			 * need to be updated.
			 */
			goto update;

		/*
		 * If we reach this point, the parent is no longer
		 * backlogged and we want to propagate the dequeue
		 * upwards.
		 */
		requeue = 1;
	}

	return;

update:
	entity = parent;
	for_each_entity(entity) {
		__bfq_activate_entity(entity);

		sd = entity->sched_data;
		if (!bfq_update_next_in_service(sd))
			break;
	}
}

/**
 * bfq_update_vtime - update vtime if necessary.
 * @st: the service tree to act upon.
 *
 * If necessary update the service tree vtime to have at least one
 * eligible entity, skipping to its start time.  Assumes that the
 * active tree of the device is not empty.
 *
 * NOTE: this hierarchical implementation updates vtimes quite often,
 * so we may end up with reactivated processes getting timestamps after a
 * vtime skip done because we needed a ->first_active entity on some
 * intermediate node.
 */
static void bfq_update_vtime(struct bfq_service_tree *st)
{
	struct bfq_entity *entry;
	struct rb_node *node = st->active.rb_node;

	entry = rb_entry(node, struct bfq_entity, rb_node);
	if (bfq_gt(entry->min_start, st->vtime)) {
		st->vtime = entry->min_start;
		bfq_forget_idle(st);
	}
}

/**
 * bfq_first_active_entity - find the eligible entity with
 *                           the smallest finish time.
 * @st: the service tree to select from.
 *
 * This function searches for the first schedulable entity, starting from
 * the root of the tree and going left whenever the left subtree contains
 * at least one eligible (start <= vtime) entity.  The path on the right
 * is followed only if a) the left subtree contains no eligible entities
 * and b) no eligible entity has been found yet.
 */
static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
{
	struct bfq_entity *entry, *first = NULL;
	struct rb_node *node = st->active.rb_node;

	while (node != NULL) {
		entry = rb_entry(node, struct bfq_entity, rb_node);
left:
		if (!bfq_gt(entry->start, st->vtime))
			first = entry;

		BUG_ON(bfq_gt(entry->min_start, st->vtime));

		if (node->rb_left != NULL) {
			entry = rb_entry(node->rb_left,
					 struct bfq_entity, rb_node);
			if (!bfq_gt(entry->min_start, st->vtime)) {
				node = node->rb_left;
				goto left;
			}
		}
		if (first != NULL)
			break;
		node = node->rb_right;
	}

	BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
	return first;
}
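
/*
 * Illustrative note (added): this is the classic B-WF2Q+ selection
 * rule.  For example, with vtime = 10 and active entities
 * (start, finish) = (5, 40), (8, 25), (12, 15): the third is not
 * eligible (start > vtime), so the scheduler picks (8, 25), the
 * eligible entity with the smallest finish time, rather than the one
 * with the globally smallest finish time.
 */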

/**
 * __bfq_lookup_next_entity - return the first eligible entity in @st.
 * @st: the service tree.
 *
 * Update the virtual time in @st and return the first eligible entity
 * it contains.
 */
static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
						   bool force)
{
	struct bfq_entity *entity, *new_next_in_service = NULL;

	if (RB_EMPTY_ROOT(&st->active))
		return NULL;

	bfq_update_vtime(st);
	entity = bfq_first_active_entity(st);
	BUG_ON(bfq_gt(entity->start, st->vtime));

	/*
	 * If the chosen entity does not match with the sched_data's
	 * next_in_service and we are forcedly serving the IDLE priority
	 * class tree, bubble up budget update.
	 */
	if (unlikely(force && entity != entity->sched_data->next_in_service)) {
		new_next_in_service = entity;
		for_each_entity(new_next_in_service)
			bfq_update_budget(new_next_in_service);
	}

	return entity;
}

/**
 * bfq_lookup_next_entity - return the first eligible entity in @sd.
 * @sd: the sched_data.
 * @extract: if true the returned entity will also be extracted from @sd.
 *
 * NOTE: since we cache the next_in_service entity at each level of the
 * hierarchy, the complexity of the lookup can be decreased with
 * absolutely no effort just returning the cached next_in_service value;
 * we prefer to do full lookups to test the consistency of the data
 * structures.
 */
static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 int extract,
						 struct bfq_data *bfqd)
{
	struct bfq_service_tree *st = sd->service_tree;
	struct bfq_entity *entity;
	int i = 0;

	BUG_ON(sd->in_service_entity != NULL);

	if (bfqd != NULL &&
	    jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
		entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
						  true);
		if (entity != NULL) {
			i = BFQ_IOPRIO_CLASSES - 1;
			bfqd->bfq_class_idle_last_service = jiffies;
			sd->next_in_service = entity;
		}
	}
	for (; i < BFQ_IOPRIO_CLASSES; i++) {
		entity = __bfq_lookup_next_entity(st + i, false);
		if (entity != NULL) {
			if (extract) {
				if (sd->next_in_service != entity)
					entity = __bfq_lookup_next_entity(st + i,
									  true);
				bfq_check_next_in_service(sd, entity);
				bfq_active_extract(st + i, entity);
				sd->in_service_entity = entity;
				sd->next_in_service = NULL;
			}
			break;
		}
	}

	return entity;
}

/*
 * Get next queue for service.
 */
static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
{
	struct bfq_entity *entity = NULL;
	struct bfq_sched_data *sd;
	struct bfq_queue *bfqq;

	BUG_ON(bfqd->in_service_queue != NULL);

	if (bfqd->busy_queues == 0)
		return NULL;

	sd = &bfqd->root_group->sched_data;
	for (; sd != NULL; sd = entity->my_sched_data) {
		entity = bfq_lookup_next_entity(sd, 1, bfqd);
		BUG_ON(entity == NULL);
		entity->service = 0;
	}

	bfqq = bfq_entity_to_bfqq(entity);
	BUG_ON(bfqq == NULL);

	return bfqq;
}
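
/*
 * Illustrative walk through a hypothetical hierarchy (added): starting
 * from the root group's sched_data, each iteration of the loop above
 * picks the next entity at that level and descends into its
 * my_sched_data, e.g.
 *
 *	root group -> cgroup A -> queue of some process in A
 *
 * The descent stops at a leaf, where my_sched_data is NULL and
 * bfq_entity_to_bfqq() yields the queue to serve.
 */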

static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
{
	if (bfqd->in_service_bic != NULL) {
		put_io_context(bfqd->in_service_bic->icq.ioc);
		bfqd->in_service_bic = NULL;
	}

	bfqd->in_service_queue = NULL;
	del_timer(&bfqd->idle_slice_timer);
}

static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
				int requeue)
{
	struct bfq_entity *entity = &bfqq->entity;

	if (bfqq == bfqd->in_service_queue)
		__bfq_bfqd_reset_in_service(bfqd);

	bfq_deactivate_entity(entity, requeue);
}

static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_activate_entity(entity);
}

/*
 * Called when the bfqq no longer has requests pending; remove it from
 * the service tree.
 */
static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			      int requeue)
{
	BUG_ON(!bfq_bfqq_busy(bfqq));
	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));

	bfq_log_bfqq(bfqd, bfqq, "del from busy");

	bfq_clear_bfqq_busy(bfqq);

	BUG_ON(bfqd->busy_queues == 0);
	bfqd->busy_queues--;

	if (!bfqq->dispatched) {
		bfq_weights_tree_remove(bfqd, &bfqq->entity,
					&bfqd->queue_weights_tree);
		if (!blk_queue_nonrot(bfqd->queue)) {
			BUG_ON(!bfqd->busy_in_flight_queues);
			bfqd->busy_in_flight_queues--;
			if (bfq_bfqq_constantly_seeky(bfqq)) {
				BUG_ON(!bfqd->
					const_seeky_busy_in_flight_queues);
				bfqd->const_seeky_busy_in_flight_queues--;
			}
		}
	}
	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues--;

	bfq_deactivate_bfqq(bfqd, bfqq, requeue);
}

/*
 * Called when an inactive queue receives a new request.
 */
static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	BUG_ON(bfq_bfqq_busy(bfqq));
	BUG_ON(bfqq == bfqd->in_service_queue);

	bfq_log_bfqq(bfqd, bfqq, "add to busy");

	bfq_activate_bfqq(bfqd, bfqq);

	bfq_mark_bfqq_busy(bfqq);
	bfqd->busy_queues++;

	if (!bfqq->dispatched) {
		if (bfqq->wr_coeff == 1)
			bfq_weights_tree_add(bfqd, &bfqq->entity,
					     &bfqd->queue_weights_tree);
		if (!blk_queue_nonrot(bfqd->queue)) {
			bfqd->busy_in_flight_queues++;
			if (bfq_bfqq_constantly_seeky(bfqq))
				bfqd->const_seeky_busy_in_flight_queues++;
		}
	}
	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues++;
}
1183}