/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#ifndef _LRU_LIST_H
#define _LRU_LIST_H

#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/shrinker.h>

struct mem_cgroup;

/* list_lru_walk_cb must always return one of these values */
enum lru_status {
	LRU_REMOVED,		/* item removed from list */
	LRU_REMOVED_RETRY,	/* item removed, but lock has been
				   dropped and reacquired */
	LRU_ROTATE,		/* item referenced, give it another pass */
	LRU_SKIP,		/* item cannot be locked, skip */
	LRU_RETRY,		/* item not freeable. May drop the lock
				   internally, but must return locked. */
};

struct list_lru_one {
	struct list_head	list;
	/* may become negative during memcg reparenting */
	long			nr_items;
};

struct list_lru_memcg {
	/* array of per cgroup lists, indexed by memcg_cache_id */
	struct list_lru_one	*lru[0];
};

struct list_lru_node {
	/* protects all lists on the node, including per cgroup */
	spinlock_t		lock;
	/* global list, used for the root cgroup in cgroup aware lrus */
	struct list_lru_one	lru;
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
	struct list_lru_memcg	*memcg_lrus;
#endif
} ____cacheline_aligned_in_smp;

struct list_lru {
	struct list_lru_node	*node;
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
	struct list_head	list;
#endif
};

void list_lru_destroy(struct list_lru *lru);
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key);

#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)

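/*
 * A minimal lifecycle sketch (hypothetical caller code, not part of this
 * header): a cache embeds a list_lru, initialises it at creation time and
 * tears it down before the containing structure is freed.
 *
 *	struct my_cache {
 *		struct list_lru	lru;
 *	};
 *
 *	static int my_cache_create(struct my_cache *cache)
 *	{
 *		return list_lru_init(&cache->lru);
 *	}
 *
 *	static void my_cache_release(struct my_cache *cache)
 *	{
 *		list_lru_destroy(&cache->lru);
 *	}
 */
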
int memcg_update_all_list_lrus(int num_memcgs);
void memcg_drain_all_list_lrus(int src_idx, int dst_idx);

/**
 * list_lru_add: add an element to the lru list's tail
 * @lru: the lru pointer
 * @item: the item to be added.
 *
 * If the element is already part of a list, this function returns without
 * doing anything. The caller therefore does not need to track whether the
 * element is already on the list and may update it lazily. Note however
 * that this holds for *a* list, not *this* list: if the caller's objects
 * can sit on more than one type of list, it is up to the caller to fully
 * remove the item from the previous list (with list_lru_del(), for
 * instance) before moving it to @lru.
 *
 * Return value: true if the list was updated, false otherwise
 */
bool list_lru_add(struct list_lru *lru, struct list_head *item);

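/*
 * A minimal usage sketch (hypothetical object type, not part of this
 * header): the object embeds a list_head, and the caller can invoke
 * list_lru_add() on every access, relying on the lazy-update semantics
 * described above to make repeated calls cheap no-ops.
 *
 *	struct my_object {
 *		struct list_head	lru_node;
 *	};
 *
 *	static void my_object_mark_unused(struct list_lru *lru,
 *					  struct my_object *obj)
 *	{
 *		list_lru_add(lru, &obj->lru_node);
 *	}
 */
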
/**
 * list_lru_del: delete an element from the lru list
 * @lru: the lru pointer
 * @item: the item to be deleted.
 *
 * This function works analogously to list_lru_add() in terms of list
 * manipulation. The comments about an element already belonging to
 * a list are also valid for list_lru_del().
 *
 * Return value: true if the list was updated, false otherwise
 */
bool list_lru_del(struct list_lru *lru, struct list_head *item);

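/*
 * A minimal sketch of the teardown side (same hypothetical object as
 * above): an object must be taken off the lru before it is freed, and
 * list_lru_del() can be called unconditionally because deleting an item
 * that is not on a list is a no-op.
 *
 *	static void my_object_free(struct list_lru *lru,
 *				   struct my_object *obj)
 *	{
 *		list_lru_del(lru, &obj->lru_node);
 *		kfree(obj);
 *	}
 */
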
/**
 * list_lru_count_one: return the number of objects currently held by @lru
 * @lru: the lru pointer.
 * @nid: the node id to count from.
 * @memcg: the cgroup to count from.
 *
 * Always returns a non-negative number, 0 for empty lists. There is no
 * guarantee that the list is not updated while the count is being computed.
 * Callers that want such a guarantee need to provide an outer lock.
 */
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg);
unsigned long list_lru_count_node(struct list_lru *lru, int nid);

static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
						  struct shrink_control *sc)
{
	return list_lru_count_one(lru, sc->nid, sc->memcg);
}

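/*
 * A minimal sketch of a shrinker's count_objects callback built on
 * list_lru_shrink_count() (hypothetical my_cache type; reaching the lru
 * via container_of() on the embedded shrinker is one common pattern, not
 * mandated by this API):
 *
 *	struct my_cache {
 *		struct list_lru	lru;
 *		struct shrinker	shrinker;
 *	};
 *
 *	static unsigned long my_cache_count(struct shrinker *shrink,
 *					    struct shrink_control *sc)
 *	{
 *		struct my_cache *cache =
 *			container_of(shrink, struct my_cache, shrinker);
 *
 *		return list_lru_shrink_count(&cache->lru, sc);
 *	}
 */
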
static inline unsigned long list_lru_count(struct list_lru *lru)
{
	long count = 0;
	int nid;

	for_each_node_state(nid, N_NORMAL_MEMORY)
		count += list_lru_count_node(lru, nid);

	return count;
}

void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head);

typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
		struct list_lru_one *list, spinlock_t *lock, void *cb_arg);

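/*
 * A minimal sketch of an isolate callback (hypothetical my_object type and
 * my_object_in_use() helper, not part of this header): items still in use
 * are rotated to get another pass; everything else is moved to a
 * caller-supplied dispose list via list_lru_isolate_move() so it can be
 * freed after the walk, outside the lru lock.
 *
 *	static enum lru_status my_object_isolate(struct list_head *item,
 *			struct list_lru_one *list, spinlock_t *lock,
 *			void *cb_arg)
 *	{
 *		struct list_head *dispose = cb_arg;
 *		struct my_object *obj =
 *			container_of(item, struct my_object, lru_node);
 *
 *		if (my_object_in_use(obj))
 *			return LRU_ROTATE;
 *
 *		list_lru_isolate_move(list, item, dispose);
 *		return LRU_REMOVED;
 *	}
 */
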
/**
 * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items.
 * @lru: the lru pointer.
 * @nid: the node id to scan from.
 * @memcg: the cgroup to scan from.
 * @isolate: callback function that is responsible for deciding what to do
 * with the item currently being scanned
 * @cb_arg: opaque type that will be passed to @isolate
 * @nr_to_walk: how many items to scan.
 *
 * This function will scan all elements in a particular list_lru, calling the
 * @isolate callback for each of those items, along with the current list
 * spinlock and a caller-provided opaque argument. The @isolate callback can
 * choose to drop the lock internally, but *must* return with the lock held.
 * The callback returns an enum lru_status telling the list_lru
 * infrastructure what to do with the object being scanned.
 *
 * Please note that @nr_to_walk does not mean how many objects will be freed,
 * just how many objects will be scanned.
 *
 * Return value: the number of objects effectively removed from the LRU.
 */
unsigned long list_lru_walk_one(struct list_lru *lru,
				int nid, struct mem_cgroup *memcg,
				list_lru_walk_cb isolate, void *cb_arg,
				unsigned long *nr_to_walk);
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk);

static inline unsigned long
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
		     list_lru_walk_cb isolate, void *cb_arg)
{
	return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate, cb_arg,
				 &sc->nr_to_scan);
}

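/*
 * A minimal sketch of a shrinker's scan_objects callback pairing
 * list_lru_shrink_walk() with the isolate callback sketched above
 * (hypothetical my_cache/my_object/my_object_isolate names; the
 * dispose-list-then-free pattern is one common convention, not mandated
 * by this API):
 *
 *	static unsigned long my_cache_scan(struct shrinker *shrink,
 *					   struct shrink_control *sc)
 *	{
 *		struct my_cache *cache =
 *			container_of(shrink, struct my_cache, shrinker);
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&cache->lru, sc,
 *					     my_object_isolate, &dispose);
 *
 *		while (!list_empty(&dispose)) {
 *			struct my_object *obj = list_first_entry(&dispose,
 *					struct my_object, lru_node);
 *
 *			list_del_init(&obj->lru_node);
 *			kfree(obj);
 *		}
 *		return freed;
 *	}
 */
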
static inline unsigned long
list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
	      void *cb_arg, unsigned long nr_to_walk)
{
	long isolated = 0;
	int nid;

	for_each_node_state(nid, N_NORMAL_MEMORY) {
		isolated += list_lru_walk_node(lru, nid, isolate,
					       cb_arg, &nr_to_walk);
		if (nr_to_walk <= 0)
			break;
	}
	return isolated;
}
#endif /* _LRU_LIST_H */