Blame - drivers/md/dm-cache-policy-mq.c - kernel/msm-4.9

blob: 13f547a4eeb61f2715845090b8e2e5ce311c73d1 [file] [log] [blame]

Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 Red Hat. All rights reserved.
				3	*
				4	* This file is released under the GPL.
				5	*/
				6
				7	#include "dm-cache-policy.h"
				8	#include "dm.h"
				9
				10	#include <linux/hash.h>
				11	#include <linux/module.h>
				12	#include <linux/mutex.h>
				13	#include <linux/slab.h>
				14	#include <linux/vmalloc.h>
				15
				16	#define DM_MSG_PREFIX "cache-policy-mq"
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	17
				18	static struct kmem_cache *mq_entry_cache;
				19
				20	/----------------------------------------------------------------/
				21
				22	static unsigned next_power(unsigned n, unsigned min)
				23	{
				24	return roundup_pow_of_two(max(n, min));
				25	}
				26
				27	/----------------------------------------------------------------/
				28
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	29	/*
				30	* Large, sequential ios are probably better left on the origin device since
				31	* spindles tend to have good bandwidth.
				32	*
				33	* The io_tracker tries to spot when the io is in one of these sequential
				34	* modes.
				35	*
				36	* Two thresholds to switch between random and sequential io mode are defaulting
				37	* as follows and can be adjusted via the constructor and message interfaces.
				38	*/
				39	#define RANDOM_THRESHOLD_DEFAULT 4
				40	#define SEQUENTIAL_THRESHOLD_DEFAULT 512
				41
				42	enum io_pattern {
				43	PATTERN_SEQUENTIAL,
				44	PATTERN_RANDOM
				45	};
				46
				47	struct io_tracker {
				48	enum io_pattern pattern;
				49
				50	unsigned nr_seq_samples;
				51	unsigned nr_rand_samples;
				52	unsigned thresholds[2];
				53
				54	dm_oblock_t last_end_oblock;
				55	};
				56
				57	static void iot_init(struct io_tracker *t,
				58	int sequential_threshold, int random_threshold)
				59	{
				60	t->pattern = PATTERN_RANDOM;
				61	t->nr_seq_samples = 0;
				62	t->nr_rand_samples = 0;
				63	t->last_end_oblock = 0;
				64	t->thresholds[PATTERN_RANDOM] = random_threshold;
				65	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
				66	}
				67
				68	static enum io_pattern iot_pattern(struct io_tracker *t)
				69	{
				70	return t->pattern;
				71	}
				72
				73	static void iot_update_stats(struct io_tracker t, struct bio bio)
				74	{
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	75	if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	76	t->nr_seq_samples++;
				77	else {
				78	/*
				79	* Just one non-sequential IO is enough to reset the
				80	* counters.
				81	*/
				82	if (t->nr_seq_samples) {
				83	t->nr_seq_samples = 0;
				84	t->nr_rand_samples = 0;
				85	}
				86
				87	t->nr_rand_samples++;
				88	}
				89
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	90	t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	91	}
				92
				93	static void iot_check_for_pattern_switch(struct io_tracker *t)
				94	{
				95	switch (t->pattern) {
				96	case PATTERN_SEQUENTIAL:
				97	if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
				98	t->pattern = PATTERN_RANDOM;
				99	t->nr_seq_samples = t->nr_rand_samples = 0;
				100	}
				101	break;
				102
				103	case PATTERN_RANDOM:
				104	if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
				105	t->pattern = PATTERN_SEQUENTIAL;
				106	t->nr_seq_samples = t->nr_rand_samples = 0;
				107	}
				108	break;
				109	}
				110	}
				111
				112	static void iot_examine_bio(struct io_tracker t, struct bio bio)
				113	{
				114	iot_update_stats(t, bio);
				115	iot_check_for_pattern_switch(t);
				116	}
				117
				118	/----------------------------------------------------------------/
				119
				120
				121	/*
				122	* This queue is divided up into different levels. Allowing us to push
				123	* entries to the back of any of the levels. Think of it as a partially
				124	* sorted queue.
				125	*/
				126	#define NR_QUEUE_LEVELS 16u
				127
				128	struct queue {
				129	struct list_head qs[NR_QUEUE_LEVELS];
				130	};
				131
				132	static void queue_init(struct queue *q)
				133	{
				134	unsigned i;
				135
				136	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				137	INIT_LIST_HEAD(q->qs + i);
				138	}
				139
				140	/*
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	141	* Checks to see if the queue is empty.
				142	* FIXME: reduce cpu usage.
				143	*/
				144	static bool queue_empty(struct queue *q)
				145	{
				146	unsigned i;
				147
				148	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				149	if (!list_empty(q->qs + i))
				150	return false;
				151
				152	return true;
				153	}
				154
				155	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	156	* Insert an entry to the back of the given level.
				157	*/
				158	static void queue_push(struct queue q, unsigned level, struct list_head elt)
				159	{
				160	list_add_tail(elt, q->qs + level);
				161	}
				162
				163	static void queue_remove(struct list_head *elt)
				164	{
				165	list_del(elt);
				166	}
				167
				168	/*
				169	* Shifts all regions down one level. This has no effect on the order of
				170	* the queue.
				171	*/
				172	static void queue_shift_down(struct queue *q)
				173	{
				174	unsigned level;
				175
				176	for (level = 1; level < NR_QUEUE_LEVELS; level++)
				177	list_splice_init(q->qs + level, q->qs + level - 1);
				178	}
				179
				180	/*
				181	* Gives us the oldest entry of the lowest popoulated level. If the first
				182	* level is emptied then we shift down one level.
				183	*/
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	184	static struct list_head queue_peek(struct queue q)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	185	{
				186	unsigned level;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	187
				188	for (level = 0; level < NR_QUEUE_LEVELS; level++)
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	189	if (!list_empty(q->qs + level))
				190	return q->qs[level].next;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	191
				192	return NULL;
				193	}
				194
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	195	static struct list_head queue_pop(struct queue q)
				196	{
				197	struct list_head *r = queue_peek(q);
				198
				199	if (r) {
				200	list_del(r);
				201
				202	/* have we just emptied the bottom level? */
				203	if (list_empty(q->qs))
				204	queue_shift_down(q);
				205	}
				206
				207	return r;
				208	}
				209
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	210	static struct list_head list_pop(struct list_head lh)
				211	{
				212	struct list_head *r = lh->next;
				213
				214	BUG_ON(!r);
				215	list_del_init(r);
				216
				217	return r;
				218	}
				219
				220	/----------------------------------------------------------------/
				221
				222	/*
				223	* Describes a cache entry. Used in both the cache and the pre_cache.
				224	*/
				225	struct entry {
				226	struct hlist_node hlist;
				227	struct list_head list;
				228	dm_oblock_t oblock;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	229
				230	/*
				231	* FIXME: pack these better
				232	*/
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	233	bool dirty:1;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	234	unsigned hit_count;
				235	unsigned generation;
				236	unsigned tick;
				237	};
				238
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	239	/*
				240	* Rather than storing the cblock in an entry, we allocate all entries in
				241	* an array, and infer the cblock from the entry position.
				242	*
				243	* Free entries are linked together into a list.
				244	*/
				245	struct entry_pool {
				246	struct entry entries, entries_end;
				247	struct list_head free;
				248	unsigned nr_allocated;
				249	};
				250
				251	static int epool_init(struct entry_pool *ep, unsigned nr_entries)
				252	{
				253	unsigned i;
				254
				255	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
				256	if (!ep->entries)
				257	return -ENOMEM;
				258
				259	ep->entries_end = ep->entries + nr_entries;
				260
				261	INIT_LIST_HEAD(&ep->free);
				262	for (i = 0; i < nr_entries; i++)
				263	list_add(&ep->entries[i].list, &ep->free);
				264
				265	ep->nr_allocated = 0;
				266
				267	return 0;
				268	}
				269
				270	static void epool_exit(struct entry_pool *ep)
				271	{
				272	vfree(ep->entries);
				273	}
				274
				275	static struct entry alloc_entry(struct entry_pool ep)
				276	{
				277	struct entry *e;
				278
				279	if (list_empty(&ep->free))
				280	return NULL;
				281
				282	e = list_entry(list_pop(&ep->free), struct entry, list);
				283	INIT_LIST_HEAD(&e->list);
				284	INIT_HLIST_NODE(&e->hlist);
				285	ep->nr_allocated++;
				286
				287	return e;
				288	}
				289
				290	/*
				291	* This assumes the cblock hasn't already been allocated.
				292	*/
				293	static struct entry alloc_particular_entry(struct entry_pool ep, dm_cblock_t cblock)
				294	{
				295	struct entry *e = ep->entries + from_cblock(cblock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	296
Wei Yongjun	b815805	2013-11-18 13:32:43 -0500	[diff] [blame]	297	list_del_init(&e->list);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	298	INIT_HLIST_NODE(&e->hlist);
				299	ep->nr_allocated++;
				300
				301	return e;
				302	}
				303
				304	static void free_entry(struct entry_pool ep, struct entry e)
				305	{
				306	BUG_ON(!ep->nr_allocated);
				307	ep->nr_allocated--;
				308	INIT_HLIST_NODE(&e->hlist);
				309	list_add(&e->list, &ep->free);
				310	}
				311
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	312	/*
				313	* Returns NULL if the entry is free.
				314	*/
				315	static struct entry epool_find(struct entry_pool ep, dm_cblock_t cblock)
				316	{
				317	struct entry *e = ep->entries + from_cblock(cblock);
Mike Snitzer	7b6b2bc	2013-11-12 12:17:43 -0500	[diff] [blame]	318	return !hlist_unhashed(&e->hlist) ? e : NULL;
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	319	}
				320
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	321	static bool epool_empty(struct entry_pool *ep)
				322	{
				323	return list_empty(&ep->free);
				324	}
				325
				326	static bool in_pool(struct entry_pool ep, struct entry e)
				327	{
				328	return e >= ep->entries && e < ep->entries_end;
				329	}
				330
				331	static dm_cblock_t infer_cblock(struct entry_pool ep, struct entry e)
				332	{
				333	return to_cblock(e - ep->entries);
				334	}
				335
				336	/----------------------------------------------------------------/
				337
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	338	struct mq_policy {
				339	struct dm_cache_policy policy;
				340
				341	/* protects everything */
				342	struct mutex lock;
				343	dm_cblock_t cache_size;
				344	struct io_tracker tracker;
				345
				346	/*
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	347	* Entries come from two pools, one of pre-cache entries, and one
				348	* for the cache proper.
				349	*/
				350	struct entry_pool pre_cache_pool;
				351	struct entry_pool cache_pool;
				352
				353	/*
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	354	* We maintain three queues of entries. The cache proper,
				355	* consisting of a clean and dirty queue, contains the currently
				356	* active mappings. Whereas the pre_cache tracks blocks that
				357	* are being hit frequently and potential candidates for promotion
				358	* to the cache.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	359	*/
				360	struct queue pre_cache;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	361	struct queue cache_clean;
				362	struct queue cache_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	363
				364	/*
				365	* Keeps track of time, incremented by the core. We use this to
				366	* avoid attributing multiple hits within the same tick.
				367	*
				368	* Access to tick_protected should be done with the spin lock held.
				369	* It's copied to tick at the start of the map function (within the
				370	* mutex).
				371	*/
				372	spinlock_t tick_lock;
				373	unsigned tick_protected;
				374	unsigned tick;
				375
				376	/*
				377	* A count of the number of times the map function has been called
				378	* and found an entry in the pre_cache or cache. Currently used to
				379	* calculate the generation.
				380	*/
				381	unsigned hit_count;
				382
				383	/*
				384	* A generation is a longish period that is used to trigger some
				385	* book keeping effects. eg, decrementing hit counts on entries.
				386	* This is needed to allow the cache to evolve as io patterns
				387	* change.
				388	*/
				389	unsigned generation;
				390	unsigned generation_period; /* in lookups (will probably change) */
				391
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	392	unsigned discard_promote_adjustment;
				393	unsigned read_promote_adjustment;
				394	unsigned write_promote_adjustment;
				395
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	396	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	397	* The hash table allows us to quickly find an entry by origin
				398	* block. Both pre_cache and cache entries are in here.
				399	*/
				400	unsigned nr_buckets;
				401	dm_block_t hash_bits;
				402	struct hlist_head *table;
				403	};
				404
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	405	#define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1
				406	#define DEFAULT_READ_PROMOTE_ADJUSTMENT 4
				407	#define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	408	#define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	409
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	410	/----------------------------------------------------------------/
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	411
				412	/*
				413	* Simple hash table implementation. Should replace with the standard hash
				414	* table that's making its way upstream.
				415	*/
				416	static void hash_insert(struct mq_policy mq, struct entry e)
				417	{
				418	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);
				419
				420	hlist_add_head(&e->hlist, mq->table + h);
				421	}
				422
				423	static struct entry hash_lookup(struct mq_policy mq, dm_oblock_t oblock)
				424	{
				425	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
				426	struct hlist_head *bucket = mq->table + h;
				427	struct entry *e;
				428
				429	hlist_for_each_entry(e, bucket, hlist)
				430	if (e->oblock == oblock) {
				431	hlist_del(&e->hlist);
				432	hlist_add_head(&e->hlist, bucket);
				433	return e;
				434	}
				435
				436	return NULL;
				437	}
				438
				439	static void hash_remove(struct entry *e)
				440	{
				441	hlist_del(&e->hlist);
				442	}
				443
				444	/----------------------------------------------------------------/
				445
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	446	static bool any_free_cblocks(struct mq_policy *mq)
				447	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	448	return !epool_empty(&mq->cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	449	}
				450
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	451	static bool any_clean_cblocks(struct mq_policy *mq)
				452	{
				453	return !queue_empty(&mq->cache_clean);
				454	}
				455
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	456	/----------------------------------------------------------------/
				457
				458	/*
				459	* Now we get to the meat of the policy. This section deals with deciding
				460	* when to to add entries to the pre_cache and cache, and move between
				461	* them.
				462	*/
				463
				464	/*
				465	* The queue level is based on the log2 of the hit count.
				466	*/
				467	static unsigned queue_level(struct entry *e)
				468	{
				469	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
				470	}
				471
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	472	static bool in_cache(struct mq_policy mq, struct entry e)
				473	{
				474	return in_pool(&mq->cache_pool, e);
				475	}
				476
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	477	/*
				478	* Inserts the entry into the pre_cache or the cache. Ensures the cache
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	479	* block is marked as allocated if necc. Inserts into the hash table.
				480	* Sets the tick which records when the entry was last moved about.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	481	*/
				482	static void push(struct mq_policy mq, struct entry e)
				483	{
				484	e->tick = mq->tick;
				485	hash_insert(mq, e);
				486
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	487	if (in_cache(mq, e))
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	488	queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
				489	queue_level(e), &e->list);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	490	else
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	491	queue_push(&mq->pre_cache, queue_level(e), &e->list);
				492	}
				493
				494	/*
				495	* Removes an entry from pre_cache or cache. Removes from the hash table.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	496	*/
				497	static void del(struct mq_policy mq, struct entry e)
				498	{
				499	queue_remove(&e->list);
				500	hash_remove(e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	501	}
				502
				503	/*
				504	* Like del, except it removes the first entry in the queue (ie. the least
				505	* recently used).
				506	*/
				507	static struct entry pop(struct mq_policy mq, struct queue *q)
				508	{
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	509	struct entry *e;
				510	struct list_head *h = queue_pop(q);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	511
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	512	if (!h)
				513	return NULL;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	514
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	515	e = container_of(h, struct entry, list);
				516	hash_remove(e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	517
				518	return e;
				519	}
				520
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	521	static struct entry peek(struct queue q)
				522	{
				523	struct list_head *h = queue_peek(q);
				524	return h ? container_of(h, struct entry, list) : NULL;
				525	}
				526
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	527	/*
				528	* Has this entry already been updated?
				529	*/
				530	static bool updated_this_tick(struct mq_policy mq, struct entry e)
				531	{
				532	return mq->tick == e->tick;
				533	}
				534
				535	/*
				536	* The promotion threshold is adjusted every generation. As are the counts
				537	* of the entries.
				538	*
				539	* At the moment the threshold is taken by averaging the hit counts of some
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	540	* of the entries in the cache (the first 20 entries across all levels in
				541	* ascending order, giving preference to the clean entries at each level).
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	542	*
				543	* We can be much cleverer than this though. For example, each promotion
				544	* could bump up the threshold helping to prevent churn. Much more to do
				545	* here.
				546	*/
				547
				548	#define MAX_TO_AVERAGE 20
				549
				550	static void check_generation(struct mq_policy *mq)
				551	{
				552	unsigned total = 0, nr = 0, count = 0, level;
				553	struct list_head *head;
				554	struct entry *e;
				555
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	556	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	557	mq->hit_count = 0;
				558	mq->generation++;
				559
				560	for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	561	head = mq->cache_clean.qs + level;
				562	list_for_each_entry(e, head, list) {
				563	nr++;
				564	total += e->hit_count;
				565
				566	if (++count >= MAX_TO_AVERAGE)
				567	break;
				568	}
				569
				570	head = mq->cache_dirty.qs + level;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	571	list_for_each_entry(e, head, list) {
				572	nr++;
				573	total += e->hit_count;
				574
				575	if (++count >= MAX_TO_AVERAGE)
				576	break;
				577	}
				578	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	579	}
				580	}
				581
				582	/*
				583	* Whenever we use an entry we bump up it's hit counter, and push it to the
				584	* back to it's current level.
				585	*/
				586	static void requeue_and_update_tick(struct mq_policy mq, struct entry e)
				587	{
				588	if (updated_this_tick(mq, e))
				589	return;
				590
				591	e->hit_count++;
				592	mq->hit_count++;
				593	check_generation(mq);
				594
				595	/* generation adjustment, to stop the counts increasing forever. */
				596	/* FIXME: divide? */
				597	/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
				598	e->generation = mq->generation;
				599
				600	del(mq, e);
				601	push(mq, e);
				602	}
				603
				604	/*
				605	* Demote the least recently used entry from the cache to the pre_cache.
				606	* Returns the new cache entry to use, and the old origin block it was
				607	* mapped to.
				608	*
				609	* We drop the hit count on the demoted entry back to 1 to stop it bouncing
				610	* straight back into the cache if it's subsequently hit. There are
				611	* various options here, and more experimentation would be good:
				612	*
				613	* - just forget about the demoted entry completely (ie. don't insert it
				614	into the pre_cache).
				615	* - divide the hit count rather that setting to some hard coded value.
				616	* - set the hit count to a hard coded value other than 1, eg, is it better
				617	* if it goes in at level 2?
				618	*/
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	619	static int demote_cblock(struct mq_policy mq, dm_oblock_t oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	620	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	621	struct entry *demoted = pop(mq, &mq->cache_clean);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	622
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	623	if (!demoted)
				624	/*
				625	* We could get a block from mq->cache_dirty, but that
				626	* would add extra latency to the triggering bio as it
				627	* waits for the writeback. Better to not promote this
				628	* time and hope there's a clean block next time this block
				629	* is hit.
				630	*/
				631	return -ENOSPC;
				632
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	633	*oblock = demoted->oblock;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	634	free_entry(&mq->cache_pool, demoted);
				635
				636	/*
				637	* We used to put the demoted block into the pre-cache, but I think
				638	* it's simpler to just let it work it's way up from zero again.
				639	* Stops blocks flickering in and out of the cache.
				640	*/
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	641
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	642	return 0;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	643	}
				644
				645	/*
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	646	* Entries in the pre_cache whose hit count passes the promotion
				647	* threshold move to the cache proper. Working out the correct
				648	* value for the promotion_threshold is crucial to this policy.
				649	*/
				650	static unsigned promote_threshold(struct mq_policy *mq)
				651	{
				652	struct entry *e;
				653
				654	if (any_free_cblocks(mq))
				655	return 0;
				656
				657	e = peek(&mq->cache_clean);
				658	if (e)
				659	return e->hit_count;
				660
				661	e = peek(&mq->cache_dirty);
				662	if (e)
				663	return e->hit_count + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD;
				664
				665	/* This should never happen */
				666	return 0;
				667	}
				668
				669	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	670	* We modify the basic promotion_threshold depending on the specific io.
				671	*
				672	* If the origin block has been discarded then there's no cost to copy it
				673	* to the cache.
				674	*
				675	* We bias towards reads, since they can be demoted at no cost if they
				676	* haven't been dirtied.
				677	*/
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	678	static unsigned adjusted_promote_threshold(struct mq_policy *mq,
				679	bool discarded_oblock, int data_dir)
				680	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	681	if (data_dir == READ)
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	682	return promote_threshold(mq) + mq->read_promote_adjustment;
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	683
				684	if (discarded_oblock && (any_free_cblocks(mq) \|\| any_clean_cblocks(mq))) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	685	/*
				686	* We don't need to do any copying at all, so give this a
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	687	* very low threshold.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	688	*/
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	689	return mq->discard_promote_adjustment;
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	690	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	691
Joe Thornber	b155aa0	2014-10-22 14:30:58 +0100	[diff] [blame]	692	return promote_threshold(mq) + mq->write_promote_adjustment;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	693	}
				694
				695	static bool should_promote(struct mq_policy mq, struct entry e,
				696	bool discarded_oblock, int data_dir)
				697	{
				698	return e->hit_count >=
				699	adjusted_promote_threshold(mq, discarded_oblock, data_dir);
				700	}
				701
				702	static int cache_entry_found(struct mq_policy *mq,
				703	struct entry *e,
				704	struct policy_result *result)
				705	{
				706	requeue_and_update_tick(mq, e);
				707
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	708	if (in_cache(mq, e)) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	709	result->op = POLICY_HIT;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	710	result->cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	711	}
				712
				713	return 0;
				714	}
				715
				716	/*
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	717	* Moves an entry from the pre_cache to the cache. The main work is
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	718	* finding which cache block to use.
				719	*/
				720	static int pre_cache_to_cache(struct mq_policy mq, struct entry e,
				721	struct policy_result *result)
				722	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	723	int r;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	724	struct entry *new_e;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	725
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	726	/* Ensure there's a free cblock in the cache */
				727	if (epool_empty(&mq->cache_pool)) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	728	result->op = POLICY_REPLACE;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	729	r = demote_cblock(mq, &result->old_oblock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	730	if (r) {
				731	result->op = POLICY_MISS;
				732	return 0;
				733	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	734	} else
				735	result->op = POLICY_NEW;
				736
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	737	new_e = alloc_entry(&mq->cache_pool);
				738	BUG_ON(!new_e);
				739
				740	new_e->oblock = e->oblock;
				741	new_e->dirty = false;
				742	new_e->hit_count = e->hit_count;
				743	new_e->generation = e->generation;
				744	new_e->tick = e->tick;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	745
				746	del(mq, e);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	747	free_entry(&mq->pre_cache_pool, e);
				748	push(mq, new_e);
				749
				750	result->cblock = infer_cblock(&mq->cache_pool, new_e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	751
				752	return 0;
				753	}
				754
				755	static int pre_cache_entry_found(struct mq_policy mq, struct entry e,
				756	bool can_migrate, bool discarded_oblock,
				757	int data_dir, struct policy_result *result)
				758	{
				759	int r = 0;
				760	bool updated = updated_this_tick(mq, e);
				761
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	762	if ((!discarded_oblock && updated) \|\|
Joe Thornber	af95e7a	2013-11-15 10:51:20 +0000	[diff] [blame]	763	!should_promote(mq, e, discarded_oblock, data_dir)) {
				764	requeue_and_update_tick(mq, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	765	result->op = POLICY_MISS;
Joe Thornber	af95e7a	2013-11-15 10:51:20 +0000	[diff] [blame]	766
				767	} else if (!can_migrate)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	768	r = -EWOULDBLOCK;
Joe Thornber	af95e7a	2013-11-15 10:51:20 +0000	[diff] [blame]	769
				770	else {
				771	requeue_and_update_tick(mq, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	772	r = pre_cache_to_cache(mq, e, result);
Joe Thornber	af95e7a	2013-11-15 10:51:20 +0000	[diff] [blame]	773	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	774
				775	return r;
				776	}
				777
				778	static void insert_in_pre_cache(struct mq_policy *mq,
				779	dm_oblock_t oblock)
				780	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	781	struct entry *e = alloc_entry(&mq->pre_cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	782
				783	if (!e)
				784	/*
				785	* There's no spare entry structure, so we grab the least
				786	* used one from the pre_cache.
				787	*/
				788	e = pop(mq, &mq->pre_cache);
				789
				790	if (unlikely(!e)) {
				791	DMWARN("couldn't pop from pre cache");
				792	return;
				793	}
				794
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	795	e->dirty = false;
				796	e->oblock = oblock;
				797	e->hit_count = 1;
				798	e->generation = mq->generation;
				799	push(mq, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	800	}
				801
				802	static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
				803	struct policy_result *result)
				804	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	805	int r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	806	struct entry *e;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	807
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	808	if (epool_empty(&mq->cache_pool)) {
				809	result->op = POLICY_REPLACE;
				810	r = demote_cblock(mq, &result->old_oblock);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	811	if (unlikely(r)) {
				812	result->op = POLICY_MISS;
				813	insert_in_pre_cache(mq, oblock);
				814	return;
				815	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	816
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	817	/*
				818	* This will always succeed, since we've just demoted.
				819	*/
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	820	e = alloc_entry(&mq->cache_pool);
				821	BUG_ON(!e);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	822
				823	} else {
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	824	e = alloc_entry(&mq->cache_pool);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	825	result->op = POLICY_NEW;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	826	}
				827
				828	e->oblock = oblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	829	e->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	830	e->hit_count = 1;
				831	e->generation = mq->generation;
				832	push(mq, e);
				833
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	834	result->cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	835	}
				836
				837	static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
				838	bool can_migrate, bool discarded_oblock,
				839	int data_dir, struct policy_result *result)
				840	{
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	841	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	842	if (can_migrate)
				843	insert_in_cache(mq, oblock, result);
				844	else
				845	return -EWOULDBLOCK;
				846	} else {
				847	insert_in_pre_cache(mq, oblock);
				848	result->op = POLICY_MISS;
				849	}
				850
				851	return 0;
				852	}
				853
				854	/*
				855	* Looks the oblock up in the hash table, then decides whether to put in
				856	* pre_cache, or cache etc.
				857	*/
				858	static int map(struct mq_policy *mq, dm_oblock_t oblock,
				859	bool can_migrate, bool discarded_oblock,
				860	int data_dir, struct policy_result *result)
				861	{
				862	int r = 0;
				863	struct entry *e = hash_lookup(mq, oblock);
				864
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	865	if (e && in_cache(mq, e))
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	866	r = cache_entry_found(mq, e, result);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	867
Mike Snitzer	f1afb36	2014-10-30 10:02:01 -0400	[diff] [blame]	868	else if (mq->tracker.thresholds[PATTERN_SEQUENTIAL] &&
				869	iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	870	result->op = POLICY_MISS;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	871
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	872	else if (e)
				873	r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
				874	data_dir, result);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	875
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	876	else
				877	r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
				878	data_dir, result);
				879
				880	if (r == -EWOULDBLOCK)
				881	result->op = POLICY_MISS;
				882
				883	return r;
				884	}
				885
				886	/----------------------------------------------------------------/
				887
				888	/*
				889	* Public interface, via the policy struct. See dm-cache-policy.h for a
				890	* description of these.
				891	*/
				892
				893	static struct mq_policy to_mq_policy(struct dm_cache_policy p)
				894	{
				895	return container_of(p, struct mq_policy, policy);
				896	}
				897
				898	static void mq_destroy(struct dm_cache_policy *p)
				899	{
				900	struct mq_policy *mq = to_mq_policy(p);
				901
Heinz Mauelshagen	14f398c	2014-02-28 12:02:56 -0500	[diff] [blame]	902	vfree(mq->table);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	903	epool_exit(&mq->cache_pool);
				904	epool_exit(&mq->pre_cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	905	kfree(mq);
				906	}
				907
				908	static void copy_tick(struct mq_policy *mq)
				909	{
				910	unsigned long flags;
				911
				912	spin_lock_irqsave(&mq->tick_lock, flags);
				913	mq->tick = mq->tick_protected;
				914	spin_unlock_irqrestore(&mq->tick_lock, flags);
				915	}
				916
				917	static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
				918	bool can_block, bool can_migrate, bool discarded_oblock,
				919	struct bio bio, struct policy_result result)
				920	{
				921	int r;
				922	struct mq_policy *mq = to_mq_policy(p);
				923
				924	result->op = POLICY_MISS;
				925
				926	if (can_block)
				927	mutex_lock(&mq->lock);
				928	else if (!mutex_trylock(&mq->lock))
				929	return -EWOULDBLOCK;
				930
				931	copy_tick(mq);
				932
				933	iot_examine_bio(&mq->tracker, bio);
				934	r = map(mq, oblock, can_migrate, discarded_oblock,
				935	bio_data_dir(bio), result);
				936
				937	mutex_unlock(&mq->lock);
				938
				939	return r;
				940	}
				941
				942	static int mq_lookup(struct dm_cache_policy p, dm_oblock_t oblock, dm_cblock_t cblock)
				943	{
				944	int r;
				945	struct mq_policy *mq = to_mq_policy(p);
				946	struct entry *e;
				947
				948	if (!mutex_trylock(&mq->lock))
				949	return -EWOULDBLOCK;
				950
				951	e = hash_lookup(mq, oblock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	952	if (e && in_cache(mq, e)) {
				953	*cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	954	r = 0;
				955	} else
				956	r = -ENOENT;
				957
				958	mutex_unlock(&mq->lock);
				959
				960	return r;
				961	}
				962
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	963	static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	964	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	965	struct entry *e;
				966
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	967	e = hash_lookup(mq, oblock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	968	BUG_ON(!e \|\| !in_cache(mq, e));
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	969
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	970	del(mq, e);
				971	e->dirty = set;
				972	push(mq, e);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	973	}
				974
				975	static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				976	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	977	struct mq_policy *mq = to_mq_policy(p);
				978
				979	mutex_lock(&mq->lock);
				980	__mq_set_clear_dirty(mq, oblock, true);
				981	mutex_unlock(&mq->lock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	982	}
				983
				984	static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				985	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	986	struct mq_policy *mq = to_mq_policy(p);
				987
				988	mutex_lock(&mq->lock);
				989	__mq_set_clear_dirty(mq, oblock, false);
				990	mutex_unlock(&mq->lock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	991	}
				992
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	993	static int mq_load_mapping(struct dm_cache_policy *p,
				994	dm_oblock_t oblock, dm_cblock_t cblock,
				995	uint32_t hint, bool hint_valid)
				996	{
				997	struct mq_policy *mq = to_mq_policy(p);
				998	struct entry *e;
				999
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1000	e = alloc_particular_entry(&mq->cache_pool, cblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1001	e->oblock = oblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1002	e->dirty = false; /* this gets corrected in a minute */
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1003	e->hit_count = hint_valid ? hint : 1;
				1004	e->generation = mq->generation;
				1005	push(mq, e);
				1006
				1007	return 0;
				1008	}
				1009
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1010	static int mq_save_hints(struct mq_policy mq, struct queue q,
				1011	policy_walk_fn fn, void *context)
				1012	{
				1013	int r;
				1014	unsigned level;
				1015	struct entry *e;
				1016
				1017	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				1018	list_for_each_entry(e, q->qs + level, list) {
				1019	r = fn(context, infer_cblock(&mq->cache_pool, e),
				1020	e->oblock, e->hit_count);
				1021	if (r)
				1022	return r;
				1023	}
				1024
				1025	return 0;
				1026	}
				1027
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1028	static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
				1029	void *context)
				1030	{
				1031	struct mq_policy *mq = to_mq_policy(p);
				1032	int r = 0;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1033
				1034	mutex_lock(&mq->lock);
				1035
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1036	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
				1037	if (!r)
				1038	r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1039
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1040	mutex_unlock(&mq->lock);
				1041
				1042	return r;
				1043	}
				1044
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1045	static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
				1046	{
				1047	struct entry *e;
				1048
				1049	e = hash_lookup(mq, oblock);
				1050	BUG_ON(!e \|\| !in_cache(mq, e));
				1051
				1052	del(mq, e);
				1053	free_entry(&mq->cache_pool, e);
				1054	}
				1055
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1056	static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1057	{
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1058	struct mq_policy *mq = to_mq_policy(p);
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1059
				1060	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1061	__remove_mapping(mq, oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1062	mutex_unlock(&mq->lock);
				1063	}
				1064
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	1065	static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
				1066	{
				1067	struct entry *e = epool_find(&mq->cache_pool, cblock);
				1068
				1069	if (!e)
				1070	return -ENODATA;
				1071
				1072	del(mq, e);
				1073	free_entry(&mq->cache_pool, e);
				1074
				1075	return 0;
				1076	}
				1077
				1078	static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
				1079	{
				1080	int r;
				1081	struct mq_policy *mq = to_mq_policy(p);
				1082
				1083	mutex_lock(&mq->lock);
				1084	r = __remove_cblock(mq, cblock);
				1085	mutex_unlock(&mq->lock);
				1086
				1087	return r;
				1088	}
				1089
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1090	static int __mq_writeback_work(struct mq_policy mq, dm_oblock_t oblock,
				1091	dm_cblock_t *cblock)
				1092	{
				1093	struct entry *e = pop(mq, &mq->cache_dirty);
				1094
				1095	if (!e)
				1096	return -ENODATA;
				1097
				1098	*oblock = e->oblock;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1099	*cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1100	e->dirty = false;
				1101	push(mq, e);
				1102
				1103	return 0;
				1104	}
				1105
				1106	static int mq_writeback_work(struct dm_cache_policy p, dm_oblock_t oblock,
				1107	dm_cblock_t *cblock)
				1108	{
				1109	int r;
				1110	struct mq_policy *mq = to_mq_policy(p);
				1111
				1112	mutex_lock(&mq->lock);
				1113	r = __mq_writeback_work(mq, oblock, cblock);
				1114	mutex_unlock(&mq->lock);
				1115
				1116	return r;
				1117	}
				1118
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1119	static void __force_mapping(struct mq_policy *mq,
				1120	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1121	{
				1122	struct entry *e = hash_lookup(mq, current_oblock);
				1123
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1124	if (e && in_cache(mq, e)) {
				1125	del(mq, e);
				1126	e->oblock = new_oblock;
				1127	e->dirty = true;
				1128	push(mq, e);
				1129	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1130	}
				1131
				1132	static void mq_force_mapping(struct dm_cache_policy *p,
				1133	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
				1134	{
				1135	struct mq_policy *mq = to_mq_policy(p);
				1136
				1137	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1138	__force_mapping(mq, current_oblock, new_oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1139	mutex_unlock(&mq->lock);
				1140	}
				1141
				1142	static dm_cblock_t mq_residency(struct dm_cache_policy *p)
				1143	{
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1144	dm_cblock_t r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1145	struct mq_policy *mq = to_mq_policy(p);
				1146
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1147	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1148	r = to_cblock(mq->cache_pool.nr_allocated);
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1149	mutex_unlock(&mq->lock);
				1150
				1151	return r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1152	}
				1153
				1154	static void mq_tick(struct dm_cache_policy *p)
				1155	{
				1156	struct mq_policy *mq = to_mq_policy(p);
				1157	unsigned long flags;
				1158
				1159	spin_lock_irqsave(&mq->tick_lock, flags);
				1160	mq->tick_protected++;
				1161	spin_unlock_irqrestore(&mq->tick_lock, flags);
				1162	}
				1163
				1164	static int mq_set_config_value(struct dm_cache_policy *p,
				1165	const char key, const char value)
				1166	{
				1167	struct mq_policy *mq = to_mq_policy(p);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1168	unsigned long tmp;
				1169
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1170	if (kstrtoul(value, 10, &tmp))
				1171	return -EINVAL;
				1172
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	1173	if (!strcasecmp(key, "random_threshold")) {
				1174	mq->tracker.thresholds[PATTERN_RANDOM] = tmp;
				1175
				1176	} else if (!strcasecmp(key, "sequential_threshold")) {
				1177	mq->tracker.thresholds[PATTERN_SEQUENTIAL] = tmp;
				1178
				1179	} else if (!strcasecmp(key, "discard_promote_adjustment"))
				1180	mq->discard_promote_adjustment = tmp;
				1181
				1182	else if (!strcasecmp(key, "read_promote_adjustment"))
				1183	mq->read_promote_adjustment = tmp;
				1184
				1185	else if (!strcasecmp(key, "write_promote_adjustment"))
				1186	mq->write_promote_adjustment = tmp;
				1187
				1188	else
				1189	return -EINVAL;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1190
				1191	return 0;
				1192	}
				1193
				1194	static int mq_emit_config_values(struct dm_cache_policy p, char result, unsigned maxlen)
				1195	{
				1196	ssize_t sz = 0;
				1197	struct mq_policy *mq = to_mq_policy(p);
				1198
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	1199	DMEMIT("10 random_threshold %u "
				1200	"sequential_threshold %u "
				1201	"discard_promote_adjustment %u "
				1202	"read_promote_adjustment %u "
				1203	"write_promote_adjustment %u",
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1204	mq->tracker.thresholds[PATTERN_RANDOM],
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	1205	mq->tracker.thresholds[PATTERN_SEQUENTIAL],
				1206	mq->discard_promote_adjustment,
				1207	mq->read_promote_adjustment,
				1208	mq->write_promote_adjustment);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1209
				1210	return 0;
				1211	}
				1212
				1213	/* Init the policy plugin interface function pointers. */
				1214	static void init_policy_functions(struct mq_policy *mq)
				1215	{
				1216	mq->policy.destroy = mq_destroy;
				1217	mq->policy.map = mq_map;
				1218	mq->policy.lookup = mq_lookup;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1219	mq->policy.set_dirty = mq_set_dirty;
				1220	mq->policy.clear_dirty = mq_clear_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1221	mq->policy.load_mapping = mq_load_mapping;
				1222	mq->policy.walk_mappings = mq_walk_mappings;
				1223	mq->policy.remove_mapping = mq_remove_mapping;
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	1224	mq->policy.remove_cblock = mq_remove_cblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1225	mq->policy.writeback_work = mq_writeback_work;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1226	mq->policy.force_mapping = mq_force_mapping;
				1227	mq->policy.residency = mq_residency;
				1228	mq->policy.tick = mq_tick;
				1229	mq->policy.emit_config_values = mq_emit_config_values;
				1230	mq->policy.set_config_value = mq_set_config_value;
				1231	}
				1232
				1233	static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
				1234	sector_t origin_size,
				1235	sector_t cache_block_size)
				1236	{
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1237	struct mq_policy mq = kzalloc(sizeof(mq), GFP_KERNEL);
				1238
				1239	if (!mq)
				1240	return NULL;
				1241
				1242	init_policy_functions(mq);
				1243	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1244	mq->cache_size = cache_size;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1245
				1246	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
				1247	DMERR("couldn't initialize pool of pre-cache entries");
				1248	goto bad_pre_cache_init;
				1249	}
				1250
				1251	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
				1252	DMERR("couldn't initialize pool of cache entries");
				1253	goto bad_cache_init;
				1254	}
				1255
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1256	mq->tick_protected = 0;
				1257	mq->tick = 0;
				1258	mq->hit_count = 0;
				1259	mq->generation = 0;
Joe Thornber	78e03d6	2013-12-09 12:53:05 +0000	[diff] [blame]	1260	mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT;
				1261	mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT;
				1262	mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1263	mutex_init(&mq->lock);
				1264	spin_lock_init(&mq->tick_lock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1265
				1266	queue_init(&mq->pre_cache);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1267	queue_init(&mq->cache_clean);
				1268	queue_init(&mq->cache_dirty);
				1269
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1270	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
				1271
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1272	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
				1273	mq->hash_bits = ffs(mq->nr_buckets) - 1;
Heinz Mauelshagen	14f398c	2014-02-28 12:02:56 -0500	[diff] [blame]	1274	mq->table = vzalloc(sizeof(mq->table) mq->nr_buckets);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1275	if (!mq->table)
				1276	goto bad_alloc_table;
				1277
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1278	return &mq->policy;
				1279
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1280	bad_alloc_table:
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1281	epool_exit(&mq->cache_pool);
				1282	bad_cache_init:
				1283	epool_exit(&mq->pre_cache_pool);
				1284	bad_pre_cache_init:
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1285	kfree(mq);
				1286
				1287	return NULL;
				1288	}
				1289
				1290	/----------------------------------------------------------------/
				1291
				1292	static struct dm_cache_policy_type mq_policy_type = {
				1293	.name = "mq",
Mike Snitzer	f1afb36	2014-10-30 10:02:01 -0400	[diff] [blame]	1294	.version = {1, 3, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1295	.hint_size = 4,
				1296	.owner = THIS_MODULE,
				1297	.create = mq_create
				1298	};
				1299
				1300	static struct dm_cache_policy_type default_policy_type = {
				1301	.name = "default",
Mike Snitzer	f1afb36	2014-10-30 10:02:01 -0400	[diff] [blame]	1302	.version = {1, 3, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1303	.hint_size = 4,
				1304	.owner = THIS_MODULE,
Mike Snitzer	2e68c4e	2014-01-15 21:06:55 -0500	[diff] [blame]	1305	.create = mq_create,
				1306	.real = &mq_policy_type
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1307	};
				1308
				1309	static int __init mq_init(void)
				1310	{
				1311	int r;
				1312
				1313	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
				1314	sizeof(struct entry),
				1315	__alignof__(struct entry),
				1316	0, NULL);
				1317	if (!mq_entry_cache)
				1318	goto bad;
				1319
				1320	r = dm_cache_policy_register(&mq_policy_type);
				1321	if (r) {
				1322	DMERR("register failed %d", r);
				1323	goto bad_register_mq;
				1324	}
				1325
				1326	r = dm_cache_policy_register(&default_policy_type);
				1327	if (!r) {
Mike Snitzer	4e7f506	2013-03-20 17:21:27 +0000	[diff] [blame]	1328	DMINFO("version %u.%u.%u loaded",
				1329	mq_policy_type.version[0],
				1330	mq_policy_type.version[1],
				1331	mq_policy_type.version[2]);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1332	return 0;
				1333	}
				1334
				1335	DMERR("register failed (as default) %d", r);
				1336
				1337	dm_cache_policy_unregister(&mq_policy_type);
				1338	bad_register_mq:
				1339	kmem_cache_destroy(mq_entry_cache);
				1340	bad:
				1341	return -ENOMEM;
				1342	}
				1343
				1344	static void __exit mq_exit(void)
				1345	{
				1346	dm_cache_policy_unregister(&mq_policy_type);
				1347	dm_cache_policy_unregister(&default_policy_type);
				1348
				1349	kmem_cache_destroy(mq_entry_cache);
				1350	}
				1351
				1352	module_init(mq_init);
				1353	module_exit(mq_exit);
				1354
				1355	MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
				1356	MODULE_LICENSE("GPL");
				1357	MODULE_DESCRIPTION("mq cache policy");
				1358
				1359	MODULE_ALIAS("dm-cache-default");