Blame - drivers/md/dm-cache-policy-mq.c - kernel/msm-4.9

blob: 416b7b752a6e0018d4a11800606a8be6b8890e22 [file] [log] [blame]

Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 Red Hat. All rights reserved.
				3	*
				4	* This file is released under the GPL.
				5	*/
				6
				7	#include "dm-cache-policy.h"
				8	#include "dm.h"
				9
				10	#include <linux/hash.h>
				11	#include <linux/module.h>
				12	#include <linux/mutex.h>
				13	#include <linux/slab.h>
				14	#include <linux/vmalloc.h>
				15
				16	#define DM_MSG_PREFIX "cache-policy-mq"
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	17
				18	static struct kmem_cache *mq_entry_cache;
				19
				20	/----------------------------------------------------------------/
				21
				22	static unsigned next_power(unsigned n, unsigned min)
				23	{
				24	return roundup_pow_of_two(max(n, min));
				25	}
				26
				27	/----------------------------------------------------------------/
				28
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	29	/*
				30	* Large, sequential ios are probably better left on the origin device since
				31	* spindles tend to have good bandwidth.
				32	*
				33	* The io_tracker tries to spot when the io is in one of these sequential
				34	* modes.
				35	*
				36	* Two thresholds to switch between random and sequential io mode are defaulting
				37	* as follows and can be adjusted via the constructor and message interfaces.
				38	*/
				39	#define RANDOM_THRESHOLD_DEFAULT 4
				40	#define SEQUENTIAL_THRESHOLD_DEFAULT 512
				41
				42	enum io_pattern {
				43	PATTERN_SEQUENTIAL,
				44	PATTERN_RANDOM
				45	};
				46
				47	struct io_tracker {
				48	enum io_pattern pattern;
				49
				50	unsigned nr_seq_samples;
				51	unsigned nr_rand_samples;
				52	unsigned thresholds[2];
				53
				54	dm_oblock_t last_end_oblock;
				55	};
				56
				57	static void iot_init(struct io_tracker *t,
				58	int sequential_threshold, int random_threshold)
				59	{
				60	t->pattern = PATTERN_RANDOM;
				61	t->nr_seq_samples = 0;
				62	t->nr_rand_samples = 0;
				63	t->last_end_oblock = 0;
				64	t->thresholds[PATTERN_RANDOM] = random_threshold;
				65	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
				66	}
				67
				68	static enum io_pattern iot_pattern(struct io_tracker *t)
				69	{
				70	return t->pattern;
				71	}
				72
				73	static void iot_update_stats(struct io_tracker t, struct bio bio)
				74	{
				75	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
				76	t->nr_seq_samples++;
				77	else {
				78	/*
				79	* Just one non-sequential IO is enough to reset the
				80	* counters.
				81	*/
				82	if (t->nr_seq_samples) {
				83	t->nr_seq_samples = 0;
				84	t->nr_rand_samples = 0;
				85	}
				86
				87	t->nr_rand_samples++;
				88	}
				89
				90	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
				91	}
				92
				93	static void iot_check_for_pattern_switch(struct io_tracker *t)
				94	{
				95	switch (t->pattern) {
				96	case PATTERN_SEQUENTIAL:
				97	if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
				98	t->pattern = PATTERN_RANDOM;
				99	t->nr_seq_samples = t->nr_rand_samples = 0;
				100	}
				101	break;
				102
				103	case PATTERN_RANDOM:
				104	if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
				105	t->pattern = PATTERN_SEQUENTIAL;
				106	t->nr_seq_samples = t->nr_rand_samples = 0;
				107	}
				108	break;
				109	}
				110	}
				111
				112	static void iot_examine_bio(struct io_tracker t, struct bio bio)
				113	{
				114	iot_update_stats(t, bio);
				115	iot_check_for_pattern_switch(t);
				116	}
				117
				118	/----------------------------------------------------------------/
				119
				120
				121	/*
				122	* This queue is divided up into different levels. Allowing us to push
				123	* entries to the back of any of the levels. Think of it as a partially
				124	* sorted queue.
				125	*/
				126	#define NR_QUEUE_LEVELS 16u
				127
				128	struct queue {
				129	struct list_head qs[NR_QUEUE_LEVELS];
				130	};
				131
				132	static void queue_init(struct queue *q)
				133	{
				134	unsigned i;
				135
				136	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				137	INIT_LIST_HEAD(q->qs + i);
				138	}
				139
				140	/*
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	141	* Checks to see if the queue is empty.
				142	* FIXME: reduce cpu usage.
				143	*/
				144	static bool queue_empty(struct queue *q)
				145	{
				146	unsigned i;
				147
				148	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				149	if (!list_empty(q->qs + i))
				150	return false;
				151
				152	return true;
				153	}
				154
				155	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	156	* Insert an entry to the back of the given level.
				157	*/
				158	static void queue_push(struct queue q, unsigned level, struct list_head elt)
				159	{
				160	list_add_tail(elt, q->qs + level);
				161	}
				162
				163	static void queue_remove(struct list_head *elt)
				164	{
				165	list_del(elt);
				166	}
				167
				168	/*
				169	* Shifts all regions down one level. This has no effect on the order of
				170	* the queue.
				171	*/
				172	static void queue_shift_down(struct queue *q)
				173	{
				174	unsigned level;
				175
				176	for (level = 1; level < NR_QUEUE_LEVELS; level++)
				177	list_splice_init(q->qs + level, q->qs + level - 1);
				178	}
				179
				180	/*
				181	* Gives us the oldest entry of the lowest popoulated level. If the first
				182	* level is emptied then we shift down one level.
				183	*/
				184	static struct list_head queue_pop(struct queue q)
				185	{
				186	unsigned level;
				187	struct list_head *r;
				188
				189	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				190	if (!list_empty(q->qs + level)) {
				191	r = q->qs[level].next;
				192	list_del(r);
				193
				194	/* have we just emptied the bottom level? */
				195	if (level == 0 && list_empty(q->qs))
				196	queue_shift_down(q);
				197
				198	return r;
				199	}
				200
				201	return NULL;
				202	}
				203
				204	static struct list_head list_pop(struct list_head lh)
				205	{
				206	struct list_head *r = lh->next;
				207
				208	BUG_ON(!r);
				209	list_del_init(r);
				210
				211	return r;
				212	}
				213
				214	/----------------------------------------------------------------/
				215
				216	/*
				217	* Describes a cache entry. Used in both the cache and the pre_cache.
				218	*/
				219	struct entry {
				220	struct hlist_node hlist;
				221	struct list_head list;
				222	dm_oblock_t oblock;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	223
				224	/*
				225	* FIXME: pack these better
				226	*/
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	227	bool dirty:1;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	228	unsigned hit_count;
				229	unsigned generation;
				230	unsigned tick;
				231	};
				232
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	233	/*
				234	* Rather than storing the cblock in an entry, we allocate all entries in
				235	* an array, and infer the cblock from the entry position.
				236	*
				237	* Free entries are linked together into a list.
				238	*/
				239	struct entry_pool {
				240	struct entry entries, entries_end;
				241	struct list_head free;
				242	unsigned nr_allocated;
				243	};
				244
				245	static int epool_init(struct entry_pool *ep, unsigned nr_entries)
				246	{
				247	unsigned i;
				248
				249	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
				250	if (!ep->entries)
				251	return -ENOMEM;
				252
				253	ep->entries_end = ep->entries + nr_entries;
				254
				255	INIT_LIST_HEAD(&ep->free);
				256	for (i = 0; i < nr_entries; i++)
				257	list_add(&ep->entries[i].list, &ep->free);
				258
				259	ep->nr_allocated = 0;
				260
				261	return 0;
				262	}
				263
				264	static void epool_exit(struct entry_pool *ep)
				265	{
				266	vfree(ep->entries);
				267	}
				268
				269	static struct entry alloc_entry(struct entry_pool ep)
				270	{
				271	struct entry *e;
				272
				273	if (list_empty(&ep->free))
				274	return NULL;
				275
				276	e = list_entry(list_pop(&ep->free), struct entry, list);
				277	INIT_LIST_HEAD(&e->list);
				278	INIT_HLIST_NODE(&e->hlist);
				279	ep->nr_allocated++;
				280
				281	return e;
				282	}
				283
				284	/*
				285	* This assumes the cblock hasn't already been allocated.
				286	*/
				287	static struct entry alloc_particular_entry(struct entry_pool ep, dm_cblock_t cblock)
				288	{
				289	struct entry *e = ep->entries + from_cblock(cblock);
				290	list_del(&e->list);
				291
				292	INIT_LIST_HEAD(&e->list);
				293	INIT_HLIST_NODE(&e->hlist);
				294	ep->nr_allocated++;
				295
				296	return e;
				297	}
				298
				299	static void free_entry(struct entry_pool ep, struct entry e)
				300	{
				301	BUG_ON(!ep->nr_allocated);
				302	ep->nr_allocated--;
				303	INIT_HLIST_NODE(&e->hlist);
				304	list_add(&e->list, &ep->free);
				305	}
				306
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	307	/*
				308	* Returns NULL if the entry is free.
				309	*/
				310	static struct entry epool_find(struct entry_pool ep, dm_cblock_t cblock)
				311	{
				312	struct entry *e = ep->entries + from_cblock(cblock);
Mike Snitzer	7b6b2bc	2013-11-12 12:17:43 -0500	[diff] [blame]	313	return !hlist_unhashed(&e->hlist) ? e : NULL;
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	314	}
				315
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	316	static bool epool_empty(struct entry_pool *ep)
				317	{
				318	return list_empty(&ep->free);
				319	}
				320
				321	static bool in_pool(struct entry_pool ep, struct entry e)
				322	{
				323	return e >= ep->entries && e < ep->entries_end;
				324	}
				325
				326	static dm_cblock_t infer_cblock(struct entry_pool ep, struct entry e)
				327	{
				328	return to_cblock(e - ep->entries);
				329	}
				330
				331	/----------------------------------------------------------------/
				332
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	333	struct mq_policy {
				334	struct dm_cache_policy policy;
				335
				336	/* protects everything */
				337	struct mutex lock;
				338	dm_cblock_t cache_size;
				339	struct io_tracker tracker;
				340
				341	/*
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	342	* Entries come from two pools, one of pre-cache entries, and one
				343	* for the cache proper.
				344	*/
				345	struct entry_pool pre_cache_pool;
				346	struct entry_pool cache_pool;
				347
				348	/*
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	349	* We maintain three queues of entries. The cache proper,
				350	* consisting of a clean and dirty queue, contains the currently
				351	* active mappings. Whereas the pre_cache tracks blocks that
				352	* are being hit frequently and potential candidates for promotion
				353	* to the cache.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	354	*/
				355	struct queue pre_cache;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	356	struct queue cache_clean;
				357	struct queue cache_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	358
				359	/*
				360	* Keeps track of time, incremented by the core. We use this to
				361	* avoid attributing multiple hits within the same tick.
				362	*
				363	* Access to tick_protected should be done with the spin lock held.
				364	* It's copied to tick at the start of the map function (within the
				365	* mutex).
				366	*/
				367	spinlock_t tick_lock;
				368	unsigned tick_protected;
				369	unsigned tick;
				370
				371	/*
				372	* A count of the number of times the map function has been called
				373	* and found an entry in the pre_cache or cache. Currently used to
				374	* calculate the generation.
				375	*/
				376	unsigned hit_count;
				377
				378	/*
				379	* A generation is a longish period that is used to trigger some
				380	* book keeping effects. eg, decrementing hit counts on entries.
				381	* This is needed to allow the cache to evolve as io patterns
				382	* change.
				383	*/
				384	unsigned generation;
				385	unsigned generation_period; /* in lookups (will probably change) */
				386
				387	/*
				388	* Entries in the pre_cache whose hit count passes the promotion
				389	* threshold move to the cache proper. Working out the correct
				390	* value for the promotion_threshold is crucial to this policy.
				391	*/
				392	unsigned promote_threshold;
				393
				394	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	395	* The hash table allows us to quickly find an entry by origin
				396	* block. Both pre_cache and cache entries are in here.
				397	*/
				398	unsigned nr_buckets;
				399	dm_block_t hash_bits;
				400	struct hlist_head *table;
				401	};
				402
				403	/----------------------------------------------------------------/
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	404
				405	/*
				406	* Simple hash table implementation. Should replace with the standard hash
				407	* table that's making its way upstream.
				408	*/
				409	static void hash_insert(struct mq_policy mq, struct entry e)
				410	{
				411	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);
				412
				413	hlist_add_head(&e->hlist, mq->table + h);
				414	}
				415
				416	static struct entry hash_lookup(struct mq_policy mq, dm_oblock_t oblock)
				417	{
				418	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
				419	struct hlist_head *bucket = mq->table + h;
				420	struct entry *e;
				421
				422	hlist_for_each_entry(e, bucket, hlist)
				423	if (e->oblock == oblock) {
				424	hlist_del(&e->hlist);
				425	hlist_add_head(&e->hlist, bucket);
				426	return e;
				427	}
				428
				429	return NULL;
				430	}
				431
				432	static void hash_remove(struct entry *e)
				433	{
				434	hlist_del(&e->hlist);
				435	}
				436
				437	/----------------------------------------------------------------/
				438
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	439	static bool any_free_cblocks(struct mq_policy *mq)
				440	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	441	return !epool_empty(&mq->cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	442	}
				443
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	444	static bool any_clean_cblocks(struct mq_policy *mq)
				445	{
				446	return !queue_empty(&mq->cache_clean);
				447	}
				448
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	449	/----------------------------------------------------------------/
				450
				451	/*
				452	* Now we get to the meat of the policy. This section deals with deciding
				453	* when to to add entries to the pre_cache and cache, and move between
				454	* them.
				455	*/
				456
				457	/*
				458	* The queue level is based on the log2 of the hit count.
				459	*/
				460	static unsigned queue_level(struct entry *e)
				461	{
				462	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
				463	}
				464
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	465	static bool in_cache(struct mq_policy mq, struct entry e)
				466	{
				467	return in_pool(&mq->cache_pool, e);
				468	}
				469
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	470	/*
				471	* Inserts the entry into the pre_cache or the cache. Ensures the cache
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	472	* block is marked as allocated if necc. Inserts into the hash table.
				473	* Sets the tick which records when the entry was last moved about.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	474	*/
				475	static void push(struct mq_policy mq, struct entry e)
				476	{
				477	e->tick = mq->tick;
				478	hash_insert(mq, e);
				479
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	480	if (in_cache(mq, e))
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	481	queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
				482	queue_level(e), &e->list);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	483	else
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	484	queue_push(&mq->pre_cache, queue_level(e), &e->list);
				485	}
				486
				487	/*
				488	* Removes an entry from pre_cache or cache. Removes from the hash table.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	489	*/
				490	static void del(struct mq_policy mq, struct entry e)
				491	{
				492	queue_remove(&e->list);
				493	hash_remove(e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	494	}
				495
				496	/*
				497	* Like del, except it removes the first entry in the queue (ie. the least
				498	* recently used).
				499	*/
				500	static struct entry pop(struct mq_policy mq, struct queue *q)
				501	{
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	502	struct entry *e;
				503	struct list_head *h = queue_pop(q);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	504
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	505	if (!h)
				506	return NULL;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	507
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	508	e = container_of(h, struct entry, list);
				509	hash_remove(e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	510
				511	return e;
				512	}
				513
				514	/*
				515	* Has this entry already been updated?
				516	*/
				517	static bool updated_this_tick(struct mq_policy mq, struct entry e)
				518	{
				519	return mq->tick == e->tick;
				520	}
				521
				522	/*
				523	* The promotion threshold is adjusted every generation. As are the counts
				524	* of the entries.
				525	*
				526	* At the moment the threshold is taken by averaging the hit counts of some
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	527	* of the entries in the cache (the first 20 entries across all levels in
				528	* ascending order, giving preference to the clean entries at each level).
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	529	*
				530	* We can be much cleverer than this though. For example, each promotion
				531	* could bump up the threshold helping to prevent churn. Much more to do
				532	* here.
				533	*/
				534
				535	#define MAX_TO_AVERAGE 20
				536
				537	static void check_generation(struct mq_policy *mq)
				538	{
				539	unsigned total = 0, nr = 0, count = 0, level;
				540	struct list_head *head;
				541	struct entry *e;
				542
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	543	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	544	mq->hit_count = 0;
				545	mq->generation++;
				546
				547	for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	548	head = mq->cache_clean.qs + level;
				549	list_for_each_entry(e, head, list) {
				550	nr++;
				551	total += e->hit_count;
				552
				553	if (++count >= MAX_TO_AVERAGE)
				554	break;
				555	}
				556
				557	head = mq->cache_dirty.qs + level;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	558	list_for_each_entry(e, head, list) {
				559	nr++;
				560	total += e->hit_count;
				561
				562	if (++count >= MAX_TO_AVERAGE)
				563	break;
				564	}
				565	}
				566
				567	mq->promote_threshold = nr ? total / nr : 1;
				568	if (mq->promote_threshold * nr < total)
				569	mq->promote_threshold++;
				570	}
				571	}
				572
				573	/*
				574	* Whenever we use an entry we bump up it's hit counter, and push it to the
				575	* back to it's current level.
				576	*/
				577	static void requeue_and_update_tick(struct mq_policy mq, struct entry e)
				578	{
				579	if (updated_this_tick(mq, e))
				580	return;
				581
				582	e->hit_count++;
				583	mq->hit_count++;
				584	check_generation(mq);
				585
				586	/* generation adjustment, to stop the counts increasing forever. */
				587	/* FIXME: divide? */
				588	/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
				589	e->generation = mq->generation;
				590
				591	del(mq, e);
				592	push(mq, e);
				593	}
				594
				595	/*
				596	* Demote the least recently used entry from the cache to the pre_cache.
				597	* Returns the new cache entry to use, and the old origin block it was
				598	* mapped to.
				599	*
				600	* We drop the hit count on the demoted entry back to 1 to stop it bouncing
				601	* straight back into the cache if it's subsequently hit. There are
				602	* various options here, and more experimentation would be good:
				603	*
				604	* - just forget about the demoted entry completely (ie. don't insert it
				605	into the pre_cache).
				606	* - divide the hit count rather that setting to some hard coded value.
				607	* - set the hit count to a hard coded value other than 1, eg, is it better
				608	* if it goes in at level 2?
				609	*/
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	610	static int demote_cblock(struct mq_policy mq, dm_oblock_t oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	611	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	612	struct entry *demoted = pop(mq, &mq->cache_clean);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	613
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	614	if (!demoted)
				615	/*
				616	* We could get a block from mq->cache_dirty, but that
				617	* would add extra latency to the triggering bio as it
				618	* waits for the writeback. Better to not promote this
				619	* time and hope there's a clean block next time this block
				620	* is hit.
				621	*/
				622	return -ENOSPC;
				623
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	624	*oblock = demoted->oblock;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	625	free_entry(&mq->cache_pool, demoted);
				626
				627	/*
				628	* We used to put the demoted block into the pre-cache, but I think
				629	* it's simpler to just let it work it's way up from zero again.
				630	* Stops blocks flickering in and out of the cache.
				631	*/
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	632
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	633	return 0;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	634	}
				635
				636	/*
				637	* We modify the basic promotion_threshold depending on the specific io.
				638	*
				639	* If the origin block has been discarded then there's no cost to copy it
				640	* to the cache.
				641	*
				642	* We bias towards reads, since they can be demoted at no cost if they
				643	* haven't been dirtied.
				644	*/
				645	#define DISCARDED_PROMOTE_THRESHOLD 1
				646	#define READ_PROMOTE_THRESHOLD 4
				647	#define WRITE_PROMOTE_THRESHOLD 8
				648
				649	static unsigned adjusted_promote_threshold(struct mq_policy *mq,
				650	bool discarded_oblock, int data_dir)
				651	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	652	if (data_dir == READ)
				653	return mq->promote_threshold + READ_PROMOTE_THRESHOLD;
				654
				655	if (discarded_oblock && (any_free_cblocks(mq) \|\| any_clean_cblocks(mq))) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	656	/*
				657	* We don't need to do any copying at all, so give this a
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	658	* very low threshold.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	659	*/
				660	return DISCARDED_PROMOTE_THRESHOLD;
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	661	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	662
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	663	return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	664	}
				665
				666	static bool should_promote(struct mq_policy mq, struct entry e,
				667	bool discarded_oblock, int data_dir)
				668	{
				669	return e->hit_count >=
				670	adjusted_promote_threshold(mq, discarded_oblock, data_dir);
				671	}
				672
				673	static int cache_entry_found(struct mq_policy *mq,
				674	struct entry *e,
				675	struct policy_result *result)
				676	{
				677	requeue_and_update_tick(mq, e);
				678
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	679	if (in_cache(mq, e)) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	680	result->op = POLICY_HIT;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	681	result->cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	682	}
				683
				684	return 0;
				685	}
				686
				687	/*
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	688	* Moves an entry from the pre_cache to the cache. The main work is
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	689	* finding which cache block to use.
				690	*/
				691	static int pre_cache_to_cache(struct mq_policy mq, struct entry e,
				692	struct policy_result *result)
				693	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	694	int r;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	695	struct entry *new_e;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	696
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	697	/* Ensure there's a free cblock in the cache */
				698	if (epool_empty(&mq->cache_pool)) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	699	result->op = POLICY_REPLACE;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	700	r = demote_cblock(mq, &result->old_oblock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	701	if (r) {
				702	result->op = POLICY_MISS;
				703	return 0;
				704	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	705	} else
				706	result->op = POLICY_NEW;
				707
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	708	new_e = alloc_entry(&mq->cache_pool);
				709	BUG_ON(!new_e);
				710
				711	new_e->oblock = e->oblock;
				712	new_e->dirty = false;
				713	new_e->hit_count = e->hit_count;
				714	new_e->generation = e->generation;
				715	new_e->tick = e->tick;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	716
				717	del(mq, e);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	718	free_entry(&mq->pre_cache_pool, e);
				719	push(mq, new_e);
				720
				721	result->cblock = infer_cblock(&mq->cache_pool, new_e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	722
				723	return 0;
				724	}
				725
				726	static int pre_cache_entry_found(struct mq_policy mq, struct entry e,
				727	bool can_migrate, bool discarded_oblock,
				728	int data_dir, struct policy_result *result)
				729	{
				730	int r = 0;
				731	bool updated = updated_this_tick(mq, e);
				732
				733	requeue_and_update_tick(mq, e);
				734
				735	if ((!discarded_oblock && updated) \|\|
				736	!should_promote(mq, e, discarded_oblock, data_dir))
				737	result->op = POLICY_MISS;
				738	else if (!can_migrate)
				739	r = -EWOULDBLOCK;
				740	else
				741	r = pre_cache_to_cache(mq, e, result);
				742
				743	return r;
				744	}
				745
				746	static void insert_in_pre_cache(struct mq_policy *mq,
				747	dm_oblock_t oblock)
				748	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	749	struct entry *e = alloc_entry(&mq->pre_cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	750
				751	if (!e)
				752	/*
				753	* There's no spare entry structure, so we grab the least
				754	* used one from the pre_cache.
				755	*/
				756	e = pop(mq, &mq->pre_cache);
				757
				758	if (unlikely(!e)) {
				759	DMWARN("couldn't pop from pre cache");
				760	return;
				761	}
				762
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	763	e->dirty = false;
				764	e->oblock = oblock;
				765	e->hit_count = 1;
				766	e->generation = mq->generation;
				767	push(mq, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	768	}
				769
				770	static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
				771	struct policy_result *result)
				772	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	773	int r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	774	struct entry *e;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	775
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	776	if (epool_empty(&mq->cache_pool)) {
				777	result->op = POLICY_REPLACE;
				778	r = demote_cblock(mq, &result->old_oblock);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	779	if (unlikely(r)) {
				780	result->op = POLICY_MISS;
				781	insert_in_pre_cache(mq, oblock);
				782	return;
				783	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	784
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	785	/*
				786	* This will always succeed, since we've just demoted.
				787	*/
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	788	e = alloc_entry(&mq->cache_pool);
				789	BUG_ON(!e);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	790
				791	} else {
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	792	e = alloc_entry(&mq->cache_pool);
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame]	793	result->op = POLICY_NEW;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	794	}
				795
				796	e->oblock = oblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	797	e->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	798	e->hit_count = 1;
				799	e->generation = mq->generation;
				800	push(mq, e);
				801
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	802	result->cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	803	}
				804
				805	static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
				806	bool can_migrate, bool discarded_oblock,
				807	int data_dir, struct policy_result *result)
				808	{
				809	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
				810	if (can_migrate)
				811	insert_in_cache(mq, oblock, result);
				812	else
				813	return -EWOULDBLOCK;
				814	} else {
				815	insert_in_pre_cache(mq, oblock);
				816	result->op = POLICY_MISS;
				817	}
				818
				819	return 0;
				820	}
				821
				822	/*
				823	* Looks the oblock up in the hash table, then decides whether to put in
				824	* pre_cache, or cache etc.
				825	*/
				826	static int map(struct mq_policy *mq, dm_oblock_t oblock,
				827	bool can_migrate, bool discarded_oblock,
				828	int data_dir, struct policy_result *result)
				829	{
				830	int r = 0;
				831	struct entry *e = hash_lookup(mq, oblock);
				832
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	833	if (e && in_cache(mq, e))
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	834	r = cache_entry_found(mq, e, result);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	835
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	836	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
				837	result->op = POLICY_MISS;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	838
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	839	else if (e)
				840	r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
				841	data_dir, result);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	842
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	843	else
				844	r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
				845	data_dir, result);
				846
				847	if (r == -EWOULDBLOCK)
				848	result->op = POLICY_MISS;
				849
				850	return r;
				851	}
				852
				853	/----------------------------------------------------------------/
				854
				855	/*
				856	* Public interface, via the policy struct. See dm-cache-policy.h for a
				857	* description of these.
				858	*/
				859
				860	static struct mq_policy to_mq_policy(struct dm_cache_policy p)
				861	{
				862	return container_of(p, struct mq_policy, policy);
				863	}
				864
				865	static void mq_destroy(struct dm_cache_policy *p)
				866	{
				867	struct mq_policy *mq = to_mq_policy(p);
				868
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	869	kfree(mq->table);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	870	epool_exit(&mq->cache_pool);
				871	epool_exit(&mq->pre_cache_pool);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	872	kfree(mq);
				873	}
				874
				875	static void copy_tick(struct mq_policy *mq)
				876	{
				877	unsigned long flags;
				878
				879	spin_lock_irqsave(&mq->tick_lock, flags);
				880	mq->tick = mq->tick_protected;
				881	spin_unlock_irqrestore(&mq->tick_lock, flags);
				882	}
				883
				884	static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
				885	bool can_block, bool can_migrate, bool discarded_oblock,
				886	struct bio bio, struct policy_result result)
				887	{
				888	int r;
				889	struct mq_policy *mq = to_mq_policy(p);
				890
				891	result->op = POLICY_MISS;
				892
				893	if (can_block)
				894	mutex_lock(&mq->lock);
				895	else if (!mutex_trylock(&mq->lock))
				896	return -EWOULDBLOCK;
				897
				898	copy_tick(mq);
				899
				900	iot_examine_bio(&mq->tracker, bio);
				901	r = map(mq, oblock, can_migrate, discarded_oblock,
				902	bio_data_dir(bio), result);
				903
				904	mutex_unlock(&mq->lock);
				905
				906	return r;
				907	}
				908
				909	static int mq_lookup(struct dm_cache_policy p, dm_oblock_t oblock, dm_cblock_t cblock)
				910	{
				911	int r;
				912	struct mq_policy *mq = to_mq_policy(p);
				913	struct entry *e;
				914
				915	if (!mutex_trylock(&mq->lock))
				916	return -EWOULDBLOCK;
				917
				918	e = hash_lookup(mq, oblock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	919	if (e && in_cache(mq, e)) {
				920	*cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	921	r = 0;
				922	} else
				923	r = -ENOENT;
				924
				925	mutex_unlock(&mq->lock);
				926
				927	return r;
				928	}
				929
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	930	static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	931	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	932	struct entry *e;
				933
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	934	e = hash_lookup(mq, oblock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	935	BUG_ON(!e \|\| !in_cache(mq, e));
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	936
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	937	del(mq, e);
				938	e->dirty = set;
				939	push(mq, e);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	940	}
				941
				942	static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				943	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	944	struct mq_policy *mq = to_mq_policy(p);
				945
				946	mutex_lock(&mq->lock);
				947	__mq_set_clear_dirty(mq, oblock, true);
				948	mutex_unlock(&mq->lock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	949	}
				950
				951	static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				952	{
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	953	struct mq_policy *mq = to_mq_policy(p);
				954
				955	mutex_lock(&mq->lock);
				956	__mq_set_clear_dirty(mq, oblock, false);
				957	mutex_unlock(&mq->lock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	958	}
				959
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	960	static int mq_load_mapping(struct dm_cache_policy *p,
				961	dm_oblock_t oblock, dm_cblock_t cblock,
				962	uint32_t hint, bool hint_valid)
				963	{
				964	struct mq_policy *mq = to_mq_policy(p);
				965	struct entry *e;
				966
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	967	e = alloc_particular_entry(&mq->cache_pool, cblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	968	e->oblock = oblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	969	e->dirty = false; /* this gets corrected in a minute */
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	970	e->hit_count = hint_valid ? hint : 1;
				971	e->generation = mq->generation;
				972	push(mq, e);
				973
				974	return 0;
				975	}
				976
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	977	static int mq_save_hints(struct mq_policy mq, struct queue q,
				978	policy_walk_fn fn, void *context)
				979	{
				980	int r;
				981	unsigned level;
				982	struct entry *e;
				983
				984	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				985	list_for_each_entry(e, q->qs + level, list) {
				986	r = fn(context, infer_cblock(&mq->cache_pool, e),
				987	e->oblock, e->hit_count);
				988	if (r)
				989	return r;
				990	}
				991
				992	return 0;
				993	}
				994
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	995	static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
				996	void *context)
				997	{
				998	struct mq_policy *mq = to_mq_policy(p);
				999	int r = 0;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1000
				1001	mutex_lock(&mq->lock);
				1002
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1003	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
				1004	if (!r)
				1005	r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1006
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1007	mutex_unlock(&mq->lock);
				1008
				1009	return r;
				1010	}
				1011
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1012	static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
				1013	{
				1014	struct entry *e;
				1015
				1016	e = hash_lookup(mq, oblock);
				1017	BUG_ON(!e \|\| !in_cache(mq, e));
				1018
				1019	del(mq, e);
				1020	free_entry(&mq->cache_pool, e);
				1021	}
				1022
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1023	static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1024	{
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1025	struct mq_policy *mq = to_mq_policy(p);
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1026
				1027	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1028	__remove_mapping(mq, oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1029	mutex_unlock(&mq->lock);
				1030	}
				1031
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	1032	static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
				1033	{
				1034	struct entry *e = epool_find(&mq->cache_pool, cblock);
				1035
				1036	if (!e)
				1037	return -ENODATA;
				1038
				1039	del(mq, e);
				1040	free_entry(&mq->cache_pool, e);
				1041
				1042	return 0;
				1043	}
				1044
				1045	static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
				1046	{
				1047	int r;
				1048	struct mq_policy *mq = to_mq_policy(p);
				1049
				1050	mutex_lock(&mq->lock);
				1051	r = __remove_cblock(mq, cblock);
				1052	mutex_unlock(&mq->lock);
				1053
				1054	return r;
				1055	}
				1056
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1057	static int __mq_writeback_work(struct mq_policy mq, dm_oblock_t oblock,
				1058	dm_cblock_t *cblock)
				1059	{
				1060	struct entry *e = pop(mq, &mq->cache_dirty);
				1061
				1062	if (!e)
				1063	return -ENODATA;
				1064
				1065	*oblock = e->oblock;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1066	*cblock = infer_cblock(&mq->cache_pool, e);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1067	e->dirty = false;
				1068	push(mq, e);
				1069
				1070	return 0;
				1071	}
				1072
				1073	static int mq_writeback_work(struct dm_cache_policy p, dm_oblock_t oblock,
				1074	dm_cblock_t *cblock)
				1075	{
				1076	int r;
				1077	struct mq_policy *mq = to_mq_policy(p);
				1078
				1079	mutex_lock(&mq->lock);
				1080	r = __mq_writeback_work(mq, oblock, cblock);
				1081	mutex_unlock(&mq->lock);
				1082
				1083	return r;
				1084	}
				1085
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1086	static void __force_mapping(struct mq_policy *mq,
				1087	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1088	{
				1089	struct entry *e = hash_lookup(mq, current_oblock);
				1090
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1091	if (e && in_cache(mq, e)) {
				1092	del(mq, e);
				1093	e->oblock = new_oblock;
				1094	e->dirty = true;
				1095	push(mq, e);
				1096	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1097	}
				1098
				1099	static void mq_force_mapping(struct dm_cache_policy *p,
				1100	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
				1101	{
				1102	struct mq_policy *mq = to_mq_policy(p);
				1103
				1104	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1105	__force_mapping(mq, current_oblock, new_oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1106	mutex_unlock(&mq->lock);
				1107	}
				1108
				1109	static dm_cblock_t mq_residency(struct dm_cache_policy *p)
				1110	{
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1111	dm_cblock_t r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1112	struct mq_policy *mq = to_mq_policy(p);
				1113
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1114	mutex_lock(&mq->lock);
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1115	r = to_cblock(mq->cache_pool.nr_allocated);
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1116	mutex_unlock(&mq->lock);
				1117
				1118	return r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1119	}
				1120
				1121	static void mq_tick(struct dm_cache_policy *p)
				1122	{
				1123	struct mq_policy *mq = to_mq_policy(p);
				1124	unsigned long flags;
				1125
				1126	spin_lock_irqsave(&mq->tick_lock, flags);
				1127	mq->tick_protected++;
				1128	spin_unlock_irqrestore(&mq->tick_lock, flags);
				1129	}
				1130
				1131	static int mq_set_config_value(struct dm_cache_policy *p,
				1132	const char key, const char value)
				1133	{
				1134	struct mq_policy *mq = to_mq_policy(p);
				1135	enum io_pattern pattern;
				1136	unsigned long tmp;
				1137
				1138	if (!strcasecmp(key, "random_threshold"))
				1139	pattern = PATTERN_RANDOM;
				1140	else if (!strcasecmp(key, "sequential_threshold"))
				1141	pattern = PATTERN_SEQUENTIAL;
				1142	else
				1143	return -EINVAL;
				1144
				1145	if (kstrtoul(value, 10, &tmp))
				1146	return -EINVAL;
				1147
				1148	mq->tracker.thresholds[pattern] = tmp;
				1149
				1150	return 0;
				1151	}
				1152
				1153	static int mq_emit_config_values(struct dm_cache_policy p, char result, unsigned maxlen)
				1154	{
				1155	ssize_t sz = 0;
				1156	struct mq_policy *mq = to_mq_policy(p);
				1157
				1158	DMEMIT("4 random_threshold %u sequential_threshold %u",
				1159	mq->tracker.thresholds[PATTERN_RANDOM],
				1160	mq->tracker.thresholds[PATTERN_SEQUENTIAL]);
				1161
				1162	return 0;
				1163	}
				1164
				1165	/* Init the policy plugin interface function pointers. */
				1166	static void init_policy_functions(struct mq_policy *mq)
				1167	{
				1168	mq->policy.destroy = mq_destroy;
				1169	mq->policy.map = mq_map;
				1170	mq->policy.lookup = mq_lookup;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1171	mq->policy.set_dirty = mq_set_dirty;
				1172	mq->policy.clear_dirty = mq_clear_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1173	mq->policy.load_mapping = mq_load_mapping;
				1174	mq->policy.walk_mappings = mq_walk_mappings;
				1175	mq->policy.remove_mapping = mq_remove_mapping;
Joe Thornber	532906a	2013-11-08 16:36:17 +0000	[diff] [blame]	1176	mq->policy.remove_cblock = mq_remove_cblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1177	mq->policy.writeback_work = mq_writeback_work;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1178	mq->policy.force_mapping = mq_force_mapping;
				1179	mq->policy.residency = mq_residency;
				1180	mq->policy.tick = mq_tick;
				1181	mq->policy.emit_config_values = mq_emit_config_values;
				1182	mq->policy.set_config_value = mq_set_config_value;
				1183	}
				1184
				1185	static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
				1186	sector_t origin_size,
				1187	sector_t cache_block_size)
				1188	{
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1189	struct mq_policy mq = kzalloc(sizeof(mq), GFP_KERNEL);
				1190
				1191	if (!mq)
				1192	return NULL;
				1193
				1194	init_policy_functions(mq);
				1195	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1196	mq->cache_size = cache_size;
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1197
				1198	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
				1199	DMERR("couldn't initialize pool of pre-cache entries");
				1200	goto bad_pre_cache_init;
				1201	}
				1202
				1203	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
				1204	DMERR("couldn't initialize pool of cache entries");
				1205	goto bad_cache_init;
				1206	}
				1207
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1208	mq->tick_protected = 0;
				1209	mq->tick = 0;
				1210	mq->hit_count = 0;
				1211	mq->generation = 0;
				1212	mq->promote_threshold = 0;
				1213	mutex_init(&mq->lock);
				1214	spin_lock_init(&mq->tick_lock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1215
				1216	queue_init(&mq->pre_cache);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1217	queue_init(&mq->cache_clean);
				1218	queue_init(&mq->cache_dirty);
				1219
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1220	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
				1221
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1222	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
				1223	mq->hash_bits = ffs(mq->nr_buckets) - 1;
				1224	mq->table = kzalloc(sizeof(mq->table) mq->nr_buckets, GFP_KERNEL);
				1225	if (!mq->table)
				1226	goto bad_alloc_table;
				1227
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1228	return &mq->policy;
				1229
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1230	bad_alloc_table:
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1231	epool_exit(&mq->cache_pool);
				1232	bad_cache_init:
				1233	epool_exit(&mq->pre_cache_pool);
				1234	bad_pre_cache_init:
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1235	kfree(mq);
				1236
				1237	return NULL;
				1238	}
				1239
				1240	/----------------------------------------------------------------/
				1241
				1242	static struct dm_cache_policy_type mq_policy_type = {
				1243	.name = "mq",
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1244	.version = {1, 1, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1245	.hint_size = 4,
				1246	.owner = THIS_MODULE,
				1247	.create = mq_create
				1248	};
				1249
				1250	static struct dm_cache_policy_type default_policy_type = {
				1251	.name = "default",
Joe Thornber	633618e	2013-11-09 11:12:51 +0000	[diff] [blame]	1252	.version = {1, 1, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1253	.hint_size = 4,
				1254	.owner = THIS_MODULE,
				1255	.create = mq_create
				1256	};
				1257
				1258	static int __init mq_init(void)
				1259	{
				1260	int r;
				1261
				1262	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
				1263	sizeof(struct entry),
				1264	__alignof__(struct entry),
				1265	0, NULL);
				1266	if (!mq_entry_cache)
				1267	goto bad;
				1268
				1269	r = dm_cache_policy_register(&mq_policy_type);
				1270	if (r) {
				1271	DMERR("register failed %d", r);
				1272	goto bad_register_mq;
				1273	}
				1274
				1275	r = dm_cache_policy_register(&default_policy_type);
				1276	if (!r) {
Mike Snitzer	4e7f506	2013-03-20 17:21:27 +0000	[diff] [blame]	1277	DMINFO("version %u.%u.%u loaded",
				1278	mq_policy_type.version[0],
				1279	mq_policy_type.version[1],
				1280	mq_policy_type.version[2]);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1281	return 0;
				1282	}
				1283
				1284	DMERR("register failed (as default) %d", r);
				1285
				1286	dm_cache_policy_unregister(&mq_policy_type);
				1287	bad_register_mq:
				1288	kmem_cache_destroy(mq_entry_cache);
				1289	bad:
				1290	return -ENOMEM;
				1291	}
				1292
				1293	static void __exit mq_exit(void)
				1294	{
				1295	dm_cache_policy_unregister(&mq_policy_type);
				1296	dm_cache_policy_unregister(&default_policy_type);
				1297
				1298	kmem_cache_destroy(mq_entry_cache);
				1299	}
				1300
				1301	module_init(mq_init);
				1302	module_exit(mq_exit);
				1303
				1304	MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
				1305	MODULE_LICENSE("GPL");
				1306	MODULE_DESCRIPTION("mq cache policy");
				1307
				1308	MODULE_ALIAS("dm-cache-default");