Blame - drivers/md/dm-cache-policy-mq.c - kernel/msm-4.9

blob: 444f0bf10b21292b84a35766f52ea4812fba104e [file] [log] [blame]

Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 Red Hat. All rights reserved.
				3	*
				4	* This file is released under the GPL.
				5	*/
				6
				7	#include "dm-cache-policy.h"
				8	#include "dm.h"
				9
				10	#include <linux/hash.h>
				11	#include <linux/module.h>
				12	#include <linux/mutex.h>
				13	#include <linux/slab.h>
				14	#include <linux/vmalloc.h>
				15
				16	#define DM_MSG_PREFIX "cache-policy-mq"
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	17
				18	static struct kmem_cache *mq_entry_cache;
				19
				20	/*----------------------------------------------------------------*/
				21
				22	static unsigned next_power(unsigned n, unsigned min)
				23	{
				24	return roundup_pow_of_two(max(n, min));
				25	}
				26
				27	/*----------------------------------------------------------------*/
				28
				29	static unsigned long *alloc_bitset(unsigned nr_entries)
				30	{
				31	size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
				32	return vzalloc(s);
				33	}
				34
				35	static void free_bitset(unsigned long *bits)
				36	{
				37	vfree(bits);
				38	}
				39
				40	/*----------------------------------------------------------------*/
				41
				42	/*
				43	* Large, sequential ios are probably better left on the origin device since
				44	* spindles tend to have good bandwidth.
				45	*
				46	* The io_tracker tries to spot when the io is in one of these sequential
				47	* modes.
				48	*
				49	* Two thresholds to switch between random and sequential io mode are defaulting
				50	* as follows and can be adjusted via the constructor and message interfaces.
				51	*/
				52	#define RANDOM_THRESHOLD_DEFAULT 4
				53	#define SEQUENTIAL_THRESHOLD_DEFAULT 512
				54
				55	enum io_pattern {
				56	PATTERN_SEQUENTIAL,
				57	PATTERN_RANDOM
				58	};
				59
				60	struct io_tracker {
				61	enum io_pattern pattern;
				62
				63	unsigned nr_seq_samples;
				64	unsigned nr_rand_samples;
				65	unsigned thresholds[2];
				66
				67	dm_oblock_t last_end_oblock;
				68	};
				69
				70	static void iot_init(struct io_tracker *t,
				71	int sequential_threshold, int random_threshold)
				72	{
				73	t->pattern = PATTERN_RANDOM;
				74	t->nr_seq_samples = 0;
				75	t->nr_rand_samples = 0;
				76	t->last_end_oblock = 0;
				77	t->thresholds[PATTERN_RANDOM] = random_threshold;
				78	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
				79	}
				80
				81	static enum io_pattern iot_pattern(struct io_tracker *t)
				82	{
				83	return t->pattern;
				84	}
				85
				86	static void iot_update_stats(struct io_tracker t, struct bio bio)
				87	{
				88	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
				89	t->nr_seq_samples++;
				90	else {
				91	/*
				92	* Just one non-sequential IO is enough to reset the
				93	* counters.
				94	*/
				95	if (t->nr_seq_samples) {
				96	t->nr_seq_samples = 0;
				97	t->nr_rand_samples = 0;
				98	}
				99
				100	t->nr_rand_samples++;
				101	}
				102
				103	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
				104	}
				105
				106	static void iot_check_for_pattern_switch(struct io_tracker *t)
				107	{
				108	switch (t->pattern) {
				109	case PATTERN_SEQUENTIAL:
				110	if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
				111	t->pattern = PATTERN_RANDOM;
				112	t->nr_seq_samples = t->nr_rand_samples = 0;
				113	}
				114	break;
				115
				116	case PATTERN_RANDOM:
				117	if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
				118	t->pattern = PATTERN_SEQUENTIAL;
				119	t->nr_seq_samples = t->nr_rand_samples = 0;
				120	}
				121	break;
				122	}
				123	}
				124
				125	static void iot_examine_bio(struct io_tracker t, struct bio bio)
				126	{
				127	iot_update_stats(t, bio);
				128	iot_check_for_pattern_switch(t);
				129	}
				130
				131	/*----------------------------------------------------------------*/
				132
				133
				134	/*
				135	* This queue is divided up into different levels. Allowing us to push
				136	* entries to the back of any of the levels. Think of it as a partially
				137	* sorted queue.
				138	*/
				139	#define NR_QUEUE_LEVELS 16u
				140
				141	struct queue {
				142	struct list_head qs[NR_QUEUE_LEVELS];
				143	};
				144
				145	static void queue_init(struct queue *q)
				146	{
				147	unsigned i;
				148
				149	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				150	INIT_LIST_HEAD(q->qs + i);
				151	}
				152
				153	/*
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	154	* Checks to see if the queue is empty.
				155	* FIXME: reduce cpu usage.
				156	*/
				157	static bool queue_empty(struct queue *q)
				158	{
				159	unsigned i;
				160
				161	for (i = 0; i < NR_QUEUE_LEVELS; i++)
				162	if (!list_empty(q->qs + i))
				163	return false;
				164
				165	return true;
				166	}
				167
				168	/*
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	169	* Insert an entry to the back of the given level.
				170	*/
				171	static void queue_push(struct queue q, unsigned level, struct list_head elt)
				172	{
				173	list_add_tail(elt, q->qs + level);
				174	}
				175
				176	static void queue_remove(struct list_head *elt)
				177	{
				178	list_del(elt);
				179	}
				180
				181	/*
				182	* Shifts all regions down one level. This has no effect on the order of
				183	* the queue.
				184	*/
				185	static void queue_shift_down(struct queue *q)
				186	{
				187	unsigned level;
				188
				189	for (level = 1; level < NR_QUEUE_LEVELS; level++)
				190	list_splice_init(q->qs + level, q->qs + level - 1);
				191	}
				192
				193	/*
				194	* Gives us the oldest entry of the lowest popoulated level. If the first
				195	* level is emptied then we shift down one level.
				196	*/
				197	static struct list_head queue_pop(struct queue q)
				198	{
				199	unsigned level;
				200	struct list_head *r;
				201
				202	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				203	if (!list_empty(q->qs + level)) {
				204	r = q->qs[level].next;
				205	list_del(r);
				206
				207	/* have we just emptied the bottom level? */
				208	if (level == 0 && list_empty(q->qs))
				209	queue_shift_down(q);
				210
				211	return r;
				212	}
				213
				214	return NULL;
				215	}
				216
				217	static struct list_head list_pop(struct list_head lh)
				218	{
				219	struct list_head *r = lh->next;
				220
				221	BUG_ON(!r);
				222	list_del_init(r);
				223
				224	return r;
				225	}
				226
				227	/*----------------------------------------------------------------*/
				228
				229	/*
				230	* Describes a cache entry. Used in both the cache and the pre_cache.
				231	*/
				232	struct entry {
				233	struct hlist_node hlist;
				234	struct list_head list;
				235	dm_oblock_t oblock;
				236	dm_cblock_t cblock; /* valid iff in_cache */
				237
				238	/*
				239	* FIXME: pack these better
				240	*/
				241	bool in_cache:1;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	242	bool dirty:1;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	243	unsigned hit_count;
				244	unsigned generation;
				245	unsigned tick;
				246	};
				247
				248	struct mq_policy {
				249	struct dm_cache_policy policy;
				250
				251	/* protects everything */
				252	struct mutex lock;
				253	dm_cblock_t cache_size;
				254	struct io_tracker tracker;
				255
				256	/*
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	257	* We maintain three queues of entries. The cache proper,
				258	* consisting of a clean and dirty queue, contains the currently
				259	* active mappings. Whereas the pre_cache tracks blocks that
				260	* are being hit frequently and potential candidates for promotion
				261	* to the cache.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	262	*/
				263	struct queue pre_cache;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	264	struct queue cache_clean;
				265	struct queue cache_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	266
				267	/*
				268	* Keeps track of time, incremented by the core. We use this to
				269	* avoid attributing multiple hits within the same tick.
				270	*
				271	* Access to tick_protected should be done with the spin lock held.
				272	* It's copied to tick at the start of the map function (within the
				273	* mutex).
				274	*/
				275	spinlock_t tick_lock;
				276	unsigned tick_protected;
				277	unsigned tick;
				278
				279	/*
				280	* A count of the number of times the map function has been called
				281	* and found an entry in the pre_cache or cache. Currently used to
				282	* calculate the generation.
				283	*/
				284	unsigned hit_count;
				285
				286	/*
				287	* A generation is a longish period that is used to trigger some
				288	* book keeping effects. eg, decrementing hit counts on entries.
				289	* This is needed to allow the cache to evolve as io patterns
				290	* change.
				291	*/
				292	unsigned generation;
				293	unsigned generation_period; /* in lookups (will probably change) */
				294
				295	/*
				296	* Entries in the pre_cache whose hit count passes the promotion
				297	* threshold move to the cache proper. Working out the correct
				298	* value for the promotion_threshold is crucial to this policy.
				299	*/
				300	unsigned promote_threshold;
				301
				302	/*
				303	* We need cache_size entries for the cache, and choose to have
				304	* cache_size entries for the pre_cache too. One motivation for
				305	* using the same size is to make the hit counts directly
				306	* comparable between pre_cache and cache.
				307	*/
				308	unsigned nr_entries;
				309	unsigned nr_entries_allocated;
				310	struct list_head free;
				311
				312	/*
				313	* Cache blocks may be unallocated. We store this info in a
				314	* bitset.
				315	*/
				316	unsigned long *allocation_bitset;
				317	unsigned nr_cblocks_allocated;
				318	unsigned find_free_nr_words;
				319	unsigned find_free_last_word;
				320
				321	/*
				322	* The hash table allows us to quickly find an entry by origin
				323	* block. Both pre_cache and cache entries are in here.
				324	*/
				325	unsigned nr_buckets;
				326	dm_block_t hash_bits;
				327	struct hlist_head *table;
				328	};
				329
				330	/*----------------------------------------------------------------*/
				331	/* Free/alloc mq cache entry structures. */
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	332	static void concat_queue(struct list_head lh, struct queue q)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	333	{
				334	unsigned level;
				335
				336	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				337	list_splice(q->qs + level, lh);
				338	}
				339
				340	static void free_entries(struct mq_policy *mq)
				341	{
				342	struct entry e, tmp;
				343
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	344	concat_queue(&mq->free, &mq->pre_cache);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	345	concat_queue(&mq->free, &mq->cache_clean);
				346	concat_queue(&mq->free, &mq->cache_dirty);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	347
				348	list_for_each_entry_safe(e, tmp, &mq->free, list)
				349	kmem_cache_free(mq_entry_cache, e);
				350	}
				351
				352	static int alloc_entries(struct mq_policy *mq, unsigned elts)
				353	{
				354	unsigned u = mq->nr_entries;
				355
				356	INIT_LIST_HEAD(&mq->free);
				357	mq->nr_entries_allocated = 0;
				358
				359	while (u--) {
				360	struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
				361
				362	if (!e) {
				363	free_entries(mq);
				364	return -ENOMEM;
				365	}
				366
				367
				368	list_add(&e->list, &mq->free);
				369	}
				370
				371	return 0;
				372	}
				373
				374	/*----------------------------------------------------------------*/
				375
				376	/*
				377	* Simple hash table implementation. Should replace with the standard hash
				378	* table that's making its way upstream.
				379	*/
				380	static void hash_insert(struct mq_policy mq, struct entry e)
				381	{
				382	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);
				383
				384	hlist_add_head(&e->hlist, mq->table + h);
				385	}
				386
				387	static struct entry hash_lookup(struct mq_policy mq, dm_oblock_t oblock)
				388	{
				389	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
				390	struct hlist_head *bucket = mq->table + h;
				391	struct entry *e;
				392
				393	hlist_for_each_entry(e, bucket, hlist)
				394	if (e->oblock == oblock) {
				395	hlist_del(&e->hlist);
				396	hlist_add_head(&e->hlist, bucket);
				397	return e;
				398	}
				399
				400	return NULL;
				401	}
				402
				403	static void hash_remove(struct entry *e)
				404	{
				405	hlist_del(&e->hlist);
				406	}
				407
				408	/*----------------------------------------------------------------*/
				409
				410	/*
				411	* Allocates a new entry structure. The memory is allocated in one lump,
				412	* so we just handing it out here. Returns NULL if all entries have
				413	* already been allocated. Cannot fail otherwise.
				414	*/
				415	static struct entry alloc_entry(struct mq_policy mq)
				416	{
				417	struct entry *e;
				418
				419	if (mq->nr_entries_allocated >= mq->nr_entries) {
				420	BUG_ON(!list_empty(&mq->free));
				421	return NULL;
				422	}
				423
				424	e = list_entry(list_pop(&mq->free), struct entry, list);
				425	INIT_LIST_HEAD(&e->list);
				426	INIT_HLIST_NODE(&e->hlist);
				427
				428	mq->nr_entries_allocated++;
				429	return e;
				430	}
				431
				432	/*----------------------------------------------------------------*/
				433
				434	/*
				435	* Mark cache blocks allocated or not in the bitset.
				436	*/
				437	static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
				438	{
				439	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
				440	BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
				441
				442	set_bit(from_cblock(cblock), mq->allocation_bitset);
				443	mq->nr_cblocks_allocated++;
				444	}
				445
				446	static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
				447	{
				448	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
				449	BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
				450
				451	clear_bit(from_cblock(cblock), mq->allocation_bitset);
				452	mq->nr_cblocks_allocated--;
				453	}
				454
				455	static bool any_free_cblocks(struct mq_policy *mq)
				456	{
				457	return mq->nr_cblocks_allocated < from_cblock(mq->cache_size);
				458	}
				459
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	460	static bool any_clean_cblocks(struct mq_policy *mq)
				461	{
				462	return !queue_empty(&mq->cache_clean);
				463	}
				464
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	465	/*
				466	* Fills result out with a cache block that isn't in use, or return
				467	* -ENOSPC. This does _not_ mark the cblock as allocated, the caller is
				468	* reponsible for that.
				469	*/
				470	static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
				471	dm_cblock_t result, unsigned last_word)
				472	{
				473	int r = -ENOSPC;
				474	unsigned w;
				475
				476	for (w = begin; w < end; w++) {
				477	/*
				478	* ffz is undefined if no zero exists
				479	*/
				480	if (mq->allocation_bitset[w] != ~0UL) {
				481	*last_word = w;
				482	result = to_cblock((w BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
				483	if (from_cblock(*result) < from_cblock(mq->cache_size))
				484	r = 0;
				485
				486	break;
				487	}
				488	}
				489
				490	return r;
				491	}
				492
				493	static int find_free_cblock(struct mq_policy mq, dm_cblock_t result)
				494	{
				495	int r;
				496
				497	if (!any_free_cblocks(mq))
				498	return -ENOSPC;
				499
				500	r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
				501	if (r == -ENOSPC && mq->find_free_last_word)
				502	r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
				503
				504	return r;
				505	}
				506
				507	/*----------------------------------------------------------------*/
				508
				509	/*
				510	* Now we get to the meat of the policy. This section deals with deciding
				511	* when to add entries to the pre_cache and cache, and move between
				512	* them.
				513	*/
				514
				515	/*
				516	* The queue level is based on the log2 of the hit count.
				517	*/
				518	static unsigned queue_level(struct entry *e)
				519	{
				520	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
				521	}
				522
				523	/*
				524	* Inserts the entry into the pre_cache or the cache. Ensures the cache
				525	* block is marked as allocated if necc. Inserts into the hash table. Sets the
				526	* tick which records when the entry was last moved about.
				527	*/
				528	static void push(struct mq_policy mq, struct entry e)
				529	{
				530	e->tick = mq->tick;
				531	hash_insert(mq, e);
				532
				533	if (e->in_cache) {
				534	alloc_cblock(mq, e->cblock);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	535	queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
				536	queue_level(e), &e->list);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	537	} else
				538	queue_push(&mq->pre_cache, queue_level(e), &e->list);
				539	}
				540
				541	/*
				542	* Removes an entry from pre_cache or cache. Removes from the hash table.
				543	* Frees off the cache block if necc.
				544	*/
				545	static void del(struct mq_policy mq, struct entry e)
				546	{
				547	queue_remove(&e->list);
				548	hash_remove(e);
				549	if (e->in_cache)
				550	free_cblock(mq, e->cblock);
				551	}
				552
				553	/*
				554	* Like del, except it removes the first entry in the queue (ie. the least
				555	* recently used).
				556	*/
				557	static struct entry pop(struct mq_policy mq, struct queue *q)
				558	{
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	559	struct entry *e;
				560	struct list_head *h = queue_pop(q);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	561
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	562	if (!h)
				563	return NULL;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	564
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	565	e = container_of(h, struct entry, list);
				566	hash_remove(e);
				567	if (e->in_cache)
				568	free_cblock(mq, e->cblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	569
				570	return e;
				571	}
				572
				573	/*
				574	* Has this entry already been updated?
				575	*/
				576	static bool updated_this_tick(struct mq_policy mq, struct entry e)
				577	{
				578	return mq->tick == e->tick;
				579	}
				580
				581	/*
				582	* The promotion threshold is adjusted every generation. As are the counts
				583	* of the entries.
				584	*
				585	* At the moment the threshold is taken by averaging the hit counts of some
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	586	* of the entries in the cache (the first 20 entries across all levels in
				587	* ascending order, giving preference to the clean entries at each level).
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	588	*
				589	* We can be much cleverer than this though. For example, each promotion
				590	* could bump up the threshold helping to prevent churn. Much more to do
				591	* here.
				592	*/
				593
				594	#define MAX_TO_AVERAGE 20
				595
				596	static void check_generation(struct mq_policy *mq)
				597	{
				598	unsigned total = 0, nr = 0, count = 0, level;
				599	struct list_head *head;
				600	struct entry *e;
				601
				602	if ((mq->hit_count >= mq->generation_period) &&
				603	(mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
				604
				605	mq->hit_count = 0;
				606	mq->generation++;
				607
				608	for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	609	head = mq->cache_clean.qs + level;
				610	list_for_each_entry(e, head, list) {
				611	nr++;
				612	total += e->hit_count;
				613
				614	if (++count >= MAX_TO_AVERAGE)
				615	break;
				616	}
				617
				618	head = mq->cache_dirty.qs + level;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	619	list_for_each_entry(e, head, list) {
				620	nr++;
				621	total += e->hit_count;
				622
				623	if (++count >= MAX_TO_AVERAGE)
				624	break;
				625	}
				626	}
				627
				628	mq->promote_threshold = nr ? total / nr : 1;
				629	if (mq->promote_threshold * nr < total)
				630	mq->promote_threshold++;
				631	}
				632	}
				633
				634	/*
				635	* Whenever we use an entry we bump up it's hit counter, and push it to the
				636	* back to it's current level.
				637	*/
				638	static void requeue_and_update_tick(struct mq_policy mq, struct entry e)
				639	{
				640	if (updated_this_tick(mq, e))
				641	return;
				642
				643	e->hit_count++;
				644	mq->hit_count++;
				645	check_generation(mq);
				646
				647	/* generation adjustment, to stop the counts increasing forever. */
				648	/* FIXME: divide? */
				649	/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
				650	e->generation = mq->generation;
				651
				652	del(mq, e);
				653	push(mq, e);
				654	}
				655
				656	/*
				657	* Demote the least recently used entry from the cache to the pre_cache.
				658	* Returns the new cache entry to use, and the old origin block it was
				659	* mapped to.
				660	*
				661	* We drop the hit count on the demoted entry back to 1 to stop it bouncing
				662	* straight back into the cache if it's subsequently hit. There are
				663	* various options here, and more experimentation would be good:
				664	*
				665	* - just forget about the demoted entry completely (ie. don't insert it
				666	into the pre_cache).
				667	* - divide the hit count rather that setting to some hard coded value.
				668	* - set the hit count to a hard coded value other than 1, eg, is it better
				669	* if it goes in at level 2?
				670	*/
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	671	static int demote_cblock(struct mq_policy mq, dm_oblock_t oblock, dm_cblock_t *cblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	672	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	673	struct entry *demoted = pop(mq, &mq->cache_clean);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	674
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	675	if (!demoted)
				676	/*
				677	* We could get a block from mq->cache_dirty, but that
				678	* would add extra latency to the triggering bio as it
				679	* waits for the writeback. Better to not promote this
				680	* time and hope there's a clean block next time this block
				681	* is hit.
				682	*/
				683	return -ENOSPC;
				684
				685	*cblock = demoted->cblock;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	686	*oblock = demoted->oblock;
				687	demoted->in_cache = false;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	688	demoted->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	689	demoted->hit_count = 1;
				690	push(mq, demoted);
				691
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	692	return 0;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	693	}
				694
				695	/*
				696	* We modify the basic promotion_threshold depending on the specific io.
				697	*
				698	* If the origin block has been discarded then there's no cost to copy it
				699	* to the cache.
				700	*
				701	* We bias towards reads, since they can be demoted at no cost if they
				702	* haven't been dirtied.
				703	*/
				704	#define DISCARDED_PROMOTE_THRESHOLD 1
				705	#define READ_PROMOTE_THRESHOLD 4
				706	#define WRITE_PROMOTE_THRESHOLD 8
				707
				708	static unsigned adjusted_promote_threshold(struct mq_policy *mq,
				709	bool discarded_oblock, int data_dir)
				710	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	711	if (data_dir == READ)
				712	return mq->promote_threshold + READ_PROMOTE_THRESHOLD;
				713
				714	if (discarded_oblock && (any_free_cblocks(mq) \|\| any_clean_cblocks(mq))) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	715	/*
				716	* We don't need to do any copying at all, so give this a
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	717	* very low threshold.
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	718	*/
				719	return DISCARDED_PROMOTE_THRESHOLD;
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	720	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	721
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	722	return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	723	}
				724
				725	static bool should_promote(struct mq_policy mq, struct entry e,
				726	bool discarded_oblock, int data_dir)
				727	{
				728	return e->hit_count >=
				729	adjusted_promote_threshold(mq, discarded_oblock, data_dir);
				730	}
				731
				732	static int cache_entry_found(struct mq_policy *mq,
				733	struct entry *e,
				734	struct policy_result *result)
				735	{
				736	requeue_and_update_tick(mq, e);
				737
				738	if (e->in_cache) {
				739	result->op = POLICY_HIT;
				740	result->cblock = e->cblock;
				741	}
				742
				743	return 0;
				744	}
				745
				746	/*
Joe Thornber	0184b44	2013-10-24 14:10:28 -0400	[diff] [blame]	747	* Moves an entry from the pre_cache to the cache. The main work is
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	748	* finding which cache block to use.
				749	*/
				750	static int pre_cache_to_cache(struct mq_policy mq, struct entry e,
				751	struct policy_result *result)
				752	{
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	753	int r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	754	dm_cblock_t cblock;
				755
				756	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
				757	result->op = POLICY_REPLACE;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	758	r = demote_cblock(mq, &result->old_oblock, &cblock);
				759	if (r) {
				760	result->op = POLICY_MISS;
				761	return 0;
				762	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	763	} else
				764	result->op = POLICY_NEW;
				765
				766	result->cblock = e->cblock = cblock;
				767
				768	del(mq, e);
				769	e->in_cache = true;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	770	e->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	771	push(mq, e);
				772
				773	return 0;
				774	}
				775
				776	static int pre_cache_entry_found(struct mq_policy mq, struct entry e,
				777	bool can_migrate, bool discarded_oblock,
				778	int data_dir, struct policy_result *result)
				779	{
				780	int r = 0;
				781	bool updated = updated_this_tick(mq, e);
				782
				783	requeue_and_update_tick(mq, e);
				784
				785	if ((!discarded_oblock && updated) \|\|
				786	!should_promote(mq, e, discarded_oblock, data_dir))
				787	result->op = POLICY_MISS;
				788	else if (!can_migrate)
				789	r = -EWOULDBLOCK;
				790	else
				791	r = pre_cache_to_cache(mq, e, result);
				792
				793	return r;
				794	}
				795
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	796	static void insert_entry_in_pre_cache(struct mq_policy *mq,
				797	struct entry *e, dm_oblock_t oblock)
				798	{
				799	e->in_cache = false;
				800	e->dirty = false;
				801	e->oblock = oblock;
				802	e->hit_count = 1;
				803	e->generation = mq->generation;
				804	push(mq, e);
				805	}
				806
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	807	static void insert_in_pre_cache(struct mq_policy *mq,
				808	dm_oblock_t oblock)
				809	{
				810	struct entry *e = alloc_entry(mq);
				811
				812	if (!e)
				813	/*
				814	* There's no spare entry structure, so we grab the least
				815	* used one from the pre_cache.
				816	*/
				817	e = pop(mq, &mq->pre_cache);
				818
				819	if (unlikely(!e)) {
				820	DMWARN("couldn't pop from pre cache");
				821	return;
				822	}
				823
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	824	insert_entry_in_pre_cache(mq, e, oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	825	}
				826
				827	static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
				828	struct policy_result *result)
				829	{
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	830	int r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	831	struct entry *e;
				832	dm_cblock_t cblock;
				833
				834	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	835	r = demote_cblock(mq, &result->old_oblock, &cblock);
				836	if (unlikely(r)) {
				837	result->op = POLICY_MISS;
				838	insert_in_pre_cache(mq, oblock);
				839	return;
				840	}
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	841
Joe Thornber	c86c307	2013-10-24 14:10:28 -0400	[diff] [blame^]	842	/*
				843	* This will always succeed, since we've just demoted.
				844	*/
				845	e = pop(mq, &mq->pre_cache);
				846	result->op = POLICY_REPLACE;
				847
				848	} else {
				849	e = alloc_entry(mq);
				850	if (unlikely(!e))
				851	e = pop(mq, &mq->pre_cache);
				852
				853	if (unlikely(!e)) {
				854	result->op = POLICY_MISS;
				855	return;
				856	}
				857
				858	result->op = POLICY_NEW;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	859	}
				860
				861	e->oblock = oblock;
				862	e->cblock = cblock;
				863	e->in_cache = true;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	864	e->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	865	e->hit_count = 1;
				866	e->generation = mq->generation;
				867	push(mq, e);
				868
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	869	result->cblock = e->cblock;
				870	}
				871
				872	static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
				873	bool can_migrate, bool discarded_oblock,
				874	int data_dir, struct policy_result *result)
				875	{
				876	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
				877	if (can_migrate)
				878	insert_in_cache(mq, oblock, result);
				879	else
				880	return -EWOULDBLOCK;
				881	} else {
				882	insert_in_pre_cache(mq, oblock);
				883	result->op = POLICY_MISS;
				884	}
				885
				886	return 0;
				887	}
				888
				889	/*
				890	* Looks the oblock up in the hash table, then decides whether to put in
				891	* pre_cache, or cache etc.
				892	*/
				893	static int map(struct mq_policy *mq, dm_oblock_t oblock,
				894	bool can_migrate, bool discarded_oblock,
				895	int data_dir, struct policy_result *result)
				896	{
				897	int r = 0;
				898	struct entry *e = hash_lookup(mq, oblock);
				899
				900	if (e && e->in_cache)
				901	r = cache_entry_found(mq, e, result);
				902	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
				903	result->op = POLICY_MISS;
				904	else if (e)
				905	r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
				906	data_dir, result);
				907	else
				908	r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
				909	data_dir, result);
				910
				911	if (r == -EWOULDBLOCK)
				912	result->op = POLICY_MISS;
				913
				914	return r;
				915	}
				916
				917	/*----------------------------------------------------------------*/
				918
				919	/*
				920	* Public interface, via the policy struct. See dm-cache-policy.h for a
				921	* description of these.
				922	*/
				923
				924	static struct mq_policy to_mq_policy(struct dm_cache_policy p)
				925	{
				926	return container_of(p, struct mq_policy, policy);
				927	}
				928
				929	static void mq_destroy(struct dm_cache_policy *p)
				930	{
				931	struct mq_policy *mq = to_mq_policy(p);
				932
				933	free_bitset(mq->allocation_bitset);
				934	kfree(mq->table);
				935	free_entries(mq);
				936	kfree(mq);
				937	}
				938
				939	static void copy_tick(struct mq_policy *mq)
				940	{
				941	unsigned long flags;
				942
				943	spin_lock_irqsave(&mq->tick_lock, flags);
				944	mq->tick = mq->tick_protected;
				945	spin_unlock_irqrestore(&mq->tick_lock, flags);
				946	}
				947
				948	static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
				949	bool can_block, bool can_migrate, bool discarded_oblock,
				950	struct bio bio, struct policy_result result)
				951	{
				952	int r;
				953	struct mq_policy *mq = to_mq_policy(p);
				954
				955	result->op = POLICY_MISS;
				956
				957	if (can_block)
				958	mutex_lock(&mq->lock);
				959	else if (!mutex_trylock(&mq->lock))
				960	return -EWOULDBLOCK;
				961
				962	copy_tick(mq);
				963
				964	iot_examine_bio(&mq->tracker, bio);
				965	r = map(mq, oblock, can_migrate, discarded_oblock,
				966	bio_data_dir(bio), result);
				967
				968	mutex_unlock(&mq->lock);
				969
				970	return r;
				971	}
				972
				973	static int mq_lookup(struct dm_cache_policy p, dm_oblock_t oblock, dm_cblock_t cblock)
				974	{
				975	int r;
				976	struct mq_policy *mq = to_mq_policy(p);
				977	struct entry *e;
				978
				979	if (!mutex_trylock(&mq->lock))
				980	return -EWOULDBLOCK;
				981
				982	e = hash_lookup(mq, oblock);
				983	if (e && e->in_cache) {
				984	*cblock = e->cblock;
				985	r = 0;
				986	} else
				987	r = -ENOENT;
				988
				989	mutex_unlock(&mq->lock);
				990
				991	return r;
				992	}
				993
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	994	/*
				995	* FIXME: __mq_set_clear_dirty can block due to mutex.
				996	* Ideally a policy should not block in functions called
				997	* from the map() function. Explore using RCU.
				998	*/
				999	static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
				1000	{
				1001	struct mq_policy *mq = to_mq_policy(p);
				1002	struct entry *e;
				1003
				1004	mutex_lock(&mq->lock);
				1005	e = hash_lookup(mq, oblock);
				1006	if (!e)
				1007	DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
				1008	else {
				1009	BUG_ON(!e->in_cache);
				1010
				1011	del(mq, e);
				1012	e->dirty = set;
				1013	push(mq, e);
				1014	}
				1015	mutex_unlock(&mq->lock);
				1016	}
				1017
				1018	static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				1019	{
				1020	__mq_set_clear_dirty(p, oblock, true);
				1021	}
				1022
				1023	static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
				1024	{
				1025	__mq_set_clear_dirty(p, oblock, false);
				1026	}
				1027
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1028	static int mq_load_mapping(struct dm_cache_policy *p,
				1029	dm_oblock_t oblock, dm_cblock_t cblock,
				1030	uint32_t hint, bool hint_valid)
				1031	{
				1032	struct mq_policy *mq = to_mq_policy(p);
				1033	struct entry *e;
				1034
				1035	e = alloc_entry(mq);
				1036	if (!e)
				1037	return -ENOMEM;
				1038
				1039	e->cblock = cblock;
				1040	e->oblock = oblock;
				1041	e->in_cache = true;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1042	e->dirty = false; /* this gets corrected in a minute */
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1043	e->hit_count = hint_valid ? hint : 1;
				1044	e->generation = mq->generation;
				1045	push(mq, e);
				1046
				1047	return 0;
				1048	}
				1049
				1050	static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
				1051	void *context)
				1052	{
				1053	struct mq_policy *mq = to_mq_policy(p);
				1054	int r = 0;
				1055	struct entry *e;
				1056	unsigned level;
				1057
				1058	mutex_lock(&mq->lock);
				1059
				1060	for (level = 0; level < NR_QUEUE_LEVELS; level++)
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1061	list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
				1062	r = fn(context, e->cblock, e->oblock, e->hit_count);
				1063	if (r)
				1064	goto out;
				1065	}
				1066
				1067	for (level = 0; level < NR_QUEUE_LEVELS; level++)
				1068	list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1069	r = fn(context, e->cblock, e->oblock, e->hit_count);
				1070	if (r)
				1071	goto out;
				1072	}
				1073
				1074	out:
				1075	mutex_unlock(&mq->lock);
				1076
				1077	return r;
				1078	}
				1079
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1080	static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1081	{
Geert Uytterhoeven	b936bf8	2013-07-26 09:57:31 +0200	[diff] [blame]	1082	struct mq_policy *mq = to_mq_policy(p);
				1083	struct entry *e;
				1084
				1085	mutex_lock(&mq->lock);
				1086
				1087	e = hash_lookup(mq, oblock);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1088
				1089	BUG_ON(!e \|\| !e->in_cache);
				1090
				1091	del(mq, e);
				1092	e->in_cache = false;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1093	e->dirty = false;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1094	push(mq, e);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1095
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1096	mutex_unlock(&mq->lock);
				1097	}
				1098
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1099	static int __mq_writeback_work(struct mq_policy mq, dm_oblock_t oblock,
				1100	dm_cblock_t *cblock)
				1101	{
				1102	struct entry *e = pop(mq, &mq->cache_dirty);
				1103
				1104	if (!e)
				1105	return -ENODATA;
				1106
				1107	*oblock = e->oblock;
				1108	*cblock = e->cblock;
				1109	e->dirty = false;
				1110	push(mq, e);
				1111
				1112	return 0;
				1113	}
				1114
				1115	static int mq_writeback_work(struct dm_cache_policy p, dm_oblock_t oblock,
				1116	dm_cblock_t *cblock)
				1117	{
				1118	int r;
				1119	struct mq_policy *mq = to_mq_policy(p);
				1120
				1121	mutex_lock(&mq->lock);
				1122	r = __mq_writeback_work(mq, oblock, cblock);
				1123	mutex_unlock(&mq->lock);
				1124
				1125	return r;
				1126	}
				1127
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1128	static void force_mapping(struct mq_policy *mq,
				1129	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
				1130	{
				1131	struct entry *e = hash_lookup(mq, current_oblock);
				1132
				1133	BUG_ON(!e \|\| !e->in_cache);
				1134
				1135	del(mq, e);
				1136	e->oblock = new_oblock;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1137	e->dirty = true;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1138	push(mq, e);
				1139	}
				1140
				1141	static void mq_force_mapping(struct dm_cache_policy *p,
				1142	dm_oblock_t current_oblock, dm_oblock_t new_oblock)
				1143	{
				1144	struct mq_policy *mq = to_mq_policy(p);
				1145
				1146	mutex_lock(&mq->lock);
				1147	force_mapping(mq, current_oblock, new_oblock);
				1148	mutex_unlock(&mq->lock);
				1149	}
				1150
				1151	static dm_cblock_t mq_residency(struct dm_cache_policy *p)
				1152	{
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1153	dm_cblock_t r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1154	struct mq_policy *mq = to_mq_policy(p);
				1155
Joe Thornber	99ba2ae	2013-10-21 11:44:57 +0100	[diff] [blame]	1156	mutex_lock(&mq->lock);
				1157	r = to_cblock(mq->nr_cblocks_allocated);
				1158	mutex_unlock(&mq->lock);
				1159
				1160	return r;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1161	}
				1162
				1163	static void mq_tick(struct dm_cache_policy *p)
				1164	{
				1165	struct mq_policy *mq = to_mq_policy(p);
				1166	unsigned long flags;
				1167
				1168	spin_lock_irqsave(&mq->tick_lock, flags);
				1169	mq->tick_protected++;
				1170	spin_unlock_irqrestore(&mq->tick_lock, flags);
				1171	}
				1172
				1173	static int mq_set_config_value(struct dm_cache_policy *p,
				1174	const char key, const char value)
				1175	{
				1176	struct mq_policy *mq = to_mq_policy(p);
				1177	enum io_pattern pattern;
				1178	unsigned long tmp;
				1179
				1180	if (!strcasecmp(key, "random_threshold"))
				1181	pattern = PATTERN_RANDOM;
				1182	else if (!strcasecmp(key, "sequential_threshold"))
				1183	pattern = PATTERN_SEQUENTIAL;
				1184	else
				1185	return -EINVAL;
				1186
				1187	if (kstrtoul(value, 10, &tmp))
				1188	return -EINVAL;
				1189
				1190	mq->tracker.thresholds[pattern] = tmp;
				1191
				1192	return 0;
				1193	}
				1194
				1195	static int mq_emit_config_values(struct dm_cache_policy p, char result, unsigned maxlen)
				1196	{
				1197	ssize_t sz = 0;
				1198	struct mq_policy *mq = to_mq_policy(p);
				1199
				1200	DMEMIT("4 random_threshold %u sequential_threshold %u",
				1201	mq->tracker.thresholds[PATTERN_RANDOM],
				1202	mq->tracker.thresholds[PATTERN_SEQUENTIAL]);
				1203
				1204	return 0;
				1205	}
				1206
				1207	/* Init the policy plugin interface function pointers. */
				1208	static void init_policy_functions(struct mq_policy *mq)
				1209	{
				1210	mq->policy.destroy = mq_destroy;
				1211	mq->policy.map = mq_map;
				1212	mq->policy.lookup = mq_lookup;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1213	mq->policy.set_dirty = mq_set_dirty;
				1214	mq->policy.clear_dirty = mq_clear_dirty;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1215	mq->policy.load_mapping = mq_load_mapping;
				1216	mq->policy.walk_mappings = mq_walk_mappings;
				1217	mq->policy.remove_mapping = mq_remove_mapping;
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1218	mq->policy.writeback_work = mq_writeback_work;
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1219	mq->policy.force_mapping = mq_force_mapping;
				1220	mq->policy.residency = mq_residency;
				1221	mq->policy.tick = mq_tick;
				1222	mq->policy.emit_config_values = mq_emit_config_values;
				1223	mq->policy.set_config_value = mq_set_config_value;
				1224	}
				1225
				1226	static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
				1227	sector_t origin_size,
				1228	sector_t cache_block_size)
				1229	{
				1230	int r;
				1231	struct mq_policy mq = kzalloc(sizeof(mq), GFP_KERNEL);
				1232
				1233	if (!mq)
				1234	return NULL;
				1235
				1236	init_policy_functions(mq);
				1237	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
				1238
				1239	mq->cache_size = cache_size;
				1240	mq->tick_protected = 0;
				1241	mq->tick = 0;
				1242	mq->hit_count = 0;
				1243	mq->generation = 0;
				1244	mq->promote_threshold = 0;
				1245	mutex_init(&mq->lock);
				1246	spin_lock_init(&mq->tick_lock);
				1247	mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
				1248	mq->find_free_last_word = 0;
				1249
				1250	queue_init(&mq->pre_cache);
Joe Thornber	01911c1	2013-10-24 14:10:28 -0400	[diff] [blame]	1251	queue_init(&mq->cache_clean);
				1252	queue_init(&mq->cache_dirty);
				1253
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1254	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
				1255
				1256	mq->nr_entries = 2 * from_cblock(cache_size);
				1257	r = alloc_entries(mq, mq->nr_entries);
				1258	if (r)
				1259	goto bad_cache_alloc;
				1260
				1261	mq->nr_entries_allocated = 0;
				1262	mq->nr_cblocks_allocated = 0;
				1263
				1264	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
				1265	mq->hash_bits = ffs(mq->nr_buckets) - 1;
				1266	mq->table = kzalloc(sizeof(mq->table) mq->nr_buckets, GFP_KERNEL);
				1267	if (!mq->table)
				1268	goto bad_alloc_table;
				1269
				1270	mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
				1271	if (!mq->allocation_bitset)
				1272	goto bad_alloc_bitset;
				1273
				1274	return &mq->policy;
				1275
				1276	bad_alloc_bitset:
				1277	kfree(mq->table);
				1278	bad_alloc_table:
				1279	free_entries(mq);
				1280	bad_cache_alloc:
				1281	kfree(mq);
				1282
				1283	return NULL;
				1284	}
				1285
/*----------------------------------------------------------------*/
				1287
				1288	static struct dm_cache_policy_type mq_policy_type = {
				1289	.name = "mq",
Mike Snitzer	4e7f506	2013-03-20 17:21:27 +0000	[diff] [blame]	1290	.version = {1, 0, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1291	.hint_size = 4,
				1292	.owner = THIS_MODULE,
				1293	.create = mq_create
				1294	};
				1295
				1296	static struct dm_cache_policy_type default_policy_type = {
				1297	.name = "default",
Mike Snitzer	4e7f506	2013-03-20 17:21:27 +0000	[diff] [blame]	1298	.version = {1, 0, 0},
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1299	.hint_size = 4,
				1300	.owner = THIS_MODULE,
				1301	.create = mq_create
				1302	};
				1303
				1304	static int __init mq_init(void)
				1305	{
				1306	int r;
				1307
				1308	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
				1309	sizeof(struct entry),
				1310	__alignof__(struct entry),
				1311	0, NULL);
				1312	if (!mq_entry_cache)
				1313	goto bad;
				1314
				1315	r = dm_cache_policy_register(&mq_policy_type);
				1316	if (r) {
				1317	DMERR("register failed %d", r);
				1318	goto bad_register_mq;
				1319	}
				1320
				1321	r = dm_cache_policy_register(&default_policy_type);
				1322	if (!r) {
Mike Snitzer	4e7f506	2013-03-20 17:21:27 +0000	[diff] [blame]	1323	DMINFO("version %u.%u.%u loaded",
				1324	mq_policy_type.version[0],
				1325	mq_policy_type.version[1],
				1326	mq_policy_type.version[2]);
Joe Thornber	f283635	2013-03-01 22:45:51 +0000	[diff] [blame]	1327	return 0;
				1328	}
				1329
				1330	DMERR("register failed (as default) %d", r);
				1331
				1332	dm_cache_policy_unregister(&mq_policy_type);
				1333	bad_register_mq:
				1334	kmem_cache_destroy(mq_entry_cache);
				1335	bad:
				1336	return -ENOMEM;
				1337	}
				1338
				1339	static void __exit mq_exit(void)
				1340	{
				1341	dm_cache_policy_unregister(&mq_policy_type);
				1342	dm_cache_policy_unregister(&default_policy_type);
				1343
				1344	kmem_cache_destroy(mq_entry_cache);
				1345	}
				1346
				1347	module_init(mq_init);
				1348	module_exit(mq_exit);
				1349
				1350	MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
				1351	MODULE_LICENSE("GPL");
				1352	MODULE_DESCRIPTION("mq cache policy");
				1353
				1354	MODULE_ALIAS("dm-cache-default");