/*
 * Copyright (C) 2015 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-policy.h"
#include "dm-cache-policy-internal.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/math64.h>

#define DM_MSG_PREFIX "cache-policy-smq"

/*----------------------------------------------------------------*/

/*
 * Safe division functions that return zero on divide by zero.
 */
static unsigned safe_div(unsigned n, unsigned d)
{
	return d ? n / d : 0u;
}

static unsigned safe_mod(unsigned n, unsigned d)
{
	return d ? n % d : 0u;
}

/*----------------------------------------------------------------*/

struct entry {
	unsigned hash_next:28;
	unsigned prev:28;
	unsigned next:28;
	unsigned level:7;
	bool dirty:1;
	bool allocated:1;
	bool sentinel:1;

	dm_oblock_t oblock;
};

/*----------------------------------------------------------------*/

#define INDEXER_NULL ((1u << 28u) - 1u)
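
/*
 * Entries refer to one another by 28 bit index rather than by pointer;
 * INDEXER_NULL is the out-of-band 'null' value, which caps an entry
 * space at a little under 2^28 entries.
 */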

/*
 * An entry_space manages a set of entries that we use for the queues.
 * The clean and dirty queues share entries, so this object is separate
 * from the queue itself.
 */
struct entry_space {
	struct entry *begin;
	struct entry *end;
};

static int space_init(struct entry_space *es, unsigned nr_entries)
{
	if (!nr_entries) {
		es->begin = es->end = NULL;
		return 0;
	}

	es->begin = vzalloc(sizeof(struct entry) * nr_entries);
	if (!es->begin)
		return -ENOMEM;

	es->end = es->begin + nr_entries;
	return 0;
}

static void space_exit(struct entry_space *es)
{
	vfree(es->begin);
}

static struct entry *__get_entry(struct entry_space *es, unsigned block)
{
	struct entry *e;

	e = es->begin + block;
	BUG_ON(e >= es->end);

	return e;
}

static unsigned to_index(struct entry_space *es, struct entry *e)
{
	BUG_ON(e < es->begin || e >= es->end);
	return e - es->begin;
}

static struct entry *to_entry(struct entry_space *es, unsigned block)
{
	if (block == INDEXER_NULL)
		return NULL;

	return __get_entry(es, block);
}

/*----------------------------------------------------------------*/

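/*
 * A doubly linked list of entries, implemented with indices into an
 * entry_space rather than pointers.  Sentinel entries may sit on a list
 * but are never counted in nr_elts.
 */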
struct ilist {
	unsigned nr_elts;	/* excluding sentinel entries */
	unsigned head, tail;
};

static void l_init(struct ilist *l)
{
	l->nr_elts = 0;
	l->head = l->tail = INDEXER_NULL;
}

static struct entry *l_head(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->head);
}

static struct entry *l_tail(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->tail);
}

static struct entry *l_next(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->next);
}

static struct entry *l_prev(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->prev);
}

static bool l_empty(struct ilist *l)
{
	return l->head == INDEXER_NULL;
}

static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *head = l_head(es, l);

	e->next = l->head;
	e->prev = INDEXER_NULL;

	if (head)
		head->prev = l->head = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}

static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *tail = l_tail(es, l);

	e->next = INDEXER_NULL;
	e->prev = l->tail;

	if (tail)
		tail->next = l->tail = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}

static void l_add_before(struct entry_space *es, struct ilist *l,
			 struct entry *old, struct entry *e)
{
	struct entry *prev = l_prev(es, old);

	if (!prev)
		l_add_head(es, l, e);

	else {
		e->prev = old->prev;
		e->next = to_index(es, old);
		prev->next = old->prev = to_index(es, e);

		if (!e->sentinel)
			l->nr_elts++;
	}
}

static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *prev = l_prev(es, e);
	struct entry *next = l_next(es, e);

	if (prev)
		prev->next = e->next;
	else
		l->head = e->next;

	if (next)
		next->prev = e->prev;
	else
		l->tail = e->prev;

	if (!e->sentinel)
		l->nr_elts--;
}

static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_tail(es, l); e; e = l_prev(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

/*----------------------------------------------------------------*/

/*
 * The stochastic-multi-queue is a set of lru lists stacked into levels.
 * Entries are moved up levels when they are used, which loosely orders the
 * most accessed entries in the top levels and least in the bottom.  This
 * structure is *much* better than a single lru list.
 */
#define MAX_LEVELS 64u

struct queue {
	struct entry_space *es;

	unsigned nr_elts;
	unsigned nr_levels;
	struct ilist qs[MAX_LEVELS];

	/*
	 * We maintain a count of the number of entries we would like in each
	 * level.
	 */
	unsigned last_target_nr_elts;
	unsigned nr_top_levels;
	unsigned nr_in_top_levels;
	unsigned target_count[MAX_LEVELS];
};

static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
{
	unsigned i;

	q->es = es;
	q->nr_elts = 0;
	q->nr_levels = nr_levels;

	for (i = 0; i < q->nr_levels; i++) {
		l_init(q->qs + i);
		q->target_count[i] = 0u;
	}

	q->last_target_nr_elts = 0u;
	q->nr_top_levels = 0u;
	q->nr_in_top_levels = 0u;
}

static unsigned q_size(struct queue *q)
{
	return q->nr_elts;
}

/*
 * Insert an entry to the back of the given level.
 */
static void q_push(struct queue *q, struct entry *e)
{
	if (!e->sentinel)
		q->nr_elts++;

	l_add_tail(q->es, q->qs + e->level, e);
}

static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
	if (!e->sentinel)
		q->nr_elts++;

	l_add_before(q->es, q->qs + e->level, old, e);
}

static void q_del(struct queue *q, struct entry *e)
{
	l_del(q->es, q->qs + e->level, e);
	if (!e->sentinel)
		q->nr_elts--;
}

/*
 * Return the oldest entry of the lowest populated level.
 */
static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
{
	unsigned level;
	struct entry *e;

	max_level = min(max_level, q->nr_levels);

	for (level = 0; level < max_level; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			if (e->sentinel) {
				if (can_cross_sentinel)
					continue;
				else
					break;
			}

			return e;
		}

	return NULL;
}

static struct entry *q_pop(struct queue *q)
{
	struct entry *e = q_peek(q, q->nr_levels, true);

	if (e)
		q_del(q, e);

	return e;
}

/*
 * Pops an entry from a level that is not past a sentinel.
 */
static struct entry *q_pop_old(struct queue *q, unsigned max_level)
{
	struct entry *e = q_peek(q, max_level, false);

	if (e)
		q_del(q, e);

	return e;
}

/*
 * This function assumes there is a non-sentinel entry to pop.  It's only
 * used by redistribute, so we know this is true.  It also doesn't adjust
 * the q->nr_elts count.
 */
static struct entry *__redist_pop_from(struct queue *q, unsigned level)
{
	struct entry *e;

	for (; level < q->nr_levels; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
			if (!e->sentinel) {
				l_del(q->es, q->qs + e->level, e);
				return e;
			}

	return NULL;
}

static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
{
	unsigned level, nr_levels, entries_per_level, remainder;

	BUG_ON(lbegin > lend);
	BUG_ON(lend > q->nr_levels);
	nr_levels = lend - lbegin;
	entries_per_level = safe_div(nr_elts, nr_levels);
	remainder = safe_mod(nr_elts, nr_levels);

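	/* the first 'remainder' levels in the subrange get one extra entry */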
	for (level = lbegin; level < lend; level++)
		q->target_count[level] =
			(level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
}

/*
 * Typically we have fewer elements in the top few levels which allows us
 * to adjust the promote threshold nicely.
 */
static void q_set_targets(struct queue *q)
{
	if (q->last_target_nr_elts == q->nr_elts)
		return;

	q->last_target_nr_elts = q->nr_elts;

	if (q->nr_top_levels > q->nr_levels)
		q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);

	else {
		q_set_targets_subrange_(q, q->nr_in_top_levels,
					q->nr_levels - q->nr_top_levels, q->nr_levels);

		if (q->nr_in_top_levels < q->nr_elts)
			q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
						0, q->nr_levels - q->nr_top_levels);
		else
			q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
	}
}

static void q_redistribute(struct queue *q)
{
	unsigned target, level;
	struct ilist *l, *l_above;
	struct entry *e;

	q_set_targets(q);

	for (level = 0u; level < q->nr_levels - 1u; level++) {
		l = q->qs + level;
		target = q->target_count[level];

		/*
		 * Pull down some entries from the level above.
		 */
		while (l->nr_elts < target) {
			e = __redist_pop_from(q, level + 1u);
			if (!e) {
				/* bug in nr_elts */
				break;
			}

			e->level = level;
			l_add_tail(q->es, l, e);
		}

		/*
		 * Push some entries up.
		 */
		l_above = q->qs + level + 1u;
		while (l->nr_elts > target) {
			e = l_pop_tail(q->es, l);

			if (!e)
				/* bug in nr_elts */
				break;

			e->level = level + 1u;
			l_add_head(q->es, l_above, e);
		}
	}
}

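/*
 * Requeue an entry 'extra_levels' higher, swapping it with a non-sentinel
 * entry from the destination level so the per-level populations stay
 * balanced.
 */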
static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
{
	struct entry *de;
	unsigned new_level;

	q_del(q, e);

	if (extra_levels && (e->level < q->nr_levels - 1u)) {
		new_level = min(q->nr_levels - 1u, e->level + extra_levels);
		for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
			if (de->sentinel)
				continue;

			q_del(q, de);
			de->level = e->level;

			if (dest)
				q_push_before(q, dest, de);
			else
				q_push(q, de);
			break;
		}

		e->level = new_level;
	}

	q_push(q, e);
}

static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
{
	q_requeue_before(q, NULL, e, extra_levels);
}

/*----------------------------------------------------------------*/

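/*
 * Hit ratios are held in 8 bit fixed point: a ratio of 1.0 is
 * represented by 1 << FP_SHIFT (256), so SIXTEENTH and EIGHTH below are
 * 16 and 32 respectively.
 */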
#define FP_SHIFT 8
#define SIXTEENTH (1u << (FP_SHIFT - 4u))
#define EIGHTH (1u << (FP_SHIFT - 3u))

struct stats {
	unsigned hit_threshold;
	unsigned hits;
	unsigned misses;
};

enum performance {
	Q_POOR,
	Q_FAIR,
	Q_WELL
};

static void stats_init(struct stats *s, unsigned nr_levels)
{
	s->hit_threshold = (nr_levels * 3u) / 4u;
	s->hits = 0u;
	s->misses = 0u;
}

static void stats_reset(struct stats *s)
{
	s->hits = s->misses = 0u;
}

static void stats_level_accessed(struct stats *s, unsigned level)
{
	if (level >= s->hit_threshold)
		s->hits++;
	else
		s->misses++;
}

static void stats_miss(struct stats *s)
{
	s->misses++;
}

/*
 * There are times when we don't have any confidence in the hotspot
 * queue, such as when a fresh cache is created and the blocks have been
 * spread out across the levels, or when the io load changes.  We detect
 * this by seeing how often a lookup is in the top levels of the hotspot
 * queue.
 */
static enum performance stats_assess(struct stats *s)
{
	unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);

	if (confidence < SIXTEENTH)
		return Q_POOR;

	else if (confidence < EIGHTH)
		return Q_FAIR;

	else
		return Q_WELL;
}

/*----------------------------------------------------------------*/

struct hash_table {
	struct entry_space *es;
	unsigned long long hash_bits;
	unsigned *buckets;
};

/*
 * All cache entries are stored in a chained hash table.  To save space we
 * use indexing again, and only store indexes to the next entry.
 */
static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
{
	unsigned i, nr_buckets;

	ht->es = es;
	nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
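	/* ie. one bucket for roughly every four entries, with a floor of 16 */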
	ht->hash_bits = __ffs(nr_buckets);

	ht->buckets = vmalloc(sizeof(*ht->buckets) * nr_buckets);
	if (!ht->buckets)
		return -ENOMEM;

	for (i = 0; i < nr_buckets; i++)
		ht->buckets[i] = INDEXER_NULL;

	return 0;
}

static void h_exit(struct hash_table *ht)
{
	vfree(ht->buckets);
}

static struct entry *h_head(struct hash_table *ht, unsigned bucket)
{
	return to_entry(ht->es, ht->buckets[bucket]);
}

static struct entry *h_next(struct hash_table *ht, struct entry *e)
{
	return to_entry(ht->es, e->hash_next);
}

static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e)
{
	e->hash_next = ht->buckets[bucket];
	ht->buckets[bucket] = to_index(ht->es, e);
}

static void h_insert(struct hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	__h_insert(ht, h, e);
}

static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock,
				struct entry **prev)
{
	struct entry *e;

	*prev = NULL;
	for (e = h_head(ht, h); e; e = h_next(ht, e)) {
		if (e->oblock == oblock)
			return e;

		*prev = e;
	}

	return NULL;
}

static void __h_unlink(struct hash_table *ht, unsigned h,
		       struct entry *e, struct entry *prev)
{
	if (prev)
		prev->hash_next = e->hash_next;
	else
		ht->buckets[h] = e->hash_next;
}

/*
 * Also moves each entry to the front of the bucket.
 */
static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
{
	struct entry *e, *prev;
	unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);

	e = __h_lookup(ht, h, oblock, &prev);
	if (e && prev) {
		/*
		 * Move to the front because this entry is likely
		 * to be hit again.
		 */
		__h_unlink(ht, h, e, prev);
		__h_insert(ht, h, e);
	}

	return e;
}

static void h_remove(struct hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	struct entry *prev;

	/*
	 * The down side of using a singly linked list is we have to
	 * iterate the bucket to remove an item.
	 */
	e = __h_lookup(ht, h, e->oblock, &prev);
	if (e)
		__h_unlink(ht, h, e, prev);
}

/*----------------------------------------------------------------*/

struct entry_alloc {
	struct entry_space *es;
	unsigned begin;

	unsigned nr_allocated;
	struct ilist free;
};

static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
			   unsigned begin, unsigned end)
{
	unsigned i;

	ea->es = es;
	ea->nr_allocated = 0u;
	ea->begin = begin;

	l_init(&ea->free);
	for (i = begin; i != end; i++)
		l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
}

static void init_entry(struct entry *e)
{
	/*
	 * We can't memset because that would clear the sentinel bit, which
	 * must remain constant.
	 */
	e->hash_next = INDEXER_NULL;
	e->next = INDEXER_NULL;
	e->prev = INDEXER_NULL;
	e->level = 0u;
	e->allocated = true;
}

static struct entry *alloc_entry(struct entry_alloc *ea)
{
	struct entry *e;

	if (l_empty(&ea->free))
		return NULL;

	e = l_pop_tail(ea->es, &ea->free);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
{
	struct entry *e = __get_entry(ea->es, ea->begin + i);

	BUG_ON(e->allocated);

	l_del(ea->es, &ea->free, e);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

static void free_entry(struct entry_alloc *ea, struct entry *e)
{
	BUG_ON(!ea->nr_allocated);
	BUG_ON(!e->allocated);

	ea->nr_allocated--;
	e->allocated = false;
	l_add_tail(ea->es, &ea->free, e);
}

static bool allocator_empty(struct entry_alloc *ea)
{
	return l_empty(&ea->free);
}

static unsigned get_index(struct entry_alloc *ea, struct entry *e)
{
	return to_index(ea->es, e) - ea->begin;
}

static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
{
	return __get_entry(ea->es, ea->begin + index);
}

/*----------------------------------------------------------------*/

#define NR_HOTSPOT_LEVELS 64u
#define NR_CACHE_LEVELS 64u

#define WRITEBACK_PERIOD (10 * HZ)
#define DEMOTE_PERIOD (60 * HZ)

#define HOTSPOT_UPDATE_PERIOD (HZ)
#define CACHE_UPDATE_PERIOD (10u * HZ)
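/*
 * The periods above are in jiffies: eg. WRITEBACK_PERIOD is ten seconds
 * and DEMOTE_PERIOD a minute.
 */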

struct smq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	spinlock_t lock;
	dm_cblock_t cache_size;
	sector_t cache_block_size;

	sector_t hotspot_block_size;
	unsigned nr_hotspot_blocks;
	unsigned cache_blocks_per_hotspot_block;
	unsigned hotspot_level_jump;

	struct entry_space es;
	struct entry_alloc writeback_sentinel_alloc;
	struct entry_alloc demote_sentinel_alloc;
	struct entry_alloc hotspot_alloc;
	struct entry_alloc cache_alloc;

	unsigned long *hotspot_hit_bits;
	unsigned long *cache_hit_bits;

	/*
	 * We maintain three queues of entries.  The cache proper,
	 * consisting of a clean and a dirty queue, contains the currently
	 * active mappings.  The hotspot queue uses a larger block size to
	 * track blocks that are being hit frequently, as potential
	 * candidates for promotion to the cache.
	 */
	struct queue hotspot;
	struct queue clean;
	struct queue dirty;

	struct stats hotspot_stats;
	struct stats cache_stats;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 */
	unsigned tick;

	/*
	 * The hash tables allow us to quickly find an entry by origin
	 * block.
	 */
	struct hash_table table;
	struct hash_table hotspot_table;

	bool current_writeback_sentinels;
	unsigned long next_writeback_period;

	bool current_demote_sentinels;
	unsigned long next_demote_period;

	unsigned write_promote_level;
	unsigned read_promote_level;

	unsigned long next_hotspot_period;
	unsigned long next_cache_period;
};

/*----------------------------------------------------------------*/

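/*
 * Each sentinel allocator holds two banks of NR_CACHE_LEVELS sentinels;
 * 'which' selects the bank currently in use.  update_sentinels() below
 * repositions one bank each period and then switches to the other, so
 * writeback and demotion only see entries that have sat unmoved for
 * roughly a full period.
 */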
static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
{
	return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
}

static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
}

static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
}

static void __update_writeback_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->dirty;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = writeback_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void __update_demote_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->clean;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = demote_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void update_sentinels(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_writeback_period)) {
		__update_writeback_sentinels(mq);
		mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
		mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
	}

	if (time_after(jiffies, mq->next_demote_period)) {
		__update_demote_sentinels(mq);
		mq->next_demote_period = jiffies + DEMOTE_PERIOD;
		mq->current_demote_sentinels = !mq->current_demote_sentinels;
	}
}

static void __sentinels_init(struct smq_policy *mq)
{
	unsigned level;
	struct entry *sentinel;

	for (level = 0; level < NR_CACHE_LEVELS; level++) {
		sentinel = writeback_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->dirty, sentinel);

		sentinel = demote_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->clean, sentinel);
	}
}

static void sentinels_init(struct smq_policy *mq)
{
	mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
	mq->next_demote_period = jiffies + DEMOTE_PERIOD;

	mq->current_writeback_sentinels = false;
	mq->current_demote_sentinels = false;
	__sentinels_init(mq);

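	/*
	 * Flip to the second bank and push those sentinels too, so that
	 * both banks start out on the queues.
	 */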
	mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
	mq->current_demote_sentinels = !mq->current_demote_sentinels;
	__sentinels_init(mq);
}

/*----------------------------------------------------------------*/

/*
 * These methods tie together the dirty queue, clean queue and hash table.
 */
static void push_new(struct smq_policy *mq, struct entry *e)
{
	struct queue *q = e->dirty ? &mq->dirty : &mq->clean;
	h_insert(&mq->table, e);
	q_push(q, e);
}

static void push(struct smq_policy *mq, struct entry *e)
{
	struct entry *sentinel;

	h_insert(&mq->table, e);

	/*
	 * Punch this into the queue just in front of the sentinel, to
	 * ensure it's cleaned straight away.
	 */
	if (e->dirty) {
		sentinel = writeback_sentinel(mq, e->level);
		q_push_before(&mq->dirty, sentinel, e);
	} else {
		sentinel = demote_sentinel(mq, e->level);
		q_push_before(&mq->clean, sentinel, e);
	}
}

/*
 * Removes an entry from its queue, and from the hash table.
 */
static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
{
	q_del(q, e);
	h_remove(&mq->table, e);
}

static void del(struct smq_policy *mq, struct entry *e)
{
	__del(mq, e->dirty ? &mq->dirty : &mq->clean, e);
}

static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level)
{
	struct entry *e = q_pop_old(q, max_level);
	if (e)
		h_remove(&mq->table, e);
	return e;
}

static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
{
	return to_cblock(get_index(&mq->cache_alloc, e));
}

static void requeue(struct smq_policy *mq, struct entry *e)
{
	struct entry *sentinel;

	if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
		if (e->dirty) {
			sentinel = writeback_sentinel(mq, e->level);
			q_requeue_before(&mq->dirty, sentinel, e, 1u);
		} else {
			sentinel = demote_sentinel(mq, e->level);
			q_requeue_before(&mq->clean, sentinel, e, 1u);
		}
	}
}

static unsigned default_promote_level(struct smq_policy *mq)
{
	/*
	 * The promote level depends on the current performance of the
	 * cache.
	 *
	 * If the cache is performing badly, then we can't afford
	 * to promote much without causing performance to drop below that
	 * of the origin device.
	 *
	 * If the cache is performing well, then we don't need to promote
	 * much.  If it isn't broken, don't fix it.
	 *
	 * If the cache is middling then we promote more.
	 *
	 * This scheme reminds me of a graph of entropy vs probability of a
	 * binary variable.
	 */
	static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};

	unsigned hits = mq->cache_stats.hits;
	unsigned misses = mq->cache_stats.misses;
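
	/*
	 * The hit ratio in sixteenths gives an index in [0, 16], matching
	 * the seventeen entries in the table above.
	 */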
	unsigned index = safe_div(hits << 4u, hits + misses);
	return table[index];
}

static void update_promote_levels(struct smq_policy *mq)
{
	/*
	 * If there are unused cache entries then we want to be really
	 * eager to promote.
	 */
	unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
		default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);

	/*
	 * If the hotspot queue is performing badly then we have little
	 * confidence that we know which blocks to promote.  So we cut down
	 * the amount of promotions.
	 */
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		threshold_level /= 4u;
		break;

	case Q_FAIR:
		threshold_level /= 2u;
		break;

	case Q_WELL:
		break;
	}

	mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u;
}

/*
 * If the hotspot queue is performing badly, then we try and move entries
 * around more quickly.
 */
static void update_level_jump(struct smq_policy *mq)
{
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		mq->hotspot_level_jump = 4u;
		break;

	case Q_FAIR:
		mq->hotspot_level_jump = 2u;
		break;

	case Q_WELL:
		mq->hotspot_level_jump = 1u;
		break;
	}
}

static void end_hotspot_period(struct smq_policy *mq)
{
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
	update_promote_levels(mq);

	if (time_after(jiffies, mq->next_hotspot_period)) {
		update_level_jump(mq);
		q_redistribute(&mq->hotspot);
		stats_reset(&mq->hotspot_stats);
		mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
	}
}

static void end_cache_period(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_cache_period)) {
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));

		q_redistribute(&mq->dirty);
		q_redistribute(&mq->clean);
		stats_reset(&mq->cache_stats);

		mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
	}
}

static int demote_cblock(struct smq_policy *mq,
			 struct policy_locker *locker,
			 dm_oblock_t *oblock)
{
	struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false);
	if (!demoted)
		/*
		 * We could get a block from mq->dirty, but that
		 * would add extra latency to the triggering bio as it
		 * waits for the writeback.  Better to not promote this
		 * time and hope there's a clean block next time this block
		 * is hit.
		 */
		return -ENOSPC;

	if (locker->fn(locker, demoted->oblock))
		/*
		 * We couldn't lock this block.
		 */
		return -EBUSY;

	del(mq, demoted);
	*oblock = demoted->oblock;
	free_entry(&mq->cache_alloc, demoted);

	return 0;
}

enum promote_result {
	PROMOTE_NOT,
	PROMOTE_TEMPORARY,
	PROMOTE_PERMANENT
};

/*
 * Converts a boolean into a promote result.
 */
static enum promote_result maybe_promote(bool promote)
{
	return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
}

static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio,
					  bool fast_promote)
{
	if (bio_data_dir(bio) == WRITE) {
		if (!allocator_empty(&mq->cache_alloc) && fast_promote)
			return PROMOTE_TEMPORARY;

		else
			return maybe_promote(hs_e->level >= mq->write_promote_level);
	} else
		return maybe_promote(hs_e->level >= mq->read_promote_level);
}

static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
			    struct policy_locker *locker,
			    struct policy_result *result, enum promote_result pr)
{
	int r;
	struct entry *e;

	if (allocator_empty(&mq->cache_alloc)) {
		result->op = POLICY_REPLACE;
		r = demote_cblock(mq, locker, &result->old_oblock);
		if (r) {
			result->op = POLICY_MISS;
			return;
		}

	} else
		result->op = POLICY_NEW;

	e = alloc_entry(&mq->cache_alloc);
	BUG_ON(!e);
	e->oblock = oblock;

	if (pr == PROMOTE_TEMPORARY)
		push(mq, e);
	else
		push_new(mq, e);

	result->cblock = infer_cblock(mq, e);
}

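/*
 * Maps an origin block to the (larger) hotspot block that contains it.
 */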
static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
{
	sector_t r = from_oblock(b);
	(void) sector_div(r, mq->cache_blocks_per_hotspot_block);
	return to_oblock(r);
}

static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio)
{
	unsigned hi;
	dm_oblock_t hb = to_hblock(mq, b);
	struct entry *e = h_lookup(&mq->hotspot_table, hb);

	if (e) {
		stats_level_accessed(&mq->hotspot_stats, e->level);

		hi = get_index(&mq->hotspot_alloc, e);
		q_requeue(&mq->hotspot, e,
			  test_and_set_bit(hi, mq->hotspot_hit_bits) ?
			  0u : mq->hotspot_level_jump);

	} else {
		stats_miss(&mq->hotspot_stats);

		e = alloc_entry(&mq->hotspot_alloc);
		if (!e) {
			e = q_pop(&mq->hotspot);
			if (e) {
				h_remove(&mq->hotspot_table, e);
				hi = get_index(&mq->hotspot_alloc, e);
				clear_bit(hi, mq->hotspot_hit_bits);
			}

		}

		if (e) {
			e->oblock = hb;
			q_push(&mq->hotspot, e);
			h_insert(&mq->hotspot_table, e);
		}
	}

	return e;
}

/*
 * Looks the oblock up in the hash table, then decides whether to promote
 * it to the cache.
 */
static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
	       bool can_migrate, bool fast_promote,
	       struct policy_locker *locker, struct policy_result *result)
{
	struct entry *e, *hs_e;
	enum promote_result pr;

	hs_e = update_hotspot_queue(mq, oblock, bio);

	e = h_lookup(&mq->table, oblock);
	if (e) {
		stats_level_accessed(&mq->cache_stats, e->level);

		requeue(mq, e);
		result->op = POLICY_HIT;
		result->cblock = infer_cblock(mq, e);

	} else {
		stats_miss(&mq->cache_stats);

		pr = should_promote(mq, hs_e, bio, fast_promote);
		if (pr == PROMOTE_NOT)
			result->op = POLICY_MISS;

		else {
			if (!can_migrate) {
				result->op = POLICY_MISS;
				return -EWOULDBLOCK;
			}

			insert_in_cache(mq, oblock, locker, result, pr);
		}
	}

	return 0;
}

/*----------------------------------------------------------------*/

/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct smq_policy, policy);
}

static void smq_destroy(struct dm_cache_policy *p)
{
	struct smq_policy *mq = to_smq_policy(p);

	h_exit(&mq->hotspot_table);
	h_exit(&mq->table);
	free_bitset(mq->hotspot_hit_bits);
	free_bitset(mq->cache_hit_bits);
	space_exit(&mq->es);
	kfree(mq);
}

static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
		   bool can_block, bool can_migrate, bool fast_promote,
		   struct bio *bio, struct policy_locker *locker,
		   struct policy_result *result)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	result->op = POLICY_MISS;

	spin_lock_irqsave(&mq->lock, flags);
	r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	spin_lock_irqsave(&mq->lock, flags);
	e = h_lookup(&mq->table, oblock);
	if (e) {
		*cblock = infer_cblock(mq, e);
		r = 0;
	} else
		r = -ENOENT;
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set)
{
	struct entry *e;

	e = h_lookup(&mq->table, oblock);
	BUG_ON(!e);

	del(mq, e);
	e->dirty = set;
	push(mq, e);
}

static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, oblock, true);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, oblock, false);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static int smq_load_mapping(struct dm_cache_policy *p,
			    dm_oblock_t oblock, dm_cblock_t cblock,
			    uint32_t hint, bool hint_valid)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
	e->oblock = oblock;
	e->dirty = false;	/* this gets corrected in a minute */
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1;
	push(mq, e);

	return 0;
}

static int smq_save_hints(struct smq_policy *mq, struct queue *q,
			  policy_walk_fn fn, void *context)
{
	int r;
	unsigned level;
	struct entry *e;

	for (level = 0; level < q->nr_levels; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			if (!e->sentinel) {
				r = fn(context, infer_cblock(mq, e),
				       e->oblock, e->level);
				if (r)
					return r;
			}
		}

	return 0;
}

static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
			     void *context)
{
	struct smq_policy *mq = to_smq_policy(p);
	int r = 0;

	/*
	 * We don't need to lock here since this method is only called once
	 * the IO has stopped.
	 */
	r = smq_save_hints(mq, &mq->clean, fn, context);
	if (!r)
		r = smq_save_hints(mq, &mq->dirty, fn, context);

	return r;
}

static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
{
	struct entry *e;

	e = h_lookup(&mq->table, oblock);
	BUG_ON(!e);

	del(mq, e);
	free_entry(&mq->cache_alloc, e);
}

static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	__remove_mapping(mq, oblock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
{
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e || !e->allocated)
		return -ENODATA;

	del(mq, e);
	free_entry(&mq->cache_alloc, e);

	return 0;
}

static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __remove_cblock(mq, cblock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

#define CLEAN_TARGET_CRITICAL 5u /* percent */

static bool clean_target_met(struct smq_policy *mq, bool critical)
{
	if (critical) {
		/*
		 * Cache entries may not be populated.  So we cannot rely on
		 * the size of the clean queue.
		 */
		unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
		unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;

		return nr_clean >= target;
	} else
		return !q_size(&mq->dirty);
}

static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock,
				dm_cblock_t *cblock, bool critical_only)
{
	struct entry *e = NULL;
	bool target_met = clean_target_met(mq, critical_only);

	if (critical_only)
		/*
		 * Always try and keep the bottom level clean.
		 */
		e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);

	else
		e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels);

	if (!e)
		return -ENODATA;

	*oblock = e->oblock;
	*cblock = infer_cblock(mq, e);
	e->dirty = false;
	push_new(mq, e);

	return 0;
}

static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
			      dm_cblock_t *cblock, bool critical_only)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __smq_writeback_work(mq, oblock, cblock, critical_only);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static void __force_mapping(struct smq_policy *mq,
			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	struct entry *e = h_lookup(&mq->table, current_oblock);

	if (e) {
		del(mq, e);
		e->oblock = new_oblock;
		e->dirty = true;
		push(mq, e);
	}
}

static void smq_force_mapping(struct dm_cache_policy *p,
			      dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__force_mapping(mq, current_oblock, new_oblock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = to_cblock(mq->cache_alloc.nr_allocated);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static void smq_tick(struct dm_cache_policy *p, bool can_block)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	mq->tick++;
	update_sentinels(mq);
	end_hotspot_period(mq);
	end_cache_period(mq);
	spin_unlock_irqrestore(&mq->lock, flags);
}

/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct smq_policy *mq)
{
	mq->policy.destroy = smq_destroy;
	mq->policy.map = smq_map;
	mq->policy.lookup = smq_lookup;
	mq->policy.set_dirty = smq_set_dirty;
	mq->policy.clear_dirty = smq_clear_dirty;
	mq->policy.load_mapping = smq_load_mapping;
	mq->policy.walk_mappings = smq_walk_mappings;
	mq->policy.remove_mapping = smq_remove_mapping;
	mq->policy.remove_cblock = smq_remove_cblock;
	mq->policy.writeback_work = smq_writeback_work;
	mq->policy.force_mapping = smq_force_mapping;
	mq->policy.residency = smq_residency;
	mq->policy.tick = smq_tick;
}

static bool too_many_hotspot_blocks(sector_t origin_size,
				    sector_t hotspot_block_size,
				    unsigned nr_hotspot_blocks)
{
	return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
}

static void calc_hotspot_params(sector_t origin_size,
				sector_t cache_block_size,
				unsigned nr_cache_blocks,
				sector_t *hotspot_block_size,
				unsigned *nr_hotspot_blocks)
{
	*hotspot_block_size = cache_block_size * 16u;
	*nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);

	while ((*hotspot_block_size > cache_block_size) &&
	       too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
		*hotspot_block_size /= 2u;
}
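
/*
 * eg. with 40000 cache blocks this starts from 10000 hotspot blocks of
 * sixteen cache blocks each, then halves the hotspot block size until
 * the hotspot area fits within the origin (or the two block sizes meet).
 */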

static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
					  sector_t origin_size,
					  sector_t cache_block_size)
{
	unsigned i;
	unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
	unsigned total_sentinels = 2u * nr_sentinels_per_queue;
	struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq);
	mq->cache_size = cache_size;
	mq->cache_block_size = cache_block_size;

	calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
			    &mq->hotspot_block_size, &mq->nr_hotspot_blocks);

	mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
	mq->hotspot_level_jump = 1u;
	if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
		DMERR("couldn't initialize entry space");
		goto bad_pool_init;
	}

	init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
		       total_sentinels + mq->nr_hotspot_blocks);

	init_allocator(&mq->cache_alloc, &mq->es,
		       total_sentinels + mq->nr_hotspot_blocks,
		       total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));

	mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
	if (!mq->hotspot_hit_bits) {
		DMERR("couldn't allocate hotspot hit bitset");
		goto bad_hotspot_hit_bits;
	}
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);

	if (from_cblock(cache_size)) {
		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
		if (!mq->cache_hit_bits) {
			DMERR("couldn't allocate cache hit bitset");
			goto bad_cache_hit_bits;
		}
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
	} else
		mq->cache_hit_bits = NULL;

	mq->tick = 0;
	spin_lock_init(&mq->lock);

	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
	mq->hotspot.nr_top_levels = 8;
	mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
					   from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);

	q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
	q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);

	stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
	stats_init(&mq->cache_stats, NR_CACHE_LEVELS);

	if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
		goto bad_alloc_table;

	if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
		goto bad_alloc_hotspot_table;

	sentinels_init(mq);
	mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;

	mq->next_hotspot_period = jiffies;
	mq->next_cache_period = jiffies;

	return &mq->policy;

bad_alloc_hotspot_table:
	h_exit(&mq->table);
bad_alloc_table:
	free_bitset(mq->cache_hit_bits);
bad_cache_hit_bits:
	free_bitset(mq->hotspot_hit_bits);
bad_hotspot_hit_bits:
	space_exit(&mq->es);
bad_pool_init:
	kfree(mq);

	return NULL;
}

/*----------------------------------------------------------------*/

static struct dm_cache_policy_type smq_policy_type = {
	.name = "smq",
	.version = {1, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {1, 4, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create,
	.real = &smq_policy_type
};

static int __init smq_init(void)
{
	int r;

	r = dm_cache_policy_register(&smq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		return -ENOMEM;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (r) {
		DMERR("register failed (as default) %d", r);
		dm_cache_policy_unregister(&smq_policy_type);
		return -ENOMEM;
	}

	return 0;
}

static void __exit smq_exit(void)
{
	dm_cache_policy_unregister(&smq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);
}

module_init(smq_init);
module_exit(smq_exit);

MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("smq cache policy");

MODULE_ALIAS("dm-cache-default");