Blame - block/as-iosched.c - kernel/msm-4.19

blob: 43fa2049568883d6b5c07cc304b77c93d3091abf [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	* Anticipatory & deadline i/o scheduler.
				3	*
				4	* Copyright (C) 2002 Jens Axboe <axboe@suse.de>
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	5	* Nick Piggin <nickpiggin@yahoo.com.au>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	*
				7	*/
				8	#include <linux/kernel.h>
				9	#include <linux/fs.h>
				10	#include <linux/blkdev.h>
				11	#include <linux/elevator.h>
				12	#include <linux/bio.h>
				13	#include <linux/config.h>
				14	#include <linux/module.h>
				15	#include <linux/slab.h>
				16	#include <linux/init.h>
				17	#include <linux/compiler.h>
				18	#include <linux/hash.h>
				19	#include <linux/rbtree.h>
				20	#include <linux/interrupt.h>
				21
				22	#define REQ_SYNC 1
				23	#define REQ_ASYNC 0
				24
				25	/*
				26	* See Documentation/block/as-iosched.txt
				27	*/
				28
				29	/*
				30	* max time before a read is submitted.
				31	*/
				32	#define default_read_expire (HZ / 8)
				33
				34	/*
				35	* ditto for writes, these limits are not hard, even
				36	* if the disk is capable of satisfying them.
				37	*/
				38	#define default_write_expire (HZ / 4)
				39
				40	/*
				41	* read_batch_expire describes how long we will allow a stream of reads to
				42	* persist before looking to see whether it is time to switch over to writes.
				43	*/
				44	#define default_read_batch_expire (HZ / 2)
				45
				46	/*
				47	* write_batch_expire describes how long we want a stream of writes to run for.
				48	* This is not a hard limit, but a target we set for the auto-tuning thingy.
				49	* See, the problem is: we can send a lot of writes to disk cache / TCQ in
				50	* a short amount of time...
				51	*/
				52	#define default_write_batch_expire (HZ / 8)
				53
				54	/*
				55	* max time we may wait to anticipate a read (default around 6ms)
				56	*/
				57	#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
				58
				59	/*
				60	* Keep track of up to 20ms thinktimes. We can go as big as we like here,
				61	* however huge values tend to interfere and not decay fast enough. A program
				62	* might be in a non-io phase of operation. Waiting on user input for example,
				63	* or doing a lengthy computation. A small penalty can be justified there, and
				64	* will still catch out those processes that constantly have large thinktimes.
				65	*/
				66	#define MAX_THINKTIME (HZ/50UL)
				67
				68	/* Bits in as_io_context.state */
				69	enum as_io_states {
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	70	AS_TASK_RUNNING=0, /* Process has not exited */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	71	AS_TASK_IOSTARTED, /* Process has started some IO */
				72	AS_TASK_IORUNNING, /* Process has completed some IO */
				73	};
				74
				75	enum anticipation_status {
				76	ANTIC_OFF=0, /* Not anticipating (normal operation) */
				77	ANTIC_WAIT_REQ, /* The last read has not yet completed */
				78	ANTIC_WAIT_NEXT, /* Currently anticipating a request vs
				79	last read (which has completed) */
				80	ANTIC_FINISHED, /* Anticipating but have found a candidate
				81	* or timed out */
				82	};
				83
				84	struct as_data {
				85	/*
				86	* run time data
				87	*/
				88
				89	struct request_queue q; / the "owner" queue */
				90
				91	/*
				92	* requests (as_rq s) are present on both sort_list and fifo_list
				93	*/
				94	struct rb_root sort_list[2];
				95	struct list_head fifo_list[2];
				96
				97	struct as_rq next_arq[2]; / next in sort order */
				98	sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	99	struct list_head hash; / request hash */
				100
				101	unsigned long exit_prob; /* probability a task will exit while
				102	being waited on */
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	103	unsigned long exit_no_coop; /* probablility an exited task will
				104	not be part of a later cooperating
				105	request */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	106	unsigned long new_ttime_total; /* mean thinktime on new proc */
				107	unsigned long new_ttime_mean;
				108	u64 new_seek_total; /* mean seek on new proc */
				109	sector_t new_seek_mean;
				110
				111	unsigned long current_batch_expires;
				112	unsigned long last_check_fifo[2];
				113	int changed_batch; /* 1: waiting for old batch to end */
				114	int new_batch; /* 1: waiting on first read complete */
				115	int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */
				116	int write_batch_count; /* max # of reqs in a write batch */
				117	int current_write_count; /* how many requests left this batch */
				118	int write_batch_idled; /* has the write batch gone idle? */
				119	mempool_t *arq_pool;
				120
				121	enum anticipation_status antic_status;
				122	unsigned long antic_start; /* jiffies: when it started */
				123	struct timer_list antic_timer; /* anticipatory scheduling timer */
				124	struct work_struct antic_work; /* Deferred unplugging */
				125	struct io_context io_context; / Identify the expected process */
				126	int ioc_finished; /* IO associated with io_context is finished */
				127	int nr_dispatched;
				128
				129	/*
				130	* settings that change how the i/o scheduler behaves
				131	*/
				132	unsigned long fifo_expire[2];
				133	unsigned long batch_expire[2];
				134	unsigned long antic_expire;
				135	};
				136
				137	#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
				138
				139	/*
				140	* per-request data.
				141	*/
				142	enum arq_state {
				143	AS_RQ_NEW=0, /* New - not referenced and not on any lists */
				144	AS_RQ_QUEUED, /* In the request queue. It belongs to the
				145	scheduler */
				146	AS_RQ_DISPATCHED, /* On the dispatch list. It belongs to the
				147	driver now */
				148	AS_RQ_PRESCHED, /* Debug poisoning for requests being used */
				149	AS_RQ_REMOVED,
				150	AS_RQ_MERGED,
				151	AS_RQ_POSTSCHED, /* when they shouldn't be */
				152	};
				153
				154	struct as_rq {
				155	/*
				156	* rbtree index, key is the starting offset
				157	*/
				158	struct rb_node rb_node;
				159	sector_t rb_key;
				160
				161	struct request *request;
				162
				163	struct io_context io_context; / The submitting task */
				164
				165	/*
				166	* request hash, key is the ending offset (for back merge lookup)
				167	*/
				168	struct list_head hash;
				169	unsigned int on_hash;
				170
				171	/*
				172	* expire fifo
				173	*/
				174	struct list_head fifo;
				175	unsigned long expires;
				176
				177	unsigned int is_sync;
				178	enum arq_state state;
				179	};
				180
				181	#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
				182
				183	static kmem_cache_t *arq_pool;
				184
				185	/*
				186	* IO Context helper functions
				187	*/
				188
				189	/* Called to deallocate the as_io_context */
				190	static void free_as_io_context(struct as_io_context *aic)
				191	{
				192	kfree(aic);
				193	}
				194
				195	/* Called when the task exits */
				196	static void exit_as_io_context(struct as_io_context *aic)
				197	{
				198	WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
				199	clear_bit(AS_TASK_RUNNING, &aic->state);
				200	}
				201
				202	static struct as_io_context *alloc_as_io_context(void)
				203	{
				204	struct as_io_context *ret;
				205
				206	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
				207	if (ret) {
				208	ret->dtor = free_as_io_context;
				209	ret->exit = exit_as_io_context;
				210	ret->state = 1 << AS_TASK_RUNNING;
				211	atomic_set(&ret->nr_queued, 0);
				212	atomic_set(&ret->nr_dispatched, 0);
				213	spin_lock_init(&ret->lock);
				214	ret->ttime_total = 0;
				215	ret->ttime_samples = 0;
				216	ret->ttime_mean = 0;
				217	ret->seek_total = 0;
				218	ret->seek_samples = 0;
				219	ret->seek_mean = 0;
				220	}
				221
				222	return ret;
				223	}
				224
				225	/*
				226	* If the current task has no AS IO context then create one and initialise it.
				227	* Then take a ref on the task's io context and return it.
				228	*/
				229	static struct io_context *as_get_io_context(void)
				230	{
				231	struct io_context *ioc = get_io_context(GFP_ATOMIC);
				232	if (ioc && !ioc->aic) {
				233	ioc->aic = alloc_as_io_context();
				234	if (!ioc->aic) {
				235	put_io_context(ioc);
				236	ioc = NULL;
				237	}
				238	}
				239	return ioc;
				240	}
				241
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	242	static void as_put_io_context(struct as_rq *arq)
				243	{
				244	struct as_io_context *aic;
				245
				246	if (unlikely(!arq->io_context))
				247	return;
				248
				249	aic = arq->io_context->aic;
				250
				251	if (arq->is_sync == REQ_SYNC && aic) {
				252	spin_lock(&aic->lock);
				253	set_bit(AS_TASK_IORUNNING, &aic->state);
				254	aic->last_end_request = jiffies;
				255	spin_unlock(&aic->lock);
				256	}
				257
				258	put_io_context(arq->io_context);
				259	}
				260
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	261	/*
				262	* the back merge hash support functions
				263	*/
				264	static const int as_hash_shift = 6;
				265	#define AS_HASH_BLOCK(sec) ((sec) >> 3)
				266	#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
				267	#define AS_HASH_ENTRIES (1 << as_hash_shift)
				268	#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
				269	#define list_entry_hash(ptr) list_entry((ptr), struct as_rq, hash)
				270
				271	static inline void __as_del_arq_hash(struct as_rq *arq)
				272	{
				273	arq->on_hash = 0;
				274	list_del_init(&arq->hash);
				275	}
				276
				277	static inline void as_del_arq_hash(struct as_rq *arq)
				278	{
				279	if (arq->on_hash)
				280	__as_del_arq_hash(arq);
				281	}
				282
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	283	static void as_add_arq_hash(struct as_data ad, struct as_rq arq)
				284	{
				285	struct request *rq = arq->request;
				286
				287	BUG_ON(arq->on_hash);
				288
				289	arq->on_hash = 1;
				290	list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
				291	}
				292
				293	/*
				294	* move hot entry to front of chain
				295	*/
				296	static inline void as_hot_arq_hash(struct as_data ad, struct as_rq arq)
				297	{
				298	struct request *rq = arq->request;
				299	struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
				300
				301	if (!arq->on_hash) {
				302	WARN_ON(1);
				303	return;
				304	}
				305
				306	if (arq->hash.prev != head) {
				307	list_del(&arq->hash);
				308	list_add(&arq->hash, head);
				309	}
				310	}
				311
				312	static struct request as_find_arq_hash(struct as_data ad, sector_t offset)
				313	{
				314	struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
				315	struct list_head entry, next = hash_list->next;
				316
				317	while ((entry = next) != hash_list) {
				318	struct as_rq *arq = list_entry_hash(entry);
				319	struct request *__rq = arq->request;
				320
				321	next = entry->next;
				322
				323	BUG_ON(!arq->on_hash);
				324
				325	if (!rq_mergeable(__rq)) {
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame]	326	as_del_arq_hash(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	327	continue;
				328	}
				329
				330	if (rq_hash_key(__rq) == offset)
				331	return __rq;
				332	}
				333
				334	return NULL;
				335	}
				336
				337	/*
				338	* rb tree support functions
				339	*/
				340	#define RB_NONE (2)
				341	#define RB_EMPTY(root) ((root)->rb_node == NULL)
				342	#define ON_RB(node) ((node)->rb_color != RB_NONE)
				343	#define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
				344	#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
				345	#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
				346	#define rq_rb_key(rq) (rq)->sector
				347
				348	/*
				349	* as_find_first_arq finds the first (lowest sector numbered) request
				350	* for the specified data_dir. Used to sweep back to the start of the disk
				351	* (1-way elevator) after we process the last (highest sector) request.
				352	*/
				353	static struct as_rq as_find_first_arq(struct as_data ad, int data_dir)
				354	{
				355	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				356
				357	if (n == NULL)
				358	return NULL;
				359
				360	for (;;) {
				361	if (n->rb_left == NULL)
				362	return rb_entry_arq(n);
				363
				364	n = n->rb_left;
				365	}
				366	}
				367
				368	/*
				369	* Add the request to the rb tree if it is unique. If there is an alias (an
				370	* existing request against the same sector), which can happen when using
				371	* direct IO, then return the alias.
				372	*/
				373	static struct as_rq as_add_arq_rb(struct as_data ad, struct as_rq *arq)
				374	{
				375	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
				376	struct rb_node *parent = NULL;
				377	struct as_rq *__arq;
				378	struct request *rq = arq->request;
				379
				380	arq->rb_key = rq_rb_key(rq);
				381
				382	while (*p) {
				383	parent = *p;
				384	__arq = rb_entry_arq(parent);
				385
				386	if (arq->rb_key < __arq->rb_key)
				387	p = &(*p)->rb_left;
				388	else if (arq->rb_key > __arq->rb_key)
				389	p = &(*p)->rb_right;
				390	else
				391	return __arq;
				392	}
				393
				394	rb_link_node(&arq->rb_node, parent, p);
				395	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				396
				397	return NULL;
				398	}
				399
				400	static inline void as_del_arq_rb(struct as_data ad, struct as_rq arq)
				401	{
				402	if (!ON_RB(&arq->rb_node)) {
				403	WARN_ON(1);
				404	return;
				405	}
				406
				407	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				408	RB_CLEAR(&arq->rb_node);
				409	}
				410
				411	static struct request *
				412	as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
				413	{
				414	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				415	struct as_rq *arq;
				416
				417	while (n) {
				418	arq = rb_entry_arq(n);
				419
				420	if (sector < arq->rb_key)
				421	n = n->rb_left;
				422	else if (sector > arq->rb_key)
				423	n = n->rb_right;
				424	else
				425	return arq->request;
				426	}
				427
				428	return NULL;
				429	}
				430
				431	/*
				432	* IO Scheduler proper
				433	*/
				434
				435	#define MAXBACK (1024 * 1024) /*
				436	* Maximum distance the disk will go backward
				437	* for a request.
				438	*/
				439
				440	#define BACK_PENALTY 2
				441
				442	/*
				443	* as_choose_req selects the preferred one of two requests of the same data_dir
				444	* ignoring time - eg. timeouts, which is the job of as_dispatch_request
				445	*/
				446	static struct as_rq *
				447	as_choose_req(struct as_data ad, struct as_rq arq1, struct as_rq *arq2)
				448	{
				449	int data_dir;
				450	sector_t last, s1, s2, d1, d2;
				451	int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
				452	const sector_t maxback = MAXBACK;
				453
				454	if (arq1 == NULL \|\| arq1 == arq2)
				455	return arq2;
				456	if (arq2 == NULL)
				457	return arq1;
				458
				459	data_dir = arq1->is_sync;
				460
				461	last = ad->last_sector[data_dir];
				462	s1 = arq1->request->sector;
				463	s2 = arq2->request->sector;
				464
				465	BUG_ON(data_dir != arq2->is_sync);
				466
				467	/*
				468	* Strict one way elevator _except_ in the case where we allow
				469	* short backward seeks which are biased as twice the cost of a
				470	* similar forward seek.
				471	*/
				472	if (s1 >= last)
				473	d1 = s1 - last;
				474	else if (s1+maxback >= last)
				475	d1 = (last - s1)*BACK_PENALTY;
				476	else {
				477	r1_wrap = 1;
				478	d1 = 0; /* shut up, gcc */
				479	}
				480
				481	if (s2 >= last)
				482	d2 = s2 - last;
				483	else if (s2+maxback >= last)
				484	d2 = (last - s2)*BACK_PENALTY;
				485	else {
				486	r2_wrap = 1;
				487	d2 = 0;
				488	}
				489
				490	/* Found required data */
				491	if (!r1_wrap && r2_wrap)
				492	return arq1;
				493	else if (!r2_wrap && r1_wrap)
				494	return arq2;
				495	else if (r1_wrap && r2_wrap) {
				496	/* both behind the head */
				497	if (s1 <= s2)
				498	return arq1;
				499	else
				500	return arq2;
				501	}
				502
				503	/* Both requests in front of the head */
				504	if (d1 < d2)
				505	return arq1;
				506	else if (d2 < d1)
				507	return arq2;
				508	else {
				509	if (s1 >= s2)
				510	return arq1;
				511	else
				512	return arq2;
				513	}
				514	}
				515
				516	/*
				517	* as_find_next_arq finds the next request after @prev in elevator order.
				518	* this with as_choose_req form the basis for how the scheduler chooses
				519	* what request to process next. Anticipation works on top of this.
				520	*/
				521	static struct as_rq as_find_next_arq(struct as_data ad, struct as_rq *last)
				522	{
				523	const int data_dir = last->is_sync;
				524	struct as_rq *ret;
				525	struct rb_node *rbnext = rb_next(&last->rb_node);
				526	struct rb_node *rbprev = rb_prev(&last->rb_node);
				527	struct as_rq arq_next, arq_prev;
				528
				529	BUG_ON(!ON_RB(&last->rb_node));
				530
				531	if (rbprev)
				532	arq_prev = rb_entry_arq(rbprev);
				533	else
				534	arq_prev = NULL;
				535
				536	if (rbnext)
				537	arq_next = rb_entry_arq(rbnext);
				538	else {
				539	arq_next = as_find_first_arq(ad, data_dir);
				540	if (arq_next == last)
				541	arq_next = NULL;
				542	}
				543
				544	ret = as_choose_req(ad, arq_next, arq_prev);
				545
				546	return ret;
				547	}
				548
				549	/*
				550	* anticipatory scheduling functions follow
				551	*/
				552
				553	/*
				554	* as_antic_expired tells us when we have anticipated too long.
				555	* The funny "absolute difference" math on the elapsed time is to handle
				556	* jiffy wraps, and disks which have been idle for 0x80000000 jiffies.
				557	*/
				558	static int as_antic_expired(struct as_data *ad)
				559	{
				560	long delta_jif;
				561
				562	delta_jif = jiffies - ad->antic_start;
				563	if (unlikely(delta_jif < 0))
				564	delta_jif = -delta_jif;
				565	if (delta_jif < ad->antic_expire)
				566	return 0;
				567
				568	return 1;
				569	}
				570
				571	/*
				572	* as_antic_waitnext starts anticipating that a nice request will soon be
				573	* submitted. See also as_antic_waitreq
				574	*/
				575	static void as_antic_waitnext(struct as_data *ad)
				576	{
				577	unsigned long timeout;
				578
				579	BUG_ON(ad->antic_status != ANTIC_OFF
				580	&& ad->antic_status != ANTIC_WAIT_REQ);
				581
				582	timeout = ad->antic_start + ad->antic_expire;
				583
				584	mod_timer(&ad->antic_timer, timeout);
				585
				586	ad->antic_status = ANTIC_WAIT_NEXT;
				587	}
				588
				589	/*
				590	* as_antic_waitreq starts anticipating. We don't start timing the anticipation
				591	* until the request that we're anticipating on has finished. This means we
				592	* are timing from when the candidate process wakes up hopefully.
				593	*/
				594	static void as_antic_waitreq(struct as_data *ad)
				595	{
				596	BUG_ON(ad->antic_status == ANTIC_FINISHED);
				597	if (ad->antic_status == ANTIC_OFF) {
				598	if (!ad->io_context \|\| ad->ioc_finished)
				599	as_antic_waitnext(ad);
				600	else
				601	ad->antic_status = ANTIC_WAIT_REQ;
				602	}
				603	}
				604
				605	/*
				606	* This is called directly by the functions in this file to stop anticipation.
				607	* We kill the timer and schedule a call to the request_fn asap.
				608	*/
				609	static void as_antic_stop(struct as_data *ad)
				610	{
				611	int status = ad->antic_status;
				612
				613	if (status == ANTIC_WAIT_REQ \|\| status == ANTIC_WAIT_NEXT) {
				614	if (status == ANTIC_WAIT_NEXT)
				615	del_timer(&ad->antic_timer);
				616	ad->antic_status = ANTIC_FINISHED;
				617	/* see as_work_handler */
				618	kblockd_schedule_work(&ad->antic_work);
				619	}
				620	}
				621
				622	/*
				623	* as_antic_timeout is the timer function set by as_antic_waitnext.
				624	*/
				625	static void as_antic_timeout(unsigned long data)
				626	{
				627	struct request_queue q = (struct request_queue )data;
				628	struct as_data *ad = q->elevator->elevator_data;
				629	unsigned long flags;
				630
				631	spin_lock_irqsave(q->queue_lock, flags);
				632	if (ad->antic_status == ANTIC_WAIT_REQ
				633	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				634	struct as_io_context *aic = ad->io_context->aic;
				635
				636	ad->antic_status = ANTIC_FINISHED;
				637	kblockd_schedule_work(&ad->antic_work);
				638
				639	if (aic->ttime_samples == 0) {
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	640	/* process anticipated on has exited or timed out*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	641	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				642	}
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	643	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
				644	/* process not "saved" by a cooperating request */
				645	ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
				646	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	647	}
				648	spin_unlock_irqrestore(q->queue_lock, flags);
				649	}
				650
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	651	static void as_update_thinktime(struct as_data ad, struct as_io_context aic,
				652	unsigned long ttime)
				653	{
				654	/* fixed point: 1.0 == 1<<8 */
				655	if (aic->ttime_samples == 0) {
				656	ad->new_ttime_total = (7ad->new_ttime_total + 256ttime) / 8;
				657	ad->new_ttime_mean = ad->new_ttime_total / 256;
				658
				659	ad->exit_prob = (7*ad->exit_prob)/8;
				660	}
				661	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
				662	aic->ttime_total = (7aic->ttime_total + 256ttime) / 8;
				663	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
				664	}
				665
				666	static void as_update_seekdist(struct as_data ad, struct as_io_context aic,
				667	sector_t sdist)
				668	{
				669	u64 total;
				670
				671	if (aic->seek_samples == 0) {
				672	ad->new_seek_total = (7ad->new_seek_total + 256(u64)sdist)/8;
				673	ad->new_seek_mean = ad->new_seek_total / 256;
				674	}
				675
				676	/*
				677	* Don't allow the seek distance to get too large from the
				678	* odd fragment, pagein, etc
				679	*/
				680	if (aic->seek_samples <= 60) /* second&third seek */
				681	sdist = min(sdist, (aic->seek_mean * 4) + 210241024);
				682	else
				683	sdist = min(sdist, (aic->seek_mean * 4) + 2102464);
				684
				685	aic->seek_samples = (7*aic->seek_samples + 256) / 8;
				686	aic->seek_total = (7aic->seek_total + (u64)256sdist) / 8;
				687	total = aic->seek_total + (aic->seek_samples/2);
				688	do_div(total, aic->seek_samples);
				689	aic->seek_mean = (sector_t)total;
				690	}
				691
				692	/*
				693	* as_update_iohist keeps a decaying histogram of IO thinktimes, and
				694	* updates @aic->ttime_mean based on that. It is called when a new
				695	* request is queued.
				696	*/
				697	static void as_update_iohist(struct as_data ad, struct as_io_context aic,
				698	struct request *rq)
				699	{
				700	struct as_rq *arq = RQ_DATA(rq);
				701	int data_dir = arq->is_sync;
				702	unsigned long thinktime = 0;
				703	sector_t seek_dist;
				704
				705	if (aic == NULL)
				706	return;
				707
				708	if (data_dir == REQ_SYNC) {
				709	unsigned long in_flight = atomic_read(&aic->nr_queued)
				710	+ atomic_read(&aic->nr_dispatched);
				711	spin_lock(&aic->lock);
				712	if (test_bit(AS_TASK_IORUNNING, &aic->state) \|\|
				713	test_bit(AS_TASK_IOSTARTED, &aic->state)) {
				714	/* Calculate read -> read thinktime */
				715	if (test_bit(AS_TASK_IORUNNING, &aic->state)
				716	&& in_flight == 0) {
				717	thinktime = jiffies - aic->last_end_request;
				718	thinktime = min(thinktime, MAX_THINKTIME-1);
				719	}
				720	as_update_thinktime(ad, aic, thinktime);
				721
				722	/* Calculate read -> read seek distance */
				723	if (aic->last_request_pos < rq->sector)
				724	seek_dist = rq->sector - aic->last_request_pos;
				725	else
				726	seek_dist = aic->last_request_pos - rq->sector;
				727	as_update_seekdist(ad, aic, seek_dist);
				728	}
				729	aic->last_request_pos = rq->sector + rq->nr_sectors;
				730	set_bit(AS_TASK_IOSTARTED, &aic->state);
				731	spin_unlock(&aic->lock);
				732	}
				733	}
				734
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	735	/*
				736	* as_close_req decides if one request is considered "close" to the
				737	* previous one issued.
				738	*/
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	739	static int as_close_req(struct as_data ad, struct as_io_context aic,
				740	struct as_rq *arq)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	741	{
				742	unsigned long delay; /* milliseconds */
				743	sector_t last = ad->last_sector[ad->batch_data_dir];
				744	sector_t next = arq->request->sector;
				745	sector_t delta; /* acceptable close offset (in sectors) */
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	746	sector_t s;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	747
				748	if (ad->antic_status == ANTIC_OFF \|\| !ad->ioc_finished)
				749	delay = 0;
				750	else
				751	delay = ((jiffies - ad->antic_start) * 1000) / HZ;
				752
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	753	if (delay == 0)
				754	delta = 8192;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	755	else if (delay <= 20 && delay <= ad->antic_expire)
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	756	delta = 8192 << delay;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	757	else
				758	return 1;
				759
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	760	if ((last <= next + (delta>>1)) && (next <= last + delta))
				761	return 1;
				762
				763	if (last < next)
				764	s = next - last;
				765	else
				766	s = last - next;
				767
				768	if (aic->seek_samples == 0) {
				769	/*
				770	* Process has just started IO. Use past statistics to
				771	* gauge success possibility
				772	*/
				773	if (ad->new_seek_mean > s) {
				774	/* this request is better than what we're expecting */
				775	return 1;
				776	}
				777
				778	} else {
				779	if (aic->seek_mean > s) {
				780	/* this request is better than what we're expecting */
				781	return 1;
				782	}
				783	}
				784
				785	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	786	}
				787
				788	/*
				789	* as_can_break_anticipation returns true if we have been anticipating this
				790	* request.
				791	*
				792	* It also returns true if the process against which we are anticipating
				793	* submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
				794	* dispatch it ASAP, because we know that application will not be submitting
				795	* any new reads.
				796	*
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	797	* If the task which has submitted the request has exited, break anticipation.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	798	*
				799	* If this task has queued some other IO, do not enter enticipation.
				800	*/
				801	static int as_can_break_anticipation(struct as_data ad, struct as_rq arq)
				802	{
				803	struct io_context *ioc;
				804	struct as_io_context *aic;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805
				806	ioc = ad->io_context;
				807	BUG_ON(!ioc);
				808
				809	if (arq && ioc == arq->io_context) {
				810	/* request from same process */
				811	return 1;
				812	}
				813
				814	if (ad->ioc_finished && as_antic_expired(ad)) {
				815	/*
				816	* In this situation status should really be FINISHED,
				817	* however the timer hasn't had the chance to run yet.
				818	*/
				819	return 1;
				820	}
				821
				822	aic = ioc->aic;
				823	if (!aic)
				824	return 0;
				825
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	826	if (atomic_read(&aic->nr_queued) > 0) {
				827	/* process has more requests queued */
				828	return 1;
				829	}
				830
				831	if (atomic_read(&aic->nr_dispatched) > 0) {
				832	/* process has more requests dispatched */
				833	return 1;
				834	}
				835
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	836	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	837	/*
				838	* Found a close request that is not one of ours.
				839	*
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	840	* This makes close requests from another process update
				841	* our IO history. Is generally useful when there are
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	842	* two or more cooperating processes working in the same
				843	* area.
				844	*/
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	845	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
				846	if (aic->ttime_samples == 0)
				847	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				848
				849	ad->exit_no_coop = (7*ad->exit_no_coop)/8;
				850	}
				851
				852	as_update_iohist(ad, aic, arq->request);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	853	return 1;
				854	}
				855
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	856	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
				857	/* process anticipated on has exited */
				858	if (aic->ttime_samples == 0)
				859	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				860
				861	if (ad->exit_no_coop > 128)
				862	return 1;
				863	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	864
				865	if (aic->ttime_samples == 0) {
				866	if (ad->new_ttime_mean > ad->antic_expire)
				867	return 1;
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	868	if (ad->exit_prob * ad->exit_no_coop > 128*256)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	869	return 1;
				870	} else if (aic->ttime_mean > ad->antic_expire) {
				871	/* the process thinks too much between requests */
				872	return 1;
				873	}
				874
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	875	return 0;
				876	}
				877
				878	/*
				879	* as_can_anticipate indicates weather we should either run arq
				880	* or keep anticipating a better request.
				881	*/
				882	static int as_can_anticipate(struct as_data ad, struct as_rq arq)
				883	{
				884	if (!ad->io_context)
				885	/*
				886	* Last request submitted was a write
				887	*/
				888	return 0;
				889
				890	if (ad->antic_status == ANTIC_FINISHED)
				891	/*
				892	* Don't restart if we have just finished. Run the next request
				893	*/
				894	return 0;
				895
				896	if (as_can_break_anticipation(ad, arq))
				897	/*
				898	* This request is a good candidate. Don't keep anticipating,
				899	* run it.
				900	*/
				901	return 0;
				902
				903	/*
				904	* OK from here, we haven't finished, and don't have a decent request!
				905	* Status is either ANTIC_OFF so start waiting,
				906	* ANTIC_WAIT_REQ so continue waiting for request to finish
				907	* or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	908	*/
				909
				910	return 1;
				911	}
				912
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	913	/*
				914	* as_update_arq must be called whenever a request (arq) is added to
				915	* the sort_list. This function keeps caches up to date, and checks if the
				916	* request might be one we are "anticipating"
				917	*/
				918	static void as_update_arq(struct as_data ad, struct as_rq arq)
				919	{
				920	const int data_dir = arq->is_sync;
				921
				922	/* keep the next_arq cache up to date */
				923	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
				924
				925	/*
				926	* have we been anticipating this request?
				927	* or does it come from the same process as the one we are anticipating
				928	* for?
				929	*/
				930	if (ad->antic_status == ANTIC_WAIT_REQ
				931	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				932	if (as_can_break_anticipation(ad, arq))
				933	as_antic_stop(ad);
				934	}
				935	}
				936
				937	/*
				938	* Gathers timings and resizes the write batch automatically
				939	*/
				940	static void update_write_batch(struct as_data *ad)
				941	{
				942	unsigned long batch = ad->batch_expire[REQ_ASYNC];
				943	long write_time;
				944
				945	write_time = (jiffies - ad->current_batch_expires) + batch;
				946	if (write_time < 0)
				947	write_time = 0;
				948
				949	if (write_time > batch && !ad->write_batch_idled) {
				950	if (write_time > batch * 3)
				951	ad->write_batch_count /= 2;
				952	else
				953	ad->write_batch_count--;
				954	} else if (write_time < batch && ad->current_write_count == 0) {
				955	if (batch > write_time * 3)
				956	ad->write_batch_count *= 2;
				957	else
				958	ad->write_batch_count++;
				959	}
				960
				961	if (ad->write_batch_count < 1)
				962	ad->write_batch_count = 1;
				963	}
				964
				965	/*
				966	* as_completed_request is to be called when a request has completed and
				967	* returned something to the requesting process, be it an error or data.
				968	*/
				969	static void as_completed_request(request_queue_t q, struct request rq)
				970	{
				971	struct as_data *ad = q->elevator->elevator_data;
				972	struct as_rq *arq = RQ_DATA(rq);
				973
				974	WARN_ON(!list_empty(&rq->queuelist));
				975
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	976	if (arq->state != AS_RQ_REMOVED) {
				977	printk("arq->state %d\n", arq->state);
				978	WARN_ON(1);
				979	goto out;
				980	}
				981
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	982	if (ad->changed_batch && ad->nr_dispatched == 1) {
				983	kblockd_schedule_work(&ad->antic_work);
				984	ad->changed_batch = 0;
				985
				986	if (ad->batch_data_dir == REQ_SYNC)
				987	ad->new_batch = 1;
				988	}
				989	WARN_ON(ad->nr_dispatched == 0);
				990	ad->nr_dispatched--;
				991
				992	/*
				993	* Start counting the batch from when a request of that direction is
				994	* actually serviced. This should help devices with big TCQ windows
				995	* and writeback caches
				996	*/
				997	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
				998	update_write_batch(ad);
				999	ad->current_batch_expires = jiffies +
				1000	ad->batch_expire[REQ_SYNC];
				1001	ad->new_batch = 0;
				1002	}
				1003
				1004	if (ad->io_context == arq->io_context && ad->io_context) {
				1005	ad->antic_start = jiffies;
				1006	ad->ioc_finished = 1;
				1007	if (ad->antic_status == ANTIC_WAIT_REQ) {
				1008	/*
				1009	* We were waiting on this request, now anticipate
				1010	* the next one
				1011	*/
				1012	as_antic_waitnext(ad);
				1013	}
				1014	}
				1015
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1016	as_put_io_context(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1017	out:
				1018	arq->state = AS_RQ_POSTSCHED;
				1019	}
				1020
				1021	/*
				1022	* as_remove_queued_request removes a request from the pre dispatch queue
				1023	* without updating refcounts. It is expected the caller will drop the
				1024	* reference unless it replaces the request at somepart of the elevator
				1025	* (ie. the dispatch queue)
				1026	*/
				1027	static void as_remove_queued_request(request_queue_t q, struct request rq)
				1028	{
				1029	struct as_rq *arq = RQ_DATA(rq);
				1030	const int data_dir = arq->is_sync;
				1031	struct as_data *ad = q->elevator->elevator_data;
				1032
				1033	WARN_ON(arq->state != AS_RQ_QUEUED);
				1034
				1035	if (arq->io_context && arq->io_context->aic) {
				1036	BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
				1037	atomic_dec(&arq->io_context->aic->nr_queued);
				1038	}
				1039
				1040	/*
				1041	* Update the "next_arq" cache if we are about to remove its
				1042	* entry
				1043	*/
				1044	if (ad->next_arq[data_dir] == arq)
				1045	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1046
				1047	list_del_init(&arq->fifo);
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame]	1048	as_del_arq_hash(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1049	as_del_arq_rb(ad, arq);
				1050	}
				1051
				1052	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1053	* as_fifo_expired returns 0 if there are no expired reads on the fifo,
				1054	* 1 otherwise. It is ratelimited so that we only perform the check once per
				1055	* `fifo_expire' interval. Otherwise a large number of expired requests
				1056	* would create a hopeless seekstorm.
				1057	*
				1058	* See as_antic_expired comment.
				1059	*/
				1060	static int as_fifo_expired(struct as_data *ad, int adir)
				1061	{
				1062	struct as_rq *arq;
				1063	long delta_jif;
				1064
				1065	delta_jif = jiffies - ad->last_check_fifo[adir];
				1066	if (unlikely(delta_jif < 0))
				1067	delta_jif = -delta_jif;
				1068	if (delta_jif < ad->fifo_expire[adir])
				1069	return 0;
				1070
				1071	ad->last_check_fifo[adir] = jiffies;
				1072
				1073	if (list_empty(&ad->fifo_list[adir]))
				1074	return 0;
				1075
				1076	arq = list_entry_fifo(ad->fifo_list[adir].next);
				1077
				1078	return time_after(jiffies, arq->expires);
				1079	}
				1080
				1081	/*
				1082	* as_batch_expired returns true if the current batch has expired. A batch
				1083	* is a set of reads or a set of writes.
				1084	*/
				1085	static inline int as_batch_expired(struct as_data *ad)
				1086	{
				1087	if (ad->changed_batch \|\| ad->new_batch)
				1088	return 0;
				1089
				1090	if (ad->batch_data_dir == REQ_SYNC)
				1091	/* TODO! add a check so a complete fifo gets written? */
				1092	return time_after(jiffies, ad->current_batch_expires);
				1093
				1094	return time_after(jiffies, ad->current_batch_expires)
				1095	\|\| ad->current_write_count == 0;
				1096	}
				1097
				1098	/*
				1099	* move an entry to dispatch queue
				1100	*/
				1101	static void as_move_to_dispatch(struct as_data ad, struct as_rq arq)
				1102	{
				1103	struct request *rq = arq->request;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1104	const int data_dir = arq->is_sync;
				1105
				1106	BUG_ON(!ON_RB(&arq->rb_node));
				1107
				1108	as_antic_stop(ad);
				1109	ad->antic_status = ANTIC_OFF;
				1110
				1111	/*
				1112	* This has to be set in order to be correctly updated by
				1113	* as_find_next_arq
				1114	*/
				1115	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
				1116
				1117	if (data_dir == REQ_SYNC) {
				1118	/* In case we have to anticipate after this */
				1119	copy_io_context(&ad->io_context, &arq->io_context);
				1120	} else {
				1121	if (ad->io_context) {
				1122	put_io_context(ad->io_context);
				1123	ad->io_context = NULL;
				1124	}
				1125
				1126	if (ad->current_write_count != 0)
				1127	ad->current_write_count--;
				1128	}
				1129	ad->ioc_finished = 0;
				1130
				1131	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1132
				1133	/*
				1134	* take it off the sort and fifo list, add to dispatch queue
				1135	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1136	while (!list_empty(&rq->queuelist)) {
				1137	struct request *__rq = list_entry_rq(rq->queuelist.next);
				1138	struct as_rq *__arq = RQ_DATA(__rq);
				1139
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1140	list_del(&__rq->queuelist);
				1141
				1142	elv_dispatch_add_tail(ad->q, __rq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1143
				1144	if (__arq->io_context && __arq->io_context->aic)
				1145	atomic_inc(&__arq->io_context->aic->nr_dispatched);
				1146
				1147	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1148	__arq->state = AS_RQ_DISPATCHED;
				1149
				1150	ad->nr_dispatched++;
				1151	}
				1152
				1153	as_remove_queued_request(ad->q, rq);
				1154	WARN_ON(arq->state != AS_RQ_QUEUED);
				1155
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1156	elv_dispatch_sort(ad->q, rq);
				1157
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1158	arq->state = AS_RQ_DISPATCHED;
				1159	if (arq->io_context && arq->io_context->aic)
				1160	atomic_inc(&arq->io_context->aic->nr_dispatched);
				1161	ad->nr_dispatched++;
				1162	}
				1163
				1164	/*
				1165	* as_dispatch_request selects the best request according to
				1166	* read/write expire, batch expire, etc, and moves it to the dispatch
				1167	* queue. Returns 1 if a request was found, 0 otherwise.
				1168	*/
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1169	static int as_dispatch_request(request_queue_t *q, int force)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1170	{
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1171	struct as_data *ad = q->elevator->elevator_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1172	struct as_rq *arq;
				1173	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
				1174	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
				1175
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1176	if (unlikely(force)) {
				1177	/*
				1178	* Forced dispatch, accounting is useless. Reset
				1179	* accounting states and dump fifo_lists. Note that
				1180	* batch_data_dir is reset to REQ_SYNC to avoid
				1181	* screwing write batch accounting as write batch
				1182	* accounting occurs on W->R transition.
				1183	*/
				1184	int dispatched = 0;
				1185
				1186	ad->batch_data_dir = REQ_SYNC;
				1187	ad->changed_batch = 0;
				1188	ad->new_batch = 0;
				1189
				1190	while (ad->next_arq[REQ_SYNC]) {
				1191	as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
				1192	dispatched++;
				1193	}
				1194	ad->last_check_fifo[REQ_SYNC] = jiffies;
				1195
				1196	while (ad->next_arq[REQ_ASYNC]) {
				1197	as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
				1198	dispatched++;
				1199	}
				1200	ad->last_check_fifo[REQ_ASYNC] = jiffies;
				1201
				1202	return dispatched;
				1203	}
				1204
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1205	/* Signal that the write batch was uncontended, so we can't time it */
				1206	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
				1207	if (ad->current_write_count == 0 \|\| !writes)
				1208	ad->write_batch_idled = 1;
				1209	}
				1210
				1211	if (!(reads \|\| writes)
				1212	\|\| ad->antic_status == ANTIC_WAIT_REQ
				1213	\|\| ad->antic_status == ANTIC_WAIT_NEXT
				1214	\|\| ad->changed_batch)
				1215	return 0;
				1216
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1217	if (!(reads && writes && as_batch_expired(ad))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1218	/*
				1219	* batch is still running or no reads or no writes
				1220	*/
				1221	arq = ad->next_arq[ad->batch_data_dir];
				1222
				1223	if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
				1224	if (as_fifo_expired(ad, REQ_SYNC))
				1225	goto fifo_expired;
				1226
				1227	if (as_can_anticipate(ad, arq)) {
				1228	as_antic_waitreq(ad);
				1229	return 0;
				1230	}
				1231	}
				1232
				1233	if (arq) {
				1234	/* we have a "next request" */
				1235	if (reads && !writes)
				1236	ad->current_batch_expires =
				1237	jiffies + ad->batch_expire[REQ_SYNC];
				1238	goto dispatch_request;
				1239	}
				1240	}
				1241
				1242	/*
				1243	* at this point we are not running a batch. select the appropriate
				1244	* data direction (read / write)
				1245	*/
				1246
				1247	if (reads) {
				1248	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC]));
				1249
				1250	if (writes && ad->batch_data_dir == REQ_SYNC)
				1251	/*
				1252	* Last batch was a read, switch to writes
				1253	*/
				1254	goto dispatch_writes;
				1255
				1256	if (ad->batch_data_dir == REQ_ASYNC) {
				1257	WARN_ON(ad->new_batch);
				1258	ad->changed_batch = 1;
				1259	}
				1260	ad->batch_data_dir = REQ_SYNC;
				1261	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1262	ad->last_check_fifo[ad->batch_data_dir] = jiffies;
				1263	goto dispatch_request;
				1264	}
				1265
				1266	/*
				1267	* the last batch was a read
				1268	*/
				1269
				1270	if (writes) {
				1271	dispatch_writes:
				1272	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC]));
				1273
				1274	if (ad->batch_data_dir == REQ_SYNC) {
				1275	ad->changed_batch = 1;
				1276
				1277	/*
				1278	* new_batch might be 1 when the queue runs out of
				1279	* reads. A subsequent submission of a write might
				1280	* cause a change of batch before the read is finished.
				1281	*/
				1282	ad->new_batch = 0;
				1283	}
				1284	ad->batch_data_dir = REQ_ASYNC;
				1285	ad->current_write_count = ad->write_batch_count;
				1286	ad->write_batch_idled = 0;
				1287	arq = ad->next_arq[ad->batch_data_dir];
				1288	goto dispatch_request;
				1289	}
				1290
				1291	BUG();
				1292	return 0;
				1293
				1294	dispatch_request:
				1295	/*
				1296	* If a request has expired, service it.
				1297	*/
				1298
				1299	if (as_fifo_expired(ad, ad->batch_data_dir)) {
				1300	fifo_expired:
				1301	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1302	BUG_ON(arq == NULL);
				1303	}
				1304
				1305	if (ad->changed_batch) {
				1306	WARN_ON(ad->new_batch);
				1307
				1308	if (ad->nr_dispatched)
				1309	return 0;
				1310
				1311	if (ad->batch_data_dir == REQ_ASYNC)
				1312	ad->current_batch_expires = jiffies +
				1313	ad->batch_expire[REQ_ASYNC];
				1314	else
				1315	ad->new_batch = 1;
				1316
				1317	ad->changed_batch = 0;
				1318	}
				1319
				1320	/*
				1321	* arq is the selected appropriate request.
				1322	*/
				1323	as_move_to_dispatch(ad, arq);
				1324
				1325	return 1;
				1326	}
				1327
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1328	/*
				1329	* Add arq to a list behind alias
				1330	*/
				1331	static inline void
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1332	as_add_aliased_request(struct as_data ad, struct as_rq arq,
				1333	struct as_rq *alias)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1334	{
				1335	struct request *req = arq->request;
				1336	struct list_head *insert = alias->request->queuelist.prev;
				1337
				1338	/*
				1339	* Transfer list of aliases
				1340	*/
				1341	while (!list_empty(&req->queuelist)) {
				1342	struct request *__rq = list_entry_rq(req->queuelist.next);
				1343	struct as_rq *__arq = RQ_DATA(__rq);
				1344
				1345	list_move_tail(&__rq->queuelist, &alias->request->queuelist);
				1346
				1347	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1348	}
				1349
				1350	/*
				1351	* Another request with the same start sector on the rbtree.
				1352	* Link this request to that sector. They are untangled in
				1353	* as_move_to_dispatch
				1354	*/
				1355	list_add(&arq->request->queuelist, insert);
				1356
				1357	/*
				1358	* Don't want to have to handle merges.
				1359	*/
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame]	1360	as_del_arq_hash(arq);
Tejun Heo	47e627c	2005-10-29 18:18:42 +0900	[diff] [blame]	1361	arq->request->flags \|= REQ_NOMERGE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1362	}
				1363
				1364	/*
				1365	* add arq to rbtree and fifo
				1366	*/
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1367	static void as_add_request(request_queue_t q, struct request rq)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1368	{
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1369	struct as_data *ad = q->elevator->elevator_data;
				1370	struct as_rq *arq = RQ_DATA(rq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1371	struct as_rq *alias;
				1372	int data_dir;
				1373
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1374	arq->state = AS_RQ_NEW;
				1375
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1376	if (rq_data_dir(arq->request) == READ
				1377	\|\| current->flags&PF_SYNCWRITE)
				1378	arq->is_sync = 1;
				1379	else
				1380	arq->is_sync = 0;
				1381	data_dir = arq->is_sync;
				1382
				1383	arq->io_context = as_get_io_context();
				1384
				1385	if (arq->io_context) {
				1386	as_update_iohist(ad, arq->io_context->aic, arq->request);
				1387	atomic_inc(&arq->io_context->aic->nr_queued);
				1388	}
				1389
				1390	alias = as_add_arq_rb(ad, arq);
				1391	if (!alias) {
				1392	/*
				1393	* set expire time (only used for reads) and add to fifo list
				1394	*/
				1395	arq->expires = jiffies + ad->fifo_expire[data_dir];
				1396	list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
				1397
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame]	1398	if (rq_mergeable(arq->request))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1399	as_add_arq_hash(ad, arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1400	as_update_arq(ad, arq); /* keep state machine up to date */
				1401
				1402	} else {
				1403	as_add_aliased_request(ad, arq, alias);
				1404
				1405	/*
				1406	* have we been anticipating this request?
				1407	* or does it come from the same process as the one we are
				1408	* anticipating for?
				1409	*/
				1410	if (ad->antic_status == ANTIC_WAIT_REQ
				1411	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				1412	if (as_can_break_anticipation(ad, arq))
				1413	as_antic_stop(ad);
				1414	}
				1415	}
				1416
				1417	arq->state = AS_RQ_QUEUED;
				1418	}
				1419
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1420	static void as_activate_request(request_queue_t q, struct request rq)
				1421	{
				1422	struct as_rq *arq = RQ_DATA(rq);
				1423
				1424	WARN_ON(arq->state != AS_RQ_DISPATCHED);
				1425	arq->state = AS_RQ_REMOVED;
				1426	if (arq->io_context && arq->io_context->aic)
				1427	atomic_dec(&arq->io_context->aic->nr_dispatched);
				1428	}
				1429
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1430	static void as_deactivate_request(request_queue_t q, struct request rq)
				1431	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1432	struct as_rq *arq = RQ_DATA(rq);
				1433
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1434	WARN_ON(arq->state != AS_RQ_REMOVED);
				1435	arq->state = AS_RQ_DISPATCHED;
				1436	if (arq->io_context && arq->io_context->aic)
				1437	atomic_inc(&arq->io_context->aic->nr_dispatched);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1438	}
				1439
				1440	/*
				1441	* as_queue_empty tells us if there are requests left in the device. It may
				1442	* not be the case that a driver can get the next request even if the queue
				1443	* is not empty - it is used in the block layer to check for plugging and
				1444	* merging opportunities
				1445	*/
				1446	static int as_queue_empty(request_queue_t *q)
				1447	{
				1448	struct as_data *ad = q->elevator->elevator_data;
				1449
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1450	return list_empty(&ad->fifo_list[REQ_ASYNC])
				1451	&& list_empty(&ad->fifo_list[REQ_SYNC]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1452	}
				1453
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1454	static struct request as_former_request(request_queue_t q,
				1455	struct request *rq)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1456	{
				1457	struct as_rq *arq = RQ_DATA(rq);
				1458	struct rb_node *rbprev = rb_prev(&arq->rb_node);
				1459	struct request *ret = NULL;
				1460
				1461	if (rbprev)
				1462	ret = rb_entry_arq(rbprev)->request;
				1463
				1464	return ret;
				1465	}
				1466
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1467	static struct request as_latter_request(request_queue_t q,
				1468	struct request *rq)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1469	{
				1470	struct as_rq *arq = RQ_DATA(rq);
				1471	struct rb_node *rbnext = rb_next(&arq->rb_node);
				1472	struct request *ret = NULL;
				1473
				1474	if (rbnext)
				1475	ret = rb_entry_arq(rbnext)->request;
				1476
				1477	return ret;
				1478	}
				1479
				1480	static int
				1481	as_merge(request_queue_t q, struct request req, struct bio bio)
				1482	{
				1483	struct as_data *ad = q->elevator->elevator_data;
				1484	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
				1485	struct request *__rq;
				1486	int ret;
				1487
				1488	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1489	* see if the merge hash can satisfy a back merge
				1490	*/
				1491	__rq = as_find_arq_hash(ad, bio->bi_sector);
				1492	if (__rq) {
				1493	BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
				1494
				1495	if (elv_rq_merge_ok(__rq, bio)) {
				1496	ret = ELEVATOR_BACK_MERGE;
				1497	goto out;
				1498	}
				1499	}
				1500
				1501	/*
				1502	* check for front merge
				1503	*/
				1504	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
				1505	if (__rq) {
				1506	BUG_ON(rb_key != rq_rb_key(__rq));
				1507
				1508	if (elv_rq_merge_ok(__rq, bio)) {
				1509	ret = ELEVATOR_FRONT_MERGE;
				1510	goto out;
				1511	}
				1512	}
				1513
				1514	return ELEVATOR_NO_MERGE;
				1515	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1516	if (ret) {
				1517	if (rq_mergeable(__rq))
				1518	as_hot_arq_hash(ad, RQ_DATA(__rq));
				1519	}
				1520	*req = __rq;
				1521	return ret;
				1522	}
				1523
				1524	static void as_merged_request(request_queue_t q, struct request req)
				1525	{
				1526	struct as_data *ad = q->elevator->elevator_data;
				1527	struct as_rq *arq = RQ_DATA(req);
				1528
				1529	/*
				1530	* hash always needs to be repositioned, key is end sector
				1531	*/
				1532	as_del_arq_hash(arq);
				1533	as_add_arq_hash(ad, arq);
				1534
				1535	/*
				1536	* if the merge was a front merge, we need to reposition request
				1537	*/
				1538	if (rq_rb_key(req) != arq->rb_key) {
				1539	struct as_rq alias, next_arq = NULL;
				1540
				1541	if (ad->next_arq[arq->is_sync] == arq)
				1542	next_arq = as_find_next_arq(ad, arq);
				1543
				1544	/*
				1545	* Note! We should really be moving any old aliased requests
				1546	* off this request and try to insert them into the rbtree. We
				1547	* currently don't bother. Ditto the next function.
				1548	*/
				1549	as_del_arq_rb(ad, arq);
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1550	if ((alias = as_add_arq_rb(ad, arq))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1551	list_del_init(&arq->fifo);
				1552	as_add_aliased_request(ad, arq, alias);
				1553	if (next_arq)
				1554	ad->next_arq[arq->is_sync] = next_arq;
				1555	}
				1556	/*
				1557	* Note! At this stage of this and the next function, our next
				1558	* request may not be optimal - eg the request may have "grown"
				1559	* behind the disk head. We currently don't bother adjusting.
				1560	*/
				1561	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1562	}
				1563
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1564	static void as_merged_requests(request_queue_t q, struct request req,
				1565	struct request *next)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1566	{
				1567	struct as_data *ad = q->elevator->elevator_data;
				1568	struct as_rq *arq = RQ_DATA(req);
				1569	struct as_rq *anext = RQ_DATA(next);
				1570
				1571	BUG_ON(!arq);
				1572	BUG_ON(!anext);
				1573
				1574	/*
				1575	* reposition arq (this is the merged request) in hash, and in rbtree
				1576	* in case of a front merge
				1577	*/
				1578	as_del_arq_hash(arq);
				1579	as_add_arq_hash(ad, arq);
				1580
				1581	if (rq_rb_key(req) != arq->rb_key) {
				1582	struct as_rq alias, next_arq = NULL;
				1583
				1584	if (ad->next_arq[arq->is_sync] == arq)
				1585	next_arq = as_find_next_arq(ad, arq);
				1586
				1587	as_del_arq_rb(ad, arq);
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1588	if ((alias = as_add_arq_rb(ad, arq))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1589	list_del_init(&arq->fifo);
				1590	as_add_aliased_request(ad, arq, alias);
				1591	if (next_arq)
				1592	ad->next_arq[arq->is_sync] = next_arq;
				1593	}
				1594	}
				1595
				1596	/*
				1597	* if anext expires before arq, assign its expire time to arq
				1598	* and move into anext position (anext will be deleted) in fifo
				1599	*/
				1600	if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
				1601	if (time_before(anext->expires, arq->expires)) {
				1602	list_move(&arq->fifo, &anext->fifo);
				1603	arq->expires = anext->expires;
				1604	/*
				1605	* Don't copy here but swap, because when anext is
				1606	* removed below, it must contain the unused context
				1607	*/
				1608	swap_io_context(&arq->io_context, &anext->io_context);
				1609	}
				1610	}
				1611
				1612	/*
				1613	* Transfer list of aliases
				1614	*/
				1615	while (!list_empty(&next->queuelist)) {
				1616	struct request *__rq = list_entry_rq(next->queuelist.next);
				1617	struct as_rq *__arq = RQ_DATA(__rq);
				1618
				1619	list_move_tail(&__rq->queuelist, &req->queuelist);
				1620
				1621	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1622	}
				1623
				1624	/*
				1625	* kill knowledge of next, this one is a goner
				1626	*/
				1627	as_remove_queued_request(q, next);
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1628	as_put_io_context(anext);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1629
				1630	anext->state = AS_RQ_MERGED;
				1631	}
				1632
				1633	/*
				1634	* This is executed in a "deferred" process context, by kblockd. It calls the
				1635	* driver's request_fn so the driver can submit that request.
				1636	*
				1637	* IMPORTANT! This guy will reenter the elevator, so set up all queue global
				1638	* state before calling, and don't rely on any state over calls.
				1639	*
				1640	* FIXME! dispatch queue is not a queue at all!
				1641	*/
				1642	static void as_work_handler(void *data)
				1643	{
				1644	struct request_queue *q = data;
				1645	unsigned long flags;
				1646
				1647	spin_lock_irqsave(q->queue_lock, flags);
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1648	if (!as_queue_empty(q))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1649	q->request_fn(q);
				1650	spin_unlock_irqrestore(q->queue_lock, flags);
				1651	}
				1652
				1653	static void as_put_request(request_queue_t q, struct request rq)
				1654	{
				1655	struct as_data *ad = q->elevator->elevator_data;
				1656	struct as_rq *arq = RQ_DATA(rq);
				1657
				1658	if (!arq) {
				1659	WARN_ON(1);
				1660	return;
				1661	}
				1662
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1663	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
				1664	arq->state != AS_RQ_PRESCHED &&
				1665	arq->state != AS_RQ_MERGED)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1666	printk("arq->state %d\n", arq->state);
				1667	WARN_ON(1);
				1668	}
				1669
				1670	mempool_free(arq, ad->arq_pool);
				1671	rq->elevator_private = NULL;
				1672	}
				1673
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1674	static int as_set_request(request_queue_t q, struct request rq,
Al Viro	8267e26	2005-10-21 03:20:53 -0400	[diff] [blame]	1675	struct bio *bio, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1676	{
				1677	struct as_data *ad = q->elevator->elevator_data;
				1678	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
				1679
				1680	if (arq) {
				1681	memset(arq, 0, sizeof(*arq));
				1682	RB_CLEAR(&arq->rb_node);
				1683	arq->request = rq;
				1684	arq->state = AS_RQ_PRESCHED;
				1685	arq->io_context = NULL;
				1686	INIT_LIST_HEAD(&arq->hash);
				1687	arq->on_hash = 0;
				1688	INIT_LIST_HEAD(&arq->fifo);
				1689	rq->elevator_private = arq;
				1690	return 0;
				1691	}
				1692
				1693	return 1;
				1694	}
				1695
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1696	static int as_may_queue(request_queue_t q, int rw, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1697	{
				1698	int ret = ELV_MQUEUE_MAY;
				1699	struct as_data *ad = q->elevator->elevator_data;
				1700	struct io_context *ioc;
				1701	if (ad->antic_status == ANTIC_WAIT_REQ \|\|
				1702	ad->antic_status == ANTIC_WAIT_NEXT) {
				1703	ioc = as_get_io_context();
				1704	if (ad->io_context == ioc)
				1705	ret = ELV_MQUEUE_MUST;
				1706	put_io_context(ioc);
				1707	}
				1708
				1709	return ret;
				1710	}
				1711
				1712	static void as_exit_queue(elevator_t *e)
				1713	{
				1714	struct as_data *ad = e->elevator_data;
				1715
				1716	del_timer_sync(&ad->antic_timer);
				1717	kblockd_flush();
				1718
				1719	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
				1720	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
				1721
				1722	mempool_destroy(ad->arq_pool);
				1723	put_io_context(ad->io_context);
				1724	kfree(ad->hash);
				1725	kfree(ad);
				1726	}
				1727
				1728	/*
				1729	* initialize elevator private data (as_data), and alloc a arq for
				1730	* each request on the free lists
				1731	*/
				1732	static int as_init_queue(request_queue_t q, elevator_t e)
				1733	{
				1734	struct as_data *ad;
				1735	int i;
				1736
				1737	if (!arq_pool)
				1738	return -ENOMEM;
				1739
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1740	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1741	if (!ad)
				1742	return -ENOMEM;
				1743	memset(ad, 0, sizeof(*ad));
				1744
				1745	ad->q = q; /* Identify what queue the data belongs to */
				1746
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1747	ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES,
				1748	GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1749	if (!ad->hash) {
				1750	kfree(ad);
				1751	return -ENOMEM;
				1752	}
				1753
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1754	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				1755	mempool_free_slab, arq_pool, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1756	if (!ad->arq_pool) {
				1757	kfree(ad->hash);
				1758	kfree(ad);
				1759	return -ENOMEM;
				1760	}
				1761
				1762	/* anticipatory scheduling helpers */
				1763	ad->antic_timer.function = as_antic_timeout;
				1764	ad->antic_timer.data = (unsigned long)q;
				1765	init_timer(&ad->antic_timer);
				1766	INIT_WORK(&ad->antic_work, as_work_handler, q);
				1767
				1768	for (i = 0; i < AS_HASH_ENTRIES; i++)
				1769	INIT_LIST_HEAD(&ad->hash[i]);
				1770
				1771	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
				1772	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
				1773	ad->sort_list[REQ_SYNC] = RB_ROOT;
				1774	ad->sort_list[REQ_ASYNC] = RB_ROOT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1775	ad->fifo_expire[REQ_SYNC] = default_read_expire;
				1776	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
				1777	ad->antic_expire = default_antic_expire;
				1778	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
				1779	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
				1780	e->elevator_data = ad;
				1781
				1782	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
				1783	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
				1784	if (ad->write_batch_count < 2)
				1785	ad->write_batch_count = 2;
				1786
				1787	return 0;
				1788	}
				1789
				1790	/*
				1791	* sysfs parts below
				1792	*/
				1793	struct as_fs_entry {
				1794	struct attribute attr;
				1795	ssize_t (show)(struct as_data , char *);
				1796	ssize_t (store)(struct as_data , const char *, size_t);
				1797	};
				1798
				1799	static ssize_t
				1800	as_var_show(unsigned int var, char *page)
				1801	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1802	return sprintf(page, "%d\n", var);
				1803	}
				1804
				1805	static ssize_t
				1806	as_var_store(unsigned long var, const char page, size_t count)
				1807	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1808	char p = (char ) page;
				1809
Jens Axboe	c9b3ad6	2005-07-27 11:43:37 -0700	[diff] [blame]	1810	*var = simple_strtoul(p, &p, 10);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1811	return count;
				1812	}
				1813
				1814	static ssize_t as_est_show(struct as_data ad, char page)
				1815	{
				1816	int pos = 0;
				1817
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1818	pos += sprintf(page+pos, "%lu %% exit probability\n",
				1819	100*ad->exit_prob/256);
				1820	pos += sprintf(page+pos, "%lu %% probability of exiting without a "
				1821	"cooperating process submitting IO\n",
				1822	100*ad->exit_no_coop/256);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1823	pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean);
Nick Piggin	f5b3db0	2005-11-07 00:59:53 -0800	[diff] [blame]	1824	pos += sprintf(page+pos, "%llu sectors new seek distance\n",
				1825	(unsigned long long)ad->new_seek_mean);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1826
				1827	return pos;
				1828	}
				1829
				1830	#define SHOW_FUNCTION(__FUNC, __VAR) \
				1831	static ssize_t __FUNC(struct as_data ad, char page) \
				1832	{ \
				1833	return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
				1834	}
				1835	SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]);
				1836	SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]);
				1837	SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire);
				1838	SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]);
				1839	SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]);
				1840	#undef SHOW_FUNCTION
				1841
				1842	#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
				1843	static ssize_t __FUNC(struct as_data ad, const char page, size_t count) \
				1844	{ \
				1845	int ret = as_var_store(__PTR, (page), count); \
				1846	if (*(__PTR) < (MIN)) \
				1847	*(__PTR) = (MIN); \
				1848	else if (*(__PTR) > (MAX)) \
				1849	*(__PTR) = (MAX); \
				1850	(__PTR) = msecs_to_jiffies((__PTR)); \
				1851	return ret; \
				1852	}
				1853	STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
				1854	STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
				1855	STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX);
				1856	STORE_FUNCTION(as_read_batchexpire_store,
				1857	&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
				1858	STORE_FUNCTION(as_write_batchexpire_store,
				1859	&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
				1860	#undef STORE_FUNCTION
				1861
				1862	static struct as_fs_entry as_est_entry = {
				1863	.attr = {.name = "est_time", .mode = S_IRUGO },
				1864	.show = as_est_show,
				1865	};
				1866	static struct as_fs_entry as_readexpire_entry = {
				1867	.attr = {.name = "read_expire", .mode = S_IRUGO \| S_IWUSR },
				1868	.show = as_readexpire_show,
				1869	.store = as_readexpire_store,
				1870	};
				1871	static struct as_fs_entry as_writeexpire_entry = {
				1872	.attr = {.name = "write_expire", .mode = S_IRUGO \| S_IWUSR },
				1873	.show = as_writeexpire_show,
				1874	.store = as_writeexpire_store,
				1875	};
				1876	static struct as_fs_entry as_anticexpire_entry = {
				1877	.attr = {.name = "antic_expire", .mode = S_IRUGO \| S_IWUSR },
				1878	.show = as_anticexpire_show,
				1879	.store = as_anticexpire_store,
				1880	};
				1881	static struct as_fs_entry as_read_batchexpire_entry = {
				1882	.attr = {.name = "read_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				1883	.show = as_read_batchexpire_show,
				1884	.store = as_read_batchexpire_store,
				1885	};
				1886	static struct as_fs_entry as_write_batchexpire_entry = {
				1887	.attr = {.name = "write_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				1888	.show = as_write_batchexpire_show,
				1889	.store = as_write_batchexpire_store,
				1890	};
				1891
				1892	static struct attribute *default_attrs[] = {
				1893	&as_est_entry.attr,
				1894	&as_readexpire_entry.attr,
				1895	&as_writeexpire_entry.attr,
				1896	&as_anticexpire_entry.attr,
				1897	&as_read_batchexpire_entry.attr,
				1898	&as_write_batchexpire_entry.attr,
				1899	NULL,
				1900	};
				1901
				1902	#define to_as(atr) container_of((atr), struct as_fs_entry, attr)
				1903
				1904	static ssize_t
				1905	as_attr_show(struct kobject kobj, struct attribute attr, char *page)
				1906	{
				1907	elevator_t *e = container_of(kobj, elevator_t, kobj);
				1908	struct as_fs_entry *entry = to_as(attr);
				1909
				1910	if (!entry->show)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	1911	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1912
				1913	return entry->show(e->elevator_data, page);
				1914	}
				1915
				1916	static ssize_t
				1917	as_attr_store(struct kobject kobj, struct attribute attr,
				1918	const char *page, size_t length)
				1919	{
				1920	elevator_t *e = container_of(kobj, elevator_t, kobj);
				1921	struct as_fs_entry *entry = to_as(attr);
				1922
				1923	if (!entry->store)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	1924	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1925
				1926	return entry->store(e->elevator_data, page, length);
				1927	}
				1928
				1929	static struct sysfs_ops as_sysfs_ops = {
				1930	.show = as_attr_show,
				1931	.store = as_attr_store,
				1932	};
				1933
				1934	static struct kobj_type as_ktype = {
				1935	.sysfs_ops = &as_sysfs_ops,
				1936	.default_attrs = default_attrs,
				1937	};
				1938
				1939	static struct elevator_type iosched_as = {
				1940	.ops = {
				1941	.elevator_merge_fn = as_merge,
				1942	.elevator_merged_fn = as_merged_request,
				1943	.elevator_merge_req_fn = as_merged_requests,
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1944	.elevator_dispatch_fn = as_dispatch_request,
				1945	.elevator_add_req_fn = as_add_request,
				1946	.elevator_activate_req_fn = as_activate_request,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1947	.elevator_deactivate_req_fn = as_deactivate_request,
				1948	.elevator_queue_empty_fn = as_queue_empty,
				1949	.elevator_completed_req_fn = as_completed_request,
				1950	.elevator_former_req_fn = as_former_request,
				1951	.elevator_latter_req_fn = as_latter_request,
				1952	.elevator_set_req_fn = as_set_request,
				1953	.elevator_put_req_fn = as_put_request,
				1954	.elevator_may_queue_fn = as_may_queue,
				1955	.elevator_init_fn = as_init_queue,
				1956	.elevator_exit_fn = as_exit_queue,
				1957	},
				1958
				1959	.elevator_ktype = &as_ktype,
				1960	.elevator_name = "anticipatory",
				1961	.elevator_owner = THIS_MODULE,
				1962	};
				1963
				1964	static int __init as_init(void)
				1965	{
				1966	int ret;
				1967
				1968	arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
				1969	0, 0, NULL, NULL);
				1970	if (!arq_pool)
				1971	return -ENOMEM;
				1972
				1973	ret = elv_register(&iosched_as);
				1974	if (!ret) {
				1975	/*
				1976	* don't allow AS to get unregistered, since we would have
				1977	* to browse all tasks in the system and release their
				1978	* as_io_context first
				1979	*/
				1980	__module_get(THIS_MODULE);
				1981	return 0;
				1982	}
				1983
				1984	kmem_cache_destroy(arq_pool);
				1985	return ret;
				1986	}
				1987
				1988	static void __exit as_exit(void)
				1989	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1990	elv_unregister(&iosched_as);
Christoph Hellwig	83521d3	2005-10-30 15:01:39 -0800	[diff] [blame]	1991	kmem_cache_destroy(arq_pool);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1992	}
				1993
				1994	module_init(as_init);
				1995	module_exit(as_exit);
				1996
				1997	MODULE_AUTHOR("Nick Piggin");
				1998	MODULE_LICENSE("GPL");
				1999	MODULE_DESCRIPTION("anticipatory IO scheduler");