Blame - drivers/block/as-iosched.c - kernel/msm-4.9

blob: 95c0a3690b0ffc15618acb6dfcae29f81234d846 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/drivers/block/as-iosched.c
				3	*
				4	* Anticipatory & deadline i/o scheduler.
				5	*
				6	* Copyright (C) 2002 Jens Axboe <axboe@suse.de>
				7	* Nick Piggin <piggin@cyberone.com.au>
				8	*
				9	*/
				10	#include <linux/kernel.h>
				11	#include <linux/fs.h>
				12	#include <linux/blkdev.h>
				13	#include <linux/elevator.h>
				14	#include <linux/bio.h>
				15	#include <linux/config.h>
				16	#include <linux/module.h>
				17	#include <linux/slab.h>
				18	#include <linux/init.h>
				19	#include <linux/compiler.h>
				20	#include <linux/hash.h>
				21	#include <linux/rbtree.h>
				22	#include <linux/interrupt.h>
				23
				24	#define REQ_SYNC 1
				25	#define REQ_ASYNC 0
				26
				27	/*
				28	* See Documentation/block/as-iosched.txt
				29	*/
				30
				31	/*
				32	* max time before a read is submitted.
				33	*/
				34	#define default_read_expire (HZ / 8)
				35
				36	/*
				37	* ditto for writes, these limits are not hard, even
				38	* if the disk is capable of satisfying them.
				39	*/
				40	#define default_write_expire (HZ / 4)
				41
				42	/*
				43	* read_batch_expire describes how long we will allow a stream of reads to
				44	* persist before looking to see whether it is time to switch over to writes.
				45	*/
				46	#define default_read_batch_expire (HZ / 2)
				47
				48	/*
				49	* write_batch_expire describes how long we want a stream of writes to run for.
				50	* This is not a hard limit, but a target we set for the auto-tuning thingy.
				51	* See, the problem is: we can send a lot of writes to disk cache / TCQ in
				52	* a short amount of time...
				53	*/
				54	#define default_write_batch_expire (HZ / 8)
				55
				56	/*
				57	* max time we may wait to anticipate a read (default around 6ms)
				58	*/
				59	#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
				60
				61	/*
				62	* Keep track of up to 20ms thinktimes. We can go as big as we like here,
				63	* however huge values tend to interfere and not decay fast enough. A program
				64	* might be in a non-io phase of operation. Waiting on user input for example,
				65	* or doing a lengthy computation. A small penalty can be justified there, and
				66	* will still catch out those processes that constantly have large thinktimes.
				67	*/
				68	#define MAX_THINKTIME (HZ/50UL)
				69
				70	/* Bits in as_io_context.state */
				71	enum as_io_states {
				72	AS_TASK_RUNNING=0, /* Process has not exitted */
				73	AS_TASK_IOSTARTED, /* Process has started some IO */
				74	AS_TASK_IORUNNING, /* Process has completed some IO */
				75	};
				76
				77	enum anticipation_status {
				78	ANTIC_OFF=0, /* Not anticipating (normal operation) */
				79	ANTIC_WAIT_REQ, /* The last read has not yet completed */
				80	ANTIC_WAIT_NEXT, /* Currently anticipating a request vs
				81	last read (which has completed) */
				82	ANTIC_FINISHED, /* Anticipating but have found a candidate
				83	* or timed out */
				84	};
				85
				86	struct as_data {
				87	/*
				88	* run time data
				89	*/
				90
				91	struct request_queue q; / the "owner" queue */
				92
				93	/*
				94	* requests (as_rq s) are present on both sort_list and fifo_list
				95	*/
				96	struct rb_root sort_list[2];
				97	struct list_head fifo_list[2];
				98
				99	struct as_rq next_arq[2]; / next in sort order */
				100	sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
				101	struct list_head dispatch; / driver dispatch queue */
				102	struct list_head hash; / request hash */
				103
				104	unsigned long exit_prob; /* probability a task will exit while
				105	being waited on */
				106	unsigned long new_ttime_total; /* mean thinktime on new proc */
				107	unsigned long new_ttime_mean;
				108	u64 new_seek_total; /* mean seek on new proc */
				109	sector_t new_seek_mean;
				110
				111	unsigned long current_batch_expires;
				112	unsigned long last_check_fifo[2];
				113	int changed_batch; /* 1: waiting for old batch to end */
				114	int new_batch; /* 1: waiting on first read complete */
				115	int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */
				116	int write_batch_count; /* max # of reqs in a write batch */
				117	int current_write_count; /* how many requests left this batch */
				118	int write_batch_idled; /* has the write batch gone idle? */
				119	mempool_t *arq_pool;
				120
				121	enum anticipation_status antic_status;
				122	unsigned long antic_start; /* jiffies: when it started */
				123	struct timer_list antic_timer; /* anticipatory scheduling timer */
				124	struct work_struct antic_work; /* Deferred unplugging */
				125	struct io_context io_context; / Identify the expected process */
				126	int ioc_finished; /* IO associated with io_context is finished */
				127	int nr_dispatched;
				128
				129	/*
				130	* settings that change how the i/o scheduler behaves
				131	*/
				132	unsigned long fifo_expire[2];
				133	unsigned long batch_expire[2];
				134	unsigned long antic_expire;
				135	};
				136
				137	#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
				138
				139	/*
				140	* per-request data.
				141	*/
				142	enum arq_state {
				143	AS_RQ_NEW=0, /* New - not referenced and not on any lists */
				144	AS_RQ_QUEUED, /* In the request queue. It belongs to the
				145	scheduler */
				146	AS_RQ_DISPATCHED, /* On the dispatch list. It belongs to the
				147	driver now */
				148	AS_RQ_PRESCHED, /* Debug poisoning for requests being used */
				149	AS_RQ_REMOVED,
				150	AS_RQ_MERGED,
				151	AS_RQ_POSTSCHED, /* when they shouldn't be */
				152	};
				153
				154	struct as_rq {
				155	/*
				156	* rbtree index, key is the starting offset
				157	*/
				158	struct rb_node rb_node;
				159	sector_t rb_key;
				160
				161	struct request *request;
				162
				163	struct io_context io_context; / The submitting task */
				164
				165	/*
				166	* request hash, key is the ending offset (for back merge lookup)
				167	*/
				168	struct list_head hash;
				169	unsigned int on_hash;
				170
				171	/*
				172	* expire fifo
				173	*/
				174	struct list_head fifo;
				175	unsigned long expires;
				176
				177	unsigned int is_sync;
				178	enum arq_state state;
				179	};
				180
				181	#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
				182
				183	static kmem_cache_t *arq_pool;
				184
				185	/*
				186	* IO Context helper functions
				187	*/
				188
				189	/* Called to deallocate the as_io_context */
				190	static void free_as_io_context(struct as_io_context *aic)
				191	{
				192	kfree(aic);
				193	}
				194
				195	/* Called when the task exits */
				196	static void exit_as_io_context(struct as_io_context *aic)
				197	{
				198	WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
				199	clear_bit(AS_TASK_RUNNING, &aic->state);
				200	}
				201
				202	static struct as_io_context *alloc_as_io_context(void)
				203	{
				204	struct as_io_context *ret;
				205
				206	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
				207	if (ret) {
				208	ret->dtor = free_as_io_context;
				209	ret->exit = exit_as_io_context;
				210	ret->state = 1 << AS_TASK_RUNNING;
				211	atomic_set(&ret->nr_queued, 0);
				212	atomic_set(&ret->nr_dispatched, 0);
				213	spin_lock_init(&ret->lock);
				214	ret->ttime_total = 0;
				215	ret->ttime_samples = 0;
				216	ret->ttime_mean = 0;
				217	ret->seek_total = 0;
				218	ret->seek_samples = 0;
				219	ret->seek_mean = 0;
				220	}
				221
				222	return ret;
				223	}
				224
				225	/*
				226	* If the current task has no AS IO context then create one and initialise it.
				227	* Then take a ref on the task's io context and return it.
				228	*/
				229	static struct io_context *as_get_io_context(void)
				230	{
				231	struct io_context *ioc = get_io_context(GFP_ATOMIC);
				232	if (ioc && !ioc->aic) {
				233	ioc->aic = alloc_as_io_context();
				234	if (!ioc->aic) {
				235	put_io_context(ioc);
				236	ioc = NULL;
				237	}
				238	}
				239	return ioc;
				240	}
				241
				242	/*
				243	* the back merge hash support functions
				244	*/
				245	static const int as_hash_shift = 6;
				246	#define AS_HASH_BLOCK(sec) ((sec) >> 3)
				247	#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
				248	#define AS_HASH_ENTRIES (1 << as_hash_shift)
				249	#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
				250	#define list_entry_hash(ptr) list_entry((ptr), struct as_rq, hash)
				251
				252	static inline void __as_del_arq_hash(struct as_rq *arq)
				253	{
				254	arq->on_hash = 0;
				255	list_del_init(&arq->hash);
				256	}
				257
				258	static inline void as_del_arq_hash(struct as_rq *arq)
				259	{
				260	if (arq->on_hash)
				261	__as_del_arq_hash(arq);
				262	}
				263
				264	static void as_remove_merge_hints(request_queue_t q, struct as_rq arq)
				265	{
				266	as_del_arq_hash(arq);
				267
				268	if (q->last_merge == arq->request)
				269	q->last_merge = NULL;
				270	}
				271
				272	static void as_add_arq_hash(struct as_data ad, struct as_rq arq)
				273	{
				274	struct request *rq = arq->request;
				275
				276	BUG_ON(arq->on_hash);
				277
				278	arq->on_hash = 1;
				279	list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
				280	}
				281
				282	/*
				283	* move hot entry to front of chain
				284	*/
				285	static inline void as_hot_arq_hash(struct as_data ad, struct as_rq arq)
				286	{
				287	struct request *rq = arq->request;
				288	struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
				289
				290	if (!arq->on_hash) {
				291	WARN_ON(1);
				292	return;
				293	}
				294
				295	if (arq->hash.prev != head) {
				296	list_del(&arq->hash);
				297	list_add(&arq->hash, head);
				298	}
				299	}
				300
				301	static struct request as_find_arq_hash(struct as_data ad, sector_t offset)
				302	{
				303	struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
				304	struct list_head entry, next = hash_list->next;
				305
				306	while ((entry = next) != hash_list) {
				307	struct as_rq *arq = list_entry_hash(entry);
				308	struct request *__rq = arq->request;
				309
				310	next = entry->next;
				311
				312	BUG_ON(!arq->on_hash);
				313
				314	if (!rq_mergeable(__rq)) {
				315	as_remove_merge_hints(ad->q, arq);
				316	continue;
				317	}
				318
				319	if (rq_hash_key(__rq) == offset)
				320	return __rq;
				321	}
				322
				323	return NULL;
				324	}
				325
				326	/*
				327	* rb tree support functions
				328	*/
				329	#define RB_NONE (2)
				330	#define RB_EMPTY(root) ((root)->rb_node == NULL)
				331	#define ON_RB(node) ((node)->rb_color != RB_NONE)
				332	#define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
				333	#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
				334	#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
				335	#define rq_rb_key(rq) (rq)->sector
				336
				337	/*
				338	* as_find_first_arq finds the first (lowest sector numbered) request
				339	* for the specified data_dir. Used to sweep back to the start of the disk
				340	* (1-way elevator) after we process the last (highest sector) request.
				341	*/
				342	static struct as_rq as_find_first_arq(struct as_data ad, int data_dir)
				343	{
				344	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				345
				346	if (n == NULL)
				347	return NULL;
				348
				349	for (;;) {
				350	if (n->rb_left == NULL)
				351	return rb_entry_arq(n);
				352
				353	n = n->rb_left;
				354	}
				355	}
				356
				357	/*
				358	* Add the request to the rb tree if it is unique. If there is an alias (an
				359	* existing request against the same sector), which can happen when using
				360	* direct IO, then return the alias.
				361	*/
				362	static struct as_rq as_add_arq_rb(struct as_data ad, struct as_rq *arq)
				363	{
				364	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
				365	struct rb_node *parent = NULL;
				366	struct as_rq *__arq;
				367	struct request *rq = arq->request;
				368
				369	arq->rb_key = rq_rb_key(rq);
				370
				371	while (*p) {
				372	parent = *p;
				373	__arq = rb_entry_arq(parent);
				374
				375	if (arq->rb_key < __arq->rb_key)
				376	p = &(*p)->rb_left;
				377	else if (arq->rb_key > __arq->rb_key)
				378	p = &(*p)->rb_right;
				379	else
				380	return __arq;
				381	}
				382
				383	rb_link_node(&arq->rb_node, parent, p);
				384	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				385
				386	return NULL;
				387	}
				388
				389	static inline void as_del_arq_rb(struct as_data ad, struct as_rq arq)
				390	{
				391	if (!ON_RB(&arq->rb_node)) {
				392	WARN_ON(1);
				393	return;
				394	}
				395
				396	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				397	RB_CLEAR(&arq->rb_node);
				398	}
				399
				400	static struct request *
				401	as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
				402	{
				403	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				404	struct as_rq *arq;
				405
				406	while (n) {
				407	arq = rb_entry_arq(n);
				408
				409	if (sector < arq->rb_key)
				410	n = n->rb_left;
				411	else if (sector > arq->rb_key)
				412	n = n->rb_right;
				413	else
				414	return arq->request;
				415	}
				416
				417	return NULL;
				418	}
				419
				420	/*
				421	* IO Scheduler proper
				422	*/
				423
				424	#define MAXBACK (1024 * 1024) /*
				425	* Maximum distance the disk will go backward
				426	* for a request.
				427	*/
				428
				429	#define BACK_PENALTY 2
				430
				431	/*
				432	* as_choose_req selects the preferred one of two requests of the same data_dir
				433	* ignoring time - eg. timeouts, which is the job of as_dispatch_request
				434	*/
				435	static struct as_rq *
				436	as_choose_req(struct as_data ad, struct as_rq arq1, struct as_rq *arq2)
				437	{
				438	int data_dir;
				439	sector_t last, s1, s2, d1, d2;
				440	int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
				441	const sector_t maxback = MAXBACK;
				442
				443	if (arq1 == NULL \|\| arq1 == arq2)
				444	return arq2;
				445	if (arq2 == NULL)
				446	return arq1;
				447
				448	data_dir = arq1->is_sync;
				449
				450	last = ad->last_sector[data_dir];
				451	s1 = arq1->request->sector;
				452	s2 = arq2->request->sector;
				453
				454	BUG_ON(data_dir != arq2->is_sync);
				455
				456	/*
				457	* Strict one way elevator _except_ in the case where we allow
				458	* short backward seeks which are biased as twice the cost of a
				459	* similar forward seek.
				460	*/
				461	if (s1 >= last)
				462	d1 = s1 - last;
				463	else if (s1+maxback >= last)
				464	d1 = (last - s1)*BACK_PENALTY;
				465	else {
				466	r1_wrap = 1;
				467	d1 = 0; /* shut up, gcc */
				468	}
				469
				470	if (s2 >= last)
				471	d2 = s2 - last;
				472	else if (s2+maxback >= last)
				473	d2 = (last - s2)*BACK_PENALTY;
				474	else {
				475	r2_wrap = 1;
				476	d2 = 0;
				477	}
				478
				479	/* Found required data */
				480	if (!r1_wrap && r2_wrap)
				481	return arq1;
				482	else if (!r2_wrap && r1_wrap)
				483	return arq2;
				484	else if (r1_wrap && r2_wrap) {
				485	/* both behind the head */
				486	if (s1 <= s2)
				487	return arq1;
				488	else
				489	return arq2;
				490	}
				491
				492	/* Both requests in front of the head */
				493	if (d1 < d2)
				494	return arq1;
				495	else if (d2 < d1)
				496	return arq2;
				497	else {
				498	if (s1 >= s2)
				499	return arq1;
				500	else
				501	return arq2;
				502	}
				503	}
				504
				505	/*
				506	* as_find_next_arq finds the next request after @prev in elevator order.
				507	* this with as_choose_req form the basis for how the scheduler chooses
				508	* what request to process next. Anticipation works on top of this.
				509	*/
				510	static struct as_rq as_find_next_arq(struct as_data ad, struct as_rq *last)
				511	{
				512	const int data_dir = last->is_sync;
				513	struct as_rq *ret;
				514	struct rb_node *rbnext = rb_next(&last->rb_node);
				515	struct rb_node *rbprev = rb_prev(&last->rb_node);
				516	struct as_rq arq_next, arq_prev;
				517
				518	BUG_ON(!ON_RB(&last->rb_node));
				519
				520	if (rbprev)
				521	arq_prev = rb_entry_arq(rbprev);
				522	else
				523	arq_prev = NULL;
				524
				525	if (rbnext)
				526	arq_next = rb_entry_arq(rbnext);
				527	else {
				528	arq_next = as_find_first_arq(ad, data_dir);
				529	if (arq_next == last)
				530	arq_next = NULL;
				531	}
				532
				533	ret = as_choose_req(ad, arq_next, arq_prev);
				534
				535	return ret;
				536	}
				537
				538	/*
				539	* anticipatory scheduling functions follow
				540	*/
				541
				542	/*
				543	* as_antic_expired tells us when we have anticipated too long.
				544	* The funny "absolute difference" math on the elapsed time is to handle
				545	* jiffy wraps, and disks which have been idle for 0x80000000 jiffies.
				546	*/
				547	static int as_antic_expired(struct as_data *ad)
				548	{
				549	long delta_jif;
				550
				551	delta_jif = jiffies - ad->antic_start;
				552	if (unlikely(delta_jif < 0))
				553	delta_jif = -delta_jif;
				554	if (delta_jif < ad->antic_expire)
				555	return 0;
				556
				557	return 1;
				558	}
				559
				560	/*
				561	* as_antic_waitnext starts anticipating that a nice request will soon be
				562	* submitted. See also as_antic_waitreq
				563	*/
				564	static void as_antic_waitnext(struct as_data *ad)
				565	{
				566	unsigned long timeout;
				567
				568	BUG_ON(ad->antic_status != ANTIC_OFF
				569	&& ad->antic_status != ANTIC_WAIT_REQ);
				570
				571	timeout = ad->antic_start + ad->antic_expire;
				572
				573	mod_timer(&ad->antic_timer, timeout);
				574
				575	ad->antic_status = ANTIC_WAIT_NEXT;
				576	}
				577
				578	/*
				579	* as_antic_waitreq starts anticipating. We don't start timing the anticipation
				580	* until the request that we're anticipating on has finished. This means we
				581	* are timing from when the candidate process wakes up hopefully.
				582	*/
				583	static void as_antic_waitreq(struct as_data *ad)
				584	{
				585	BUG_ON(ad->antic_status == ANTIC_FINISHED);
				586	if (ad->antic_status == ANTIC_OFF) {
				587	if (!ad->io_context \|\| ad->ioc_finished)
				588	as_antic_waitnext(ad);
				589	else
				590	ad->antic_status = ANTIC_WAIT_REQ;
				591	}
				592	}
				593
				594	/*
				595	* This is called directly by the functions in this file to stop anticipation.
				596	* We kill the timer and schedule a call to the request_fn asap.
				597	*/
				598	static void as_antic_stop(struct as_data *ad)
				599	{
				600	int status = ad->antic_status;
				601
				602	if (status == ANTIC_WAIT_REQ \|\| status == ANTIC_WAIT_NEXT) {
				603	if (status == ANTIC_WAIT_NEXT)
				604	del_timer(&ad->antic_timer);
				605	ad->antic_status = ANTIC_FINISHED;
				606	/* see as_work_handler */
				607	kblockd_schedule_work(&ad->antic_work);
				608	}
				609	}
				610
				611	/*
				612	* as_antic_timeout is the timer function set by as_antic_waitnext.
				613	*/
				614	static void as_antic_timeout(unsigned long data)
				615	{
				616	struct request_queue q = (struct request_queue )data;
				617	struct as_data *ad = q->elevator->elevator_data;
				618	unsigned long flags;
				619
				620	spin_lock_irqsave(q->queue_lock, flags);
				621	if (ad->antic_status == ANTIC_WAIT_REQ
				622	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				623	struct as_io_context *aic = ad->io_context->aic;
				624
				625	ad->antic_status = ANTIC_FINISHED;
				626	kblockd_schedule_work(&ad->antic_work);
				627
				628	if (aic->ttime_samples == 0) {
				629	/* process anticipated on has exitted or timed out*/
				630	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				631	}
				632	}
				633	spin_unlock_irqrestore(q->queue_lock, flags);
				634	}
				635
				636	/*
				637	* as_close_req decides if one request is considered "close" to the
				638	* previous one issued.
				639	*/
				640	static int as_close_req(struct as_data ad, struct as_rq arq)
				641	{
				642	unsigned long delay; /* milliseconds */
				643	sector_t last = ad->last_sector[ad->batch_data_dir];
				644	sector_t next = arq->request->sector;
				645	sector_t delta; /* acceptable close offset (in sectors) */
				646
				647	if (ad->antic_status == ANTIC_OFF \|\| !ad->ioc_finished)
				648	delay = 0;
				649	else
				650	delay = ((jiffies - ad->antic_start) * 1000) / HZ;
				651
				652	if (delay <= 1)
				653	delta = 64;
				654	else if (delay <= 20 && delay <= ad->antic_expire)
				655	delta = 64 << (delay-1);
				656	else
				657	return 1;
				658
				659	return (last - (delta>>1) <= next) && (next <= last + delta);
				660	}
				661
				662	/*
				663	* as_can_break_anticipation returns true if we have been anticipating this
				664	* request.
				665	*
				666	* It also returns true if the process against which we are anticipating
				667	* submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
				668	* dispatch it ASAP, because we know that application will not be submitting
				669	* any new reads.
				670	*
				671	* If the task which has submitted the request has exitted, break anticipation.
				672	*
				673	* If this task has queued some other IO, do not enter enticipation.
				674	*/
				675	static int as_can_break_anticipation(struct as_data ad, struct as_rq arq)
				676	{
				677	struct io_context *ioc;
				678	struct as_io_context *aic;
				679	sector_t s;
				680
				681	ioc = ad->io_context;
				682	BUG_ON(!ioc);
				683
				684	if (arq && ioc == arq->io_context) {
				685	/* request from same process */
				686	return 1;
				687	}
				688
				689	if (ad->ioc_finished && as_antic_expired(ad)) {
				690	/*
				691	* In this situation status should really be FINISHED,
				692	* however the timer hasn't had the chance to run yet.
				693	*/
				694	return 1;
				695	}
				696
				697	aic = ioc->aic;
				698	if (!aic)
				699	return 0;
				700
				701	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
				702	/* process anticipated on has exitted */
				703	if (aic->ttime_samples == 0)
				704	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				705	return 1;
				706	}
				707
				708	if (atomic_read(&aic->nr_queued) > 0) {
				709	/* process has more requests queued */
				710	return 1;
				711	}
				712
				713	if (atomic_read(&aic->nr_dispatched) > 0) {
				714	/* process has more requests dispatched */
				715	return 1;
				716	}
				717
				718	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) {
				719	/*
				720	* Found a close request that is not one of ours.
				721	*
				722	* This makes close requests from another process reset
				723	* our thinktime delay. Is generally useful when there are
				724	* two or more cooperating processes working in the same
				725	* area.
				726	*/
				727	spin_lock(&aic->lock);
				728	aic->last_end_request = jiffies;
				729	spin_unlock(&aic->lock);
				730	return 1;
				731	}
				732
				733
				734	if (aic->ttime_samples == 0) {
				735	if (ad->new_ttime_mean > ad->antic_expire)
				736	return 1;
				737	if (ad->exit_prob > 128)
				738	return 1;
				739	} else if (aic->ttime_mean > ad->antic_expire) {
				740	/* the process thinks too much between requests */
				741	return 1;
				742	}
				743
				744	if (!arq)
				745	return 0;
				746
				747	if (ad->last_sector[REQ_SYNC] < arq->request->sector)
				748	s = arq->request->sector - ad->last_sector[REQ_SYNC];
				749	else
				750	s = ad->last_sector[REQ_SYNC] - arq->request->sector;
				751
				752	if (aic->seek_samples == 0) {
				753	/*
				754	* Process has just started IO. Use past statistics to
				755	* guage success possibility
				756	*/
				757	if (ad->new_seek_mean > s) {
				758	/* this request is better than what we're expecting */
				759	return 1;
				760	}
				761
				762	} else {
				763	if (aic->seek_mean > s) {
				764	/* this request is better than what we're expecting */
				765	return 1;
				766	}
				767	}
				768
				769	return 0;
				770	}
				771
				772	/*
				773	* as_can_anticipate indicates weather we should either run arq
				774	* or keep anticipating a better request.
				775	*/
				776	static int as_can_anticipate(struct as_data ad, struct as_rq arq)
				777	{
				778	if (!ad->io_context)
				779	/*
				780	* Last request submitted was a write
				781	*/
				782	return 0;
				783
				784	if (ad->antic_status == ANTIC_FINISHED)
				785	/*
				786	* Don't restart if we have just finished. Run the next request
				787	*/
				788	return 0;
				789
				790	if (as_can_break_anticipation(ad, arq))
				791	/*
				792	* This request is a good candidate. Don't keep anticipating,
				793	* run it.
				794	*/
				795	return 0;
				796
				797	/*
				798	* OK from here, we haven't finished, and don't have a decent request!
				799	* Status is either ANTIC_OFF so start waiting,
				800	* ANTIC_WAIT_REQ so continue waiting for request to finish
				801	* or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
				802	*
				803	*/
				804
				805	return 1;
				806	}
				807
				808	static void as_update_thinktime(struct as_data ad, struct as_io_context aic, unsigned long ttime)
				809	{
				810	/* fixed point: 1.0 == 1<<8 */
				811	if (aic->ttime_samples == 0) {
				812	ad->new_ttime_total = (7ad->new_ttime_total + 256ttime) / 8;
				813	ad->new_ttime_mean = ad->new_ttime_total / 256;
				814
				815	ad->exit_prob = (7*ad->exit_prob)/8;
				816	}
				817	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
				818	aic->ttime_total = (7aic->ttime_total + 256ttime) / 8;
				819	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
				820	}
				821
				822	static void as_update_seekdist(struct as_data ad, struct as_io_context aic, sector_t sdist)
				823	{
				824	u64 total;
				825
				826	if (aic->seek_samples == 0) {
				827	ad->new_seek_total = (7ad->new_seek_total + 256(u64)sdist)/8;
				828	ad->new_seek_mean = ad->new_seek_total / 256;
				829	}
				830
				831	/*
				832	* Don't allow the seek distance to get too large from the
				833	* odd fragment, pagein, etc
				834	*/
				835	if (aic->seek_samples <= 60) /* second&third seek */
				836	sdist = min(sdist, (aic->seek_mean * 4) + 210241024);
				837	else
				838	sdist = min(sdist, (aic->seek_mean * 4) + 2102464);
				839
				840	aic->seek_samples = (7*aic->seek_samples + 256) / 8;
				841	aic->seek_total = (7aic->seek_total + (u64)256sdist) / 8;
				842	total = aic->seek_total + (aic->seek_samples/2);
				843	do_div(total, aic->seek_samples);
				844	aic->seek_mean = (sector_t)total;
				845	}
				846
				847	/*
				848	* as_update_iohist keeps a decaying histogram of IO thinktimes, and
				849	* updates @aic->ttime_mean based on that. It is called when a new
				850	* request is queued.
				851	*/
				852	static void as_update_iohist(struct as_data ad, struct as_io_context aic, struct request *rq)
				853	{
				854	struct as_rq *arq = RQ_DATA(rq);
				855	int data_dir = arq->is_sync;
				856	unsigned long thinktime;
				857	sector_t seek_dist;
				858
				859	if (aic == NULL)
				860	return;
				861
				862	if (data_dir == REQ_SYNC) {
				863	unsigned long in_flight = atomic_read(&aic->nr_queued)
				864	+ atomic_read(&aic->nr_dispatched);
				865	spin_lock(&aic->lock);
				866	if (test_bit(AS_TASK_IORUNNING, &aic->state) \|\|
				867	test_bit(AS_TASK_IOSTARTED, &aic->state)) {
				868	/* Calculate read -> read thinktime */
				869	if (test_bit(AS_TASK_IORUNNING, &aic->state)
				870	&& in_flight == 0) {
				871	thinktime = jiffies - aic->last_end_request;
				872	thinktime = min(thinktime, MAX_THINKTIME-1);
				873	} else
				874	thinktime = 0;
				875	as_update_thinktime(ad, aic, thinktime);
				876
				877	/* Calculate read -> read seek distance */
				878	if (aic->last_request_pos < rq->sector)
				879	seek_dist = rq->sector - aic->last_request_pos;
				880	else
				881	seek_dist = aic->last_request_pos - rq->sector;
				882	as_update_seekdist(ad, aic, seek_dist);
				883	}
				884	aic->last_request_pos = rq->sector + rq->nr_sectors;
				885	set_bit(AS_TASK_IOSTARTED, &aic->state);
				886	spin_unlock(&aic->lock);
				887	}
				888	}
				889
				890	/*
				891	* as_update_arq must be called whenever a request (arq) is added to
				892	* the sort_list. This function keeps caches up to date, and checks if the
				893	* request might be one we are "anticipating"
				894	*/
				895	static void as_update_arq(struct as_data ad, struct as_rq arq)
				896	{
				897	const int data_dir = arq->is_sync;
				898
				899	/* keep the next_arq cache up to date */
				900	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
				901
				902	/*
				903	* have we been anticipating this request?
				904	* or does it come from the same process as the one we are anticipating
				905	* for?
				906	*/
				907	if (ad->antic_status == ANTIC_WAIT_REQ
				908	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				909	if (as_can_break_anticipation(ad, arq))
				910	as_antic_stop(ad);
				911	}
				912	}
				913
				914	/*
				915	* Gathers timings and resizes the write batch automatically
				916	*/
				917	static void update_write_batch(struct as_data *ad)
				918	{
				919	unsigned long batch = ad->batch_expire[REQ_ASYNC];
				920	long write_time;
				921
				922	write_time = (jiffies - ad->current_batch_expires) + batch;
				923	if (write_time < 0)
				924	write_time = 0;
				925
				926	if (write_time > batch && !ad->write_batch_idled) {
				927	if (write_time > batch * 3)
				928	ad->write_batch_count /= 2;
				929	else
				930	ad->write_batch_count--;
				931	} else if (write_time < batch && ad->current_write_count == 0) {
				932	if (batch > write_time * 3)
				933	ad->write_batch_count *= 2;
				934	else
				935	ad->write_batch_count++;
				936	}
				937
				938	if (ad->write_batch_count < 1)
				939	ad->write_batch_count = 1;
				940	}
				941
				942	/*
				943	* as_completed_request is to be called when a request has completed and
				944	* returned something to the requesting process, be it an error or data.
				945	*/
				946	static void as_completed_request(request_queue_t q, struct request rq)
				947	{
				948	struct as_data *ad = q->elevator->elevator_data;
				949	struct as_rq *arq = RQ_DATA(rq);
				950
				951	WARN_ON(!list_empty(&rq->queuelist));
				952
				953	if (arq->state == AS_RQ_PRESCHED) {
				954	WARN_ON(arq->io_context);
				955	goto out;
				956	}
				957
				958	if (arq->state == AS_RQ_MERGED)
				959	goto out_ioc;
				960
				961	if (arq->state != AS_RQ_REMOVED) {
				962	printk("arq->state %d\n", arq->state);
				963	WARN_ON(1);
				964	goto out;
				965	}
				966
				967	if (!blk_fs_request(rq))
				968	goto out;
				969
				970	if (ad->changed_batch && ad->nr_dispatched == 1) {
				971	kblockd_schedule_work(&ad->antic_work);
				972	ad->changed_batch = 0;
				973
				974	if (ad->batch_data_dir == REQ_SYNC)
				975	ad->new_batch = 1;
				976	}
				977	WARN_ON(ad->nr_dispatched == 0);
				978	ad->nr_dispatched--;
				979
				980	/*
				981	* Start counting the batch from when a request of that direction is
				982	* actually serviced. This should help devices with big TCQ windows
				983	* and writeback caches
				984	*/
				985	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
				986	update_write_batch(ad);
				987	ad->current_batch_expires = jiffies +
				988	ad->batch_expire[REQ_SYNC];
				989	ad->new_batch = 0;
				990	}
				991
				992	if (ad->io_context == arq->io_context && ad->io_context) {
				993	ad->antic_start = jiffies;
				994	ad->ioc_finished = 1;
				995	if (ad->antic_status == ANTIC_WAIT_REQ) {
				996	/*
				997	* We were waiting on this request, now anticipate
				998	* the next one
				999	*/
				1000	as_antic_waitnext(ad);
				1001	}
				1002	}
				1003
				1004	out_ioc:
				1005	if (!arq->io_context)
				1006	goto out;
				1007
				1008	if (arq->is_sync == REQ_SYNC) {
				1009	struct as_io_context *aic = arq->io_context->aic;
				1010	if (aic) {
				1011	spin_lock(&aic->lock);
				1012	set_bit(AS_TASK_IORUNNING, &aic->state);
				1013	aic->last_end_request = jiffies;
				1014	spin_unlock(&aic->lock);
				1015	}
				1016	}
				1017
				1018	put_io_context(arq->io_context);
				1019	out:
				1020	arq->state = AS_RQ_POSTSCHED;
				1021	}
				1022
				1023	/*
				1024	* as_remove_queued_request removes a request from the pre dispatch queue
				1025	* without updating refcounts. It is expected the caller will drop the
				1026	* reference unless it replaces the request at somepart of the elevator
				1027	* (ie. the dispatch queue)
				1028	*/
				1029	static void as_remove_queued_request(request_queue_t q, struct request rq)
				1030	{
				1031	struct as_rq *arq = RQ_DATA(rq);
				1032	const int data_dir = arq->is_sync;
				1033	struct as_data *ad = q->elevator->elevator_data;
				1034
				1035	WARN_ON(arq->state != AS_RQ_QUEUED);
				1036
				1037	if (arq->io_context && arq->io_context->aic) {
				1038	BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
				1039	atomic_dec(&arq->io_context->aic->nr_queued);
				1040	}
				1041
				1042	/*
				1043	* Update the "next_arq" cache if we are about to remove its
				1044	* entry
				1045	*/
				1046	if (ad->next_arq[data_dir] == arq)
				1047	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1048
				1049	list_del_init(&arq->fifo);
				1050	as_remove_merge_hints(q, arq);
				1051	as_del_arq_rb(ad, arq);
				1052	}
				1053
				1054	/*
				1055	* as_remove_dispatched_request is called to remove a request which has gone
				1056	* to the dispatch list.
				1057	*/
				1058	static void as_remove_dispatched_request(request_queue_t q, struct request rq)
				1059	{
				1060	struct as_rq *arq = RQ_DATA(rq);
				1061	struct as_io_context *aic;
				1062
				1063	if (!arq) {
				1064	WARN_ON(1);
				1065	return;
				1066	}
				1067
				1068	WARN_ON(arq->state != AS_RQ_DISPATCHED);
				1069	WARN_ON(ON_RB(&arq->rb_node));
				1070	if (arq->io_context && arq->io_context->aic) {
				1071	aic = arq->io_context->aic;
				1072	if (aic) {
				1073	WARN_ON(!atomic_read(&aic->nr_dispatched));
				1074	atomic_dec(&aic->nr_dispatched);
				1075	}
				1076	}
				1077	}
				1078
				1079	/*
				1080	* as_remove_request is called when a driver has finished with a request.
				1081	* This should be only called for dispatched requests, but for some reason
				1082	* a POWER4 box running hwscan it does not.
				1083	*/
				1084	static void as_remove_request(request_queue_t q, struct request rq)
				1085	{
				1086	struct as_rq *arq = RQ_DATA(rq);
				1087
				1088	if (unlikely(arq->state == AS_RQ_NEW))
				1089	goto out;
				1090
				1091	if (ON_RB(&arq->rb_node)) {
				1092	if (arq->state != AS_RQ_QUEUED) {
				1093	printk("arq->state %d\n", arq->state);
				1094	WARN_ON(1);
				1095	goto out;
				1096	}
				1097	/*
				1098	* We'll lose the aliased request(s) here. I don't think this
				1099	* will ever happen, but if it does, hopefully someone will
				1100	* report it.
				1101	*/
				1102	WARN_ON(!list_empty(&rq->queuelist));
				1103	as_remove_queued_request(q, rq);
				1104	} else {
				1105	if (arq->state != AS_RQ_DISPATCHED) {
				1106	printk("arq->state %d\n", arq->state);
				1107	WARN_ON(1);
				1108	goto out;
				1109	}
				1110	as_remove_dispatched_request(q, rq);
				1111	}
				1112	out:
				1113	arq->state = AS_RQ_REMOVED;
				1114	}
				1115
				1116	/*
				1117	* as_fifo_expired returns 0 if there are no expired reads on the fifo,
				1118	* 1 otherwise. It is ratelimited so that we only perform the check once per
				1119	* `fifo_expire' interval. Otherwise a large number of expired requests
				1120	* would create a hopeless seekstorm.
				1121	*
				1122	* See as_antic_expired comment.
				1123	*/
				1124	static int as_fifo_expired(struct as_data *ad, int adir)
				1125	{
				1126	struct as_rq *arq;
				1127	long delta_jif;
				1128
				1129	delta_jif = jiffies - ad->last_check_fifo[adir];
				1130	if (unlikely(delta_jif < 0))
				1131	delta_jif = -delta_jif;
				1132	if (delta_jif < ad->fifo_expire[adir])
				1133	return 0;
				1134
				1135	ad->last_check_fifo[adir] = jiffies;
				1136
				1137	if (list_empty(&ad->fifo_list[adir]))
				1138	return 0;
				1139
				1140	arq = list_entry_fifo(ad->fifo_list[adir].next);
				1141
				1142	return time_after(jiffies, arq->expires);
				1143	}
				1144
				1145	/*
				1146	* as_batch_expired returns true if the current batch has expired. A batch
				1147	* is a set of reads or a set of writes.
				1148	*/
				1149	static inline int as_batch_expired(struct as_data *ad)
				1150	{
				1151	if (ad->changed_batch \|\| ad->new_batch)
				1152	return 0;
				1153
				1154	if (ad->batch_data_dir == REQ_SYNC)
				1155	/* TODO! add a check so a complete fifo gets written? */
				1156	return time_after(jiffies, ad->current_batch_expires);
				1157
				1158	return time_after(jiffies, ad->current_batch_expires)
				1159	\|\| ad->current_write_count == 0;
				1160	}
				1161
				1162	/*
				1163	* move an entry to dispatch queue
				1164	*/
				1165	static void as_move_to_dispatch(struct as_data ad, struct as_rq arq)
				1166	{
				1167	struct request *rq = arq->request;
				1168	struct list_head *insert;
				1169	const int data_dir = arq->is_sync;
				1170
				1171	BUG_ON(!ON_RB(&arq->rb_node));
				1172
				1173	as_antic_stop(ad);
				1174	ad->antic_status = ANTIC_OFF;
				1175
				1176	/*
				1177	* This has to be set in order to be correctly updated by
				1178	* as_find_next_arq
				1179	*/
				1180	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
				1181
				1182	if (data_dir == REQ_SYNC) {
				1183	/* In case we have to anticipate after this */
				1184	copy_io_context(&ad->io_context, &arq->io_context);
				1185	} else {
				1186	if (ad->io_context) {
				1187	put_io_context(ad->io_context);
				1188	ad->io_context = NULL;
				1189	}
				1190
				1191	if (ad->current_write_count != 0)
				1192	ad->current_write_count--;
				1193	}
				1194	ad->ioc_finished = 0;
				1195
				1196	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1197
				1198	/*
				1199	* take it off the sort and fifo list, add to dispatch queue
				1200	*/
				1201	insert = ad->dispatch->prev;
				1202
				1203	while (!list_empty(&rq->queuelist)) {
				1204	struct request *__rq = list_entry_rq(rq->queuelist.next);
				1205	struct as_rq *__arq = RQ_DATA(__rq);
				1206
				1207	list_move_tail(&__rq->queuelist, ad->dispatch);
				1208
				1209	if (__arq->io_context && __arq->io_context->aic)
				1210	atomic_inc(&__arq->io_context->aic->nr_dispatched);
				1211
				1212	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1213	__arq->state = AS_RQ_DISPATCHED;
				1214
				1215	ad->nr_dispatched++;
				1216	}
				1217
				1218	as_remove_queued_request(ad->q, rq);
				1219	WARN_ON(arq->state != AS_RQ_QUEUED);
				1220
				1221	list_add(&rq->queuelist, insert);
				1222	arq->state = AS_RQ_DISPATCHED;
				1223	if (arq->io_context && arq->io_context->aic)
				1224	atomic_inc(&arq->io_context->aic->nr_dispatched);
				1225	ad->nr_dispatched++;
				1226	}
				1227
				1228	/*
				1229	* as_dispatch_request selects the best request according to
				1230	* read/write expire, batch expire, etc, and moves it to the dispatch
				1231	* queue. Returns 1 if a request was found, 0 otherwise.
				1232	*/
				1233	static int as_dispatch_request(struct as_data *ad)
				1234	{
				1235	struct as_rq *arq;
				1236	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
				1237	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
				1238
				1239	/* Signal that the write batch was uncontended, so we can't time it */
				1240	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
				1241	if (ad->current_write_count == 0 \|\| !writes)
				1242	ad->write_batch_idled = 1;
				1243	}
				1244
				1245	if (!(reads \|\| writes)
				1246	\|\| ad->antic_status == ANTIC_WAIT_REQ
				1247	\|\| ad->antic_status == ANTIC_WAIT_NEXT
				1248	\|\| ad->changed_batch)
				1249	return 0;
				1250
				1251	if (!(reads && writes && as_batch_expired(ad)) ) {
				1252	/*
				1253	* batch is still running or no reads or no writes
				1254	*/
				1255	arq = ad->next_arq[ad->batch_data_dir];
				1256
				1257	if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
				1258	if (as_fifo_expired(ad, REQ_SYNC))
				1259	goto fifo_expired;
				1260
				1261	if (as_can_anticipate(ad, arq)) {
				1262	as_antic_waitreq(ad);
				1263	return 0;
				1264	}
				1265	}
				1266
				1267	if (arq) {
				1268	/* we have a "next request" */
				1269	if (reads && !writes)
				1270	ad->current_batch_expires =
				1271	jiffies + ad->batch_expire[REQ_SYNC];
				1272	goto dispatch_request;
				1273	}
				1274	}
				1275
				1276	/*
				1277	* at this point we are not running a batch. select the appropriate
				1278	* data direction (read / write)
				1279	*/
				1280
				1281	if (reads) {
				1282	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC]));
				1283
				1284	if (writes && ad->batch_data_dir == REQ_SYNC)
				1285	/*
				1286	* Last batch was a read, switch to writes
				1287	*/
				1288	goto dispatch_writes;
				1289
				1290	if (ad->batch_data_dir == REQ_ASYNC) {
				1291	WARN_ON(ad->new_batch);
				1292	ad->changed_batch = 1;
				1293	}
				1294	ad->batch_data_dir = REQ_SYNC;
				1295	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1296	ad->last_check_fifo[ad->batch_data_dir] = jiffies;
				1297	goto dispatch_request;
				1298	}
				1299
				1300	/*
				1301	* the last batch was a read
				1302	*/
				1303
				1304	if (writes) {
				1305	dispatch_writes:
				1306	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC]));
				1307
				1308	if (ad->batch_data_dir == REQ_SYNC) {
				1309	ad->changed_batch = 1;
				1310
				1311	/*
				1312	* new_batch might be 1 when the queue runs out of
				1313	* reads. A subsequent submission of a write might
				1314	* cause a change of batch before the read is finished.
				1315	*/
				1316	ad->new_batch = 0;
				1317	}
				1318	ad->batch_data_dir = REQ_ASYNC;
				1319	ad->current_write_count = ad->write_batch_count;
				1320	ad->write_batch_idled = 0;
				1321	arq = ad->next_arq[ad->batch_data_dir];
				1322	goto dispatch_request;
				1323	}
				1324
				1325	BUG();
				1326	return 0;
				1327
				1328	dispatch_request:
				1329	/*
				1330	* If a request has expired, service it.
				1331	*/
				1332
				1333	if (as_fifo_expired(ad, ad->batch_data_dir)) {
				1334	fifo_expired:
				1335	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1336	BUG_ON(arq == NULL);
				1337	}
				1338
				1339	if (ad->changed_batch) {
				1340	WARN_ON(ad->new_batch);
				1341
				1342	if (ad->nr_dispatched)
				1343	return 0;
				1344
				1345	if (ad->batch_data_dir == REQ_ASYNC)
				1346	ad->current_batch_expires = jiffies +
				1347	ad->batch_expire[REQ_ASYNC];
				1348	else
				1349	ad->new_batch = 1;
				1350
				1351	ad->changed_batch = 0;
				1352	}
				1353
				1354	/*
				1355	* arq is the selected appropriate request.
				1356	*/
				1357	as_move_to_dispatch(ad, arq);
				1358
				1359	return 1;
				1360	}
				1361
				1362	static struct request as_next_request(request_queue_t q)
				1363	{
				1364	struct as_data *ad = q->elevator->elevator_data;
				1365	struct request *rq = NULL;
				1366
				1367	/*
				1368	* if there are still requests on the dispatch queue, grab the first
				1369	*/
				1370	if (!list_empty(ad->dispatch) \|\| as_dispatch_request(ad))
				1371	rq = list_entry_rq(ad->dispatch->next);
				1372
				1373	return rq;
				1374	}
				1375
				1376	/*
				1377	* Add arq to a list behind alias
				1378	*/
				1379	static inline void
				1380	as_add_aliased_request(struct as_data ad, struct as_rq arq, struct as_rq *alias)
				1381	{
				1382	struct request *req = arq->request;
				1383	struct list_head *insert = alias->request->queuelist.prev;
				1384
				1385	/*
				1386	* Transfer list of aliases
				1387	*/
				1388	while (!list_empty(&req->queuelist)) {
				1389	struct request *__rq = list_entry_rq(req->queuelist.next);
				1390	struct as_rq *__arq = RQ_DATA(__rq);
				1391
				1392	list_move_tail(&__rq->queuelist, &alias->request->queuelist);
				1393
				1394	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1395	}
				1396
				1397	/*
				1398	* Another request with the same start sector on the rbtree.
				1399	* Link this request to that sector. They are untangled in
				1400	* as_move_to_dispatch
				1401	*/
				1402	list_add(&arq->request->queuelist, insert);
				1403
				1404	/*
				1405	* Don't want to have to handle merges.
				1406	*/
				1407	as_remove_merge_hints(ad->q, arq);
				1408	}
				1409
				1410	/*
				1411	* add arq to rbtree and fifo
				1412	*/
				1413	static void as_add_request(struct as_data ad, struct as_rq arq)
				1414	{
				1415	struct as_rq *alias;
				1416	int data_dir;
				1417
				1418	if (rq_data_dir(arq->request) == READ
				1419	\|\| current->flags&PF_SYNCWRITE)
				1420	arq->is_sync = 1;
				1421	else
				1422	arq->is_sync = 0;
				1423	data_dir = arq->is_sync;
				1424
				1425	arq->io_context = as_get_io_context();
				1426
				1427	if (arq->io_context) {
				1428	as_update_iohist(ad, arq->io_context->aic, arq->request);
				1429	atomic_inc(&arq->io_context->aic->nr_queued);
				1430	}
				1431
				1432	alias = as_add_arq_rb(ad, arq);
				1433	if (!alias) {
				1434	/*
				1435	* set expire time (only used for reads) and add to fifo list
				1436	*/
				1437	arq->expires = jiffies + ad->fifo_expire[data_dir];
				1438	list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
				1439
				1440	if (rq_mergeable(arq->request)) {
				1441	as_add_arq_hash(ad, arq);
				1442
				1443	if (!ad->q->last_merge)
				1444	ad->q->last_merge = arq->request;
				1445	}
				1446	as_update_arq(ad, arq); /* keep state machine up to date */
				1447
				1448	} else {
				1449	as_add_aliased_request(ad, arq, alias);
				1450
				1451	/*
				1452	* have we been anticipating this request?
				1453	* or does it come from the same process as the one we are
				1454	* anticipating for?
				1455	*/
				1456	if (ad->antic_status == ANTIC_WAIT_REQ
				1457	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				1458	if (as_can_break_anticipation(ad, arq))
				1459	as_antic_stop(ad);
				1460	}
				1461	}
				1462
				1463	arq->state = AS_RQ_QUEUED;
				1464	}
				1465
				1466	static void as_deactivate_request(request_queue_t q, struct request rq)
				1467	{
				1468	struct as_data *ad = q->elevator->elevator_data;
				1469	struct as_rq *arq = RQ_DATA(rq);
				1470
				1471	if (arq) {
				1472	if (arq->state == AS_RQ_REMOVED) {
				1473	arq->state = AS_RQ_DISPATCHED;
				1474	if (arq->io_context && arq->io_context->aic)
				1475	atomic_inc(&arq->io_context->aic->nr_dispatched);
				1476	}
				1477	} else
				1478	WARN_ON(blk_fs_request(rq)
				1479	&& (!(rq->flags & (REQ_HARDBARRIER\|REQ_SOFTBARRIER))) );
				1480
				1481	/* Stop anticipating - let this request get through */
				1482	as_antic_stop(ad);
				1483	}
				1484
				1485	/*
				1486	* requeue the request. The request has not been completed, nor is it a
				1487	* new request, so don't touch accounting.
				1488	*/
				1489	static void as_requeue_request(request_queue_t q, struct request rq)
				1490	{
				1491	as_deactivate_request(q, rq);
				1492	list_add(&rq->queuelist, &q->queue_head);
				1493	}
				1494
				1495	/*
				1496	* Account a request that is inserted directly onto the dispatch queue.
				1497	* arq->io_context->aic->nr_dispatched should not need to be incremented
				1498	* because only new requests should come through here: requeues go through
				1499	* our explicit requeue handler.
				1500	*/
				1501	static void as_account_queued_request(struct as_data ad, struct request rq)
				1502	{
				1503	if (blk_fs_request(rq)) {
				1504	struct as_rq *arq = RQ_DATA(rq);
				1505	arq->state = AS_RQ_DISPATCHED;
				1506	ad->nr_dispatched++;
				1507	}
				1508	}
				1509
				1510	static void
				1511	as_insert_request(request_queue_t q, struct request rq, int where)
				1512	{
				1513	struct as_data *ad = q->elevator->elevator_data;
				1514	struct as_rq *arq = RQ_DATA(rq);
				1515
				1516	if (arq) {
				1517	if (arq->state != AS_RQ_PRESCHED) {
				1518	printk("arq->state: %d\n", arq->state);
				1519	WARN_ON(1);
				1520	}
				1521	arq->state = AS_RQ_NEW;
				1522	}
				1523
				1524	/* barriers must flush the reorder queue */
				1525	if (unlikely(rq->flags & (REQ_SOFTBARRIER \| REQ_HARDBARRIER)
				1526	&& where == ELEVATOR_INSERT_SORT)) {
				1527	WARN_ON(1);
				1528	where = ELEVATOR_INSERT_BACK;
				1529	}
				1530
				1531	switch (where) {
				1532	case ELEVATOR_INSERT_BACK:
				1533	while (ad->next_arq[REQ_SYNC])
				1534	as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
				1535
				1536	while (ad->next_arq[REQ_ASYNC])
				1537	as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
				1538
				1539	list_add_tail(&rq->queuelist, ad->dispatch);
				1540	as_account_queued_request(ad, rq);
				1541	as_antic_stop(ad);
				1542	break;
				1543	case ELEVATOR_INSERT_FRONT:
				1544	list_add(&rq->queuelist, ad->dispatch);
				1545	as_account_queued_request(ad, rq);
				1546	as_antic_stop(ad);
				1547	break;
				1548	case ELEVATOR_INSERT_SORT:
				1549	BUG_ON(!blk_fs_request(rq));
				1550	as_add_request(ad, arq);
				1551	break;
				1552	default:
				1553	BUG();
				1554	return;
				1555	}
				1556	}
				1557
				1558	/*
				1559	* as_queue_empty tells us if there are requests left in the device. It may
				1560	* not be the case that a driver can get the next request even if the queue
				1561	* is not empty - it is used in the block layer to check for plugging and
				1562	* merging opportunities
				1563	*/
				1564	static int as_queue_empty(request_queue_t *q)
				1565	{
				1566	struct as_data *ad = q->elevator->elevator_data;
				1567
				1568	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
				1569	\|\| !list_empty(&ad->fifo_list[REQ_SYNC])
				1570	\|\| !list_empty(ad->dispatch))
				1571	return 0;
				1572
				1573	return 1;
				1574	}
				1575
				1576	static struct request *
				1577	as_former_request(request_queue_t q, struct request rq)
				1578	{
				1579	struct as_rq *arq = RQ_DATA(rq);
				1580	struct rb_node *rbprev = rb_prev(&arq->rb_node);
				1581	struct request *ret = NULL;
				1582
				1583	if (rbprev)
				1584	ret = rb_entry_arq(rbprev)->request;
				1585
				1586	return ret;
				1587	}
				1588
				1589	static struct request *
				1590	as_latter_request(request_queue_t q, struct request rq)
				1591	{
				1592	struct as_rq *arq = RQ_DATA(rq);
				1593	struct rb_node *rbnext = rb_next(&arq->rb_node);
				1594	struct request *ret = NULL;
				1595
				1596	if (rbnext)
				1597	ret = rb_entry_arq(rbnext)->request;
				1598
				1599	return ret;
				1600	}
				1601
				1602	static int
				1603	as_merge(request_queue_t q, struct request req, struct bio bio)
				1604	{
				1605	struct as_data *ad = q->elevator->elevator_data;
				1606	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
				1607	struct request *__rq;
				1608	int ret;
				1609
				1610	/*
				1611	* try last_merge to avoid going to hash
				1612	*/
				1613	ret = elv_try_last_merge(q, bio);
				1614	if (ret != ELEVATOR_NO_MERGE) {
				1615	__rq = q->last_merge;
				1616	goto out_insert;
				1617	}
				1618
				1619	/*
				1620	* see if the merge hash can satisfy a back merge
				1621	*/
				1622	__rq = as_find_arq_hash(ad, bio->bi_sector);
				1623	if (__rq) {
				1624	BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
				1625
				1626	if (elv_rq_merge_ok(__rq, bio)) {
				1627	ret = ELEVATOR_BACK_MERGE;
				1628	goto out;
				1629	}
				1630	}
				1631
				1632	/*
				1633	* check for front merge
				1634	*/
				1635	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
				1636	if (__rq) {
				1637	BUG_ON(rb_key != rq_rb_key(__rq));
				1638
				1639	if (elv_rq_merge_ok(__rq, bio)) {
				1640	ret = ELEVATOR_FRONT_MERGE;
				1641	goto out;
				1642	}
				1643	}
				1644
				1645	return ELEVATOR_NO_MERGE;
				1646	out:
				1647	if (rq_mergeable(__rq))
				1648	q->last_merge = __rq;
				1649	out_insert:
				1650	if (ret) {
				1651	if (rq_mergeable(__rq))
				1652	as_hot_arq_hash(ad, RQ_DATA(__rq));
				1653	}
				1654	*req = __rq;
				1655	return ret;
				1656	}
				1657
				1658	static void as_merged_request(request_queue_t q, struct request req)
				1659	{
				1660	struct as_data *ad = q->elevator->elevator_data;
				1661	struct as_rq *arq = RQ_DATA(req);
				1662
				1663	/*
				1664	* hash always needs to be repositioned, key is end sector
				1665	*/
				1666	as_del_arq_hash(arq);
				1667	as_add_arq_hash(ad, arq);
				1668
				1669	/*
				1670	* if the merge was a front merge, we need to reposition request
				1671	*/
				1672	if (rq_rb_key(req) != arq->rb_key) {
				1673	struct as_rq alias, next_arq = NULL;
				1674
				1675	if (ad->next_arq[arq->is_sync] == arq)
				1676	next_arq = as_find_next_arq(ad, arq);
				1677
				1678	/*
				1679	* Note! We should really be moving any old aliased requests
				1680	* off this request and try to insert them into the rbtree. We
				1681	* currently don't bother. Ditto the next function.
				1682	*/
				1683	as_del_arq_rb(ad, arq);
				1684	if ((alias = as_add_arq_rb(ad, arq)) ) {
				1685	list_del_init(&arq->fifo);
				1686	as_add_aliased_request(ad, arq, alias);
				1687	if (next_arq)
				1688	ad->next_arq[arq->is_sync] = next_arq;
				1689	}
				1690	/*
				1691	* Note! At this stage of this and the next function, our next
				1692	* request may not be optimal - eg the request may have "grown"
				1693	* behind the disk head. We currently don't bother adjusting.
				1694	*/
				1695	}
				1696
				1697	if (arq->on_hash)
				1698	q->last_merge = req;
				1699	}
				1700
				1701	static void
				1702	as_merged_requests(request_queue_t q, struct request req,
				1703	struct request *next)
				1704	{
				1705	struct as_data *ad = q->elevator->elevator_data;
				1706	struct as_rq *arq = RQ_DATA(req);
				1707	struct as_rq *anext = RQ_DATA(next);
				1708
				1709	BUG_ON(!arq);
				1710	BUG_ON(!anext);
				1711
				1712	/*
				1713	* reposition arq (this is the merged request) in hash, and in rbtree
				1714	* in case of a front merge
				1715	*/
				1716	as_del_arq_hash(arq);
				1717	as_add_arq_hash(ad, arq);
				1718
				1719	if (rq_rb_key(req) != arq->rb_key) {
				1720	struct as_rq alias, next_arq = NULL;
				1721
				1722	if (ad->next_arq[arq->is_sync] == arq)
				1723	next_arq = as_find_next_arq(ad, arq);
				1724
				1725	as_del_arq_rb(ad, arq);
				1726	if ((alias = as_add_arq_rb(ad, arq)) ) {
				1727	list_del_init(&arq->fifo);
				1728	as_add_aliased_request(ad, arq, alias);
				1729	if (next_arq)
				1730	ad->next_arq[arq->is_sync] = next_arq;
				1731	}
				1732	}
				1733
				1734	/*
				1735	* if anext expires before arq, assign its expire time to arq
				1736	* and move into anext position (anext will be deleted) in fifo
				1737	*/
				1738	if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
				1739	if (time_before(anext->expires, arq->expires)) {
				1740	list_move(&arq->fifo, &anext->fifo);
				1741	arq->expires = anext->expires;
				1742	/*
				1743	* Don't copy here but swap, because when anext is
				1744	* removed below, it must contain the unused context
				1745	*/
				1746	swap_io_context(&arq->io_context, &anext->io_context);
				1747	}
				1748	}
				1749
				1750	/*
				1751	* Transfer list of aliases
				1752	*/
				1753	while (!list_empty(&next->queuelist)) {
				1754	struct request *__rq = list_entry_rq(next->queuelist.next);
				1755	struct as_rq *__arq = RQ_DATA(__rq);
				1756
				1757	list_move_tail(&__rq->queuelist, &req->queuelist);
				1758
				1759	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1760	}
				1761
				1762	/*
				1763	* kill knowledge of next, this one is a goner
				1764	*/
				1765	as_remove_queued_request(q, next);
				1766
				1767	anext->state = AS_RQ_MERGED;
				1768	}
				1769
				1770	/*
				1771	* This is executed in a "deferred" process context, by kblockd. It calls the
				1772	* driver's request_fn so the driver can submit that request.
				1773	*
				1774	* IMPORTANT! This guy will reenter the elevator, so set up all queue global
				1775	* state before calling, and don't rely on any state over calls.
				1776	*
				1777	* FIXME! dispatch queue is not a queue at all!
				1778	*/
				1779	static void as_work_handler(void *data)
				1780	{
				1781	struct request_queue *q = data;
				1782	unsigned long flags;
				1783
				1784	spin_lock_irqsave(q->queue_lock, flags);
				1785	if (as_next_request(q))
				1786	q->request_fn(q);
				1787	spin_unlock_irqrestore(q->queue_lock, flags);
				1788	}
				1789
				1790	static void as_put_request(request_queue_t q, struct request rq)
				1791	{
				1792	struct as_data *ad = q->elevator->elevator_data;
				1793	struct as_rq *arq = RQ_DATA(rq);
				1794
				1795	if (!arq) {
				1796	WARN_ON(1);
				1797	return;
				1798	}
				1799
				1800	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
				1801	printk("arq->state %d\n", arq->state);
				1802	WARN_ON(1);
				1803	}
				1804
				1805	mempool_free(arq, ad->arq_pool);
				1806	rq->elevator_private = NULL;
				1807	}
				1808
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1809	static int as_set_request(request_queue_t q, struct request rq,
				1810	struct bio *bio, int gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1811	{
				1812	struct as_data *ad = q->elevator->elevator_data;
				1813	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
				1814
				1815	if (arq) {
				1816	memset(arq, 0, sizeof(*arq));
				1817	RB_CLEAR(&arq->rb_node);
				1818	arq->request = rq;
				1819	arq->state = AS_RQ_PRESCHED;
				1820	arq->io_context = NULL;
				1821	INIT_LIST_HEAD(&arq->hash);
				1822	arq->on_hash = 0;
				1823	INIT_LIST_HEAD(&arq->fifo);
				1824	rq->elevator_private = arq;
				1825	return 0;
				1826	}
				1827
				1828	return 1;
				1829	}
				1830
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1831	static int as_may_queue(request_queue_t q, int rw, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1832	{
				1833	int ret = ELV_MQUEUE_MAY;
				1834	struct as_data *ad = q->elevator->elevator_data;
				1835	struct io_context *ioc;
				1836	if (ad->antic_status == ANTIC_WAIT_REQ \|\|
				1837	ad->antic_status == ANTIC_WAIT_NEXT) {
				1838	ioc = as_get_io_context();
				1839	if (ad->io_context == ioc)
				1840	ret = ELV_MQUEUE_MUST;
				1841	put_io_context(ioc);
				1842	}
				1843
				1844	return ret;
				1845	}
				1846
				1847	static void as_exit_queue(elevator_t *e)
				1848	{
				1849	struct as_data *ad = e->elevator_data;
				1850
				1851	del_timer_sync(&ad->antic_timer);
				1852	kblockd_flush();
				1853
				1854	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
				1855	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
				1856
				1857	mempool_destroy(ad->arq_pool);
				1858	put_io_context(ad->io_context);
				1859	kfree(ad->hash);
				1860	kfree(ad);
				1861	}
				1862
				1863	/*
				1864	* initialize elevator private data (as_data), and alloc a arq for
				1865	* each request on the free lists
				1866	*/
				1867	static int as_init_queue(request_queue_t q, elevator_t e)
				1868	{
				1869	struct as_data *ad;
				1870	int i;
				1871
				1872	if (!arq_pool)
				1873	return -ENOMEM;
				1874
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1875	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1876	if (!ad)
				1877	return -ENOMEM;
				1878	memset(ad, 0, sizeof(*ad));
				1879
				1880	ad->q = q; /* Identify what queue the data belongs to */
				1881
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1882	ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES,
				1883	GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1884	if (!ad->hash) {
				1885	kfree(ad);
				1886	return -ENOMEM;
				1887	}
				1888
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1889	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				1890	mempool_free_slab, arq_pool, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1891	if (!ad->arq_pool) {
				1892	kfree(ad->hash);
				1893	kfree(ad);
				1894	return -ENOMEM;
				1895	}
				1896
				1897	/* anticipatory scheduling helpers */
				1898	ad->antic_timer.function = as_antic_timeout;
				1899	ad->antic_timer.data = (unsigned long)q;
				1900	init_timer(&ad->antic_timer);
				1901	INIT_WORK(&ad->antic_work, as_work_handler, q);
				1902
				1903	for (i = 0; i < AS_HASH_ENTRIES; i++)
				1904	INIT_LIST_HEAD(&ad->hash[i]);
				1905
				1906	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
				1907	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
				1908	ad->sort_list[REQ_SYNC] = RB_ROOT;
				1909	ad->sort_list[REQ_ASYNC] = RB_ROOT;
				1910	ad->dispatch = &q->queue_head;
				1911	ad->fifo_expire[REQ_SYNC] = default_read_expire;
				1912	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
				1913	ad->antic_expire = default_antic_expire;
				1914	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
				1915	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
				1916	e->elevator_data = ad;
				1917
				1918	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
				1919	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
				1920	if (ad->write_batch_count < 2)
				1921	ad->write_batch_count = 2;
				1922
				1923	return 0;
				1924	}
				1925
				1926	/*
				1927	* sysfs parts below
				1928	*/
				1929	struct as_fs_entry {
				1930	struct attribute attr;
				1931	ssize_t (show)(struct as_data , char *);
				1932	ssize_t (store)(struct as_data , const char *, size_t);
				1933	};
				1934
				1935	static ssize_t
				1936	as_var_show(unsigned int var, char *page)
				1937	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1938	return sprintf(page, "%d\n", var);
				1939	}
				1940
				1941	static ssize_t
				1942	as_var_store(unsigned long var, const char page, size_t count)
				1943	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1944	char p = (char ) page;
				1945
Jens Axboe	c9b3ad6	2005-07-27 11:43:37 -0700	[diff] [blame]	1946	*var = simple_strtoul(p, &p, 10);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1947	return count;
				1948	}
				1949
				1950	static ssize_t as_est_show(struct as_data ad, char page)
				1951	{
				1952	int pos = 0;
				1953
				1954	pos += sprintf(page+pos, "%lu %% exit probability\n", 100*ad->exit_prob/256);
				1955	pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean);
				1956	pos += sprintf(page+pos, "%llu sectors new seek distance\n", (unsigned long long)ad->new_seek_mean);
				1957
				1958	return pos;
				1959	}
				1960
				1961	#define SHOW_FUNCTION(__FUNC, __VAR) \
				1962	static ssize_t __FUNC(struct as_data ad, char page) \
				1963	{ \
				1964	return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
				1965	}
				1966	SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]);
				1967	SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]);
				1968	SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire);
				1969	SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]);
				1970	SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]);
				1971	#undef SHOW_FUNCTION
				1972
				1973	#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
				1974	static ssize_t __FUNC(struct as_data ad, const char page, size_t count) \
				1975	{ \
				1976	int ret = as_var_store(__PTR, (page), count); \
				1977	if (*(__PTR) < (MIN)) \
				1978	*(__PTR) = (MIN); \
				1979	else if (*(__PTR) > (MAX)) \
				1980	*(__PTR) = (MAX); \
				1981	(__PTR) = msecs_to_jiffies((__PTR)); \
				1982	return ret; \
				1983	}
				1984	STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
				1985	STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
				1986	STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX);
				1987	STORE_FUNCTION(as_read_batchexpire_store,
				1988	&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
				1989	STORE_FUNCTION(as_write_batchexpire_store,
				1990	&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
				1991	#undef STORE_FUNCTION
				1992
				1993	static struct as_fs_entry as_est_entry = {
				1994	.attr = {.name = "est_time", .mode = S_IRUGO },
				1995	.show = as_est_show,
				1996	};
				1997	static struct as_fs_entry as_readexpire_entry = {
				1998	.attr = {.name = "read_expire", .mode = S_IRUGO \| S_IWUSR },
				1999	.show = as_readexpire_show,
				2000	.store = as_readexpire_store,
				2001	};
				2002	static struct as_fs_entry as_writeexpire_entry = {
				2003	.attr = {.name = "write_expire", .mode = S_IRUGO \| S_IWUSR },
				2004	.show = as_writeexpire_show,
				2005	.store = as_writeexpire_store,
				2006	};
				2007	static struct as_fs_entry as_anticexpire_entry = {
				2008	.attr = {.name = "antic_expire", .mode = S_IRUGO \| S_IWUSR },
				2009	.show = as_anticexpire_show,
				2010	.store = as_anticexpire_store,
				2011	};
				2012	static struct as_fs_entry as_read_batchexpire_entry = {
				2013	.attr = {.name = "read_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				2014	.show = as_read_batchexpire_show,
				2015	.store = as_read_batchexpire_store,
				2016	};
				2017	static struct as_fs_entry as_write_batchexpire_entry = {
				2018	.attr = {.name = "write_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				2019	.show = as_write_batchexpire_show,
				2020	.store = as_write_batchexpire_store,
				2021	};
				2022
				2023	static struct attribute *default_attrs[] = {
				2024	&as_est_entry.attr,
				2025	&as_readexpire_entry.attr,
				2026	&as_writeexpire_entry.attr,
				2027	&as_anticexpire_entry.attr,
				2028	&as_read_batchexpire_entry.attr,
				2029	&as_write_batchexpire_entry.attr,
				2030	NULL,
				2031	};
				2032
				2033	#define to_as(atr) container_of((atr), struct as_fs_entry, attr)
				2034
				2035	static ssize_t
				2036	as_attr_show(struct kobject kobj, struct attribute attr, char *page)
				2037	{
				2038	elevator_t *e = container_of(kobj, elevator_t, kobj);
				2039	struct as_fs_entry *entry = to_as(attr);
				2040
				2041	if (!entry->show)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	2042	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2043
				2044	return entry->show(e->elevator_data, page);
				2045	}
				2046
				2047	static ssize_t
				2048	as_attr_store(struct kobject kobj, struct attribute attr,
				2049	const char *page, size_t length)
				2050	{
				2051	elevator_t *e = container_of(kobj, elevator_t, kobj);
				2052	struct as_fs_entry *entry = to_as(attr);
				2053
				2054	if (!entry->store)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	2055	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2056
				2057	return entry->store(e->elevator_data, page, length);
				2058	}
				2059
				2060	static struct sysfs_ops as_sysfs_ops = {
				2061	.show = as_attr_show,
				2062	.store = as_attr_store,
				2063	};
				2064
				2065	static struct kobj_type as_ktype = {
				2066	.sysfs_ops = &as_sysfs_ops,
				2067	.default_attrs = default_attrs,
				2068	};
				2069
				2070	static struct elevator_type iosched_as = {
				2071	.ops = {
				2072	.elevator_merge_fn = as_merge,
				2073	.elevator_merged_fn = as_merged_request,
				2074	.elevator_merge_req_fn = as_merged_requests,
				2075	.elevator_next_req_fn = as_next_request,
				2076	.elevator_add_req_fn = as_insert_request,
				2077	.elevator_remove_req_fn = as_remove_request,
				2078	.elevator_requeue_req_fn = as_requeue_request,
				2079	.elevator_deactivate_req_fn = as_deactivate_request,
				2080	.elevator_queue_empty_fn = as_queue_empty,
				2081	.elevator_completed_req_fn = as_completed_request,
				2082	.elevator_former_req_fn = as_former_request,
				2083	.elevator_latter_req_fn = as_latter_request,
				2084	.elevator_set_req_fn = as_set_request,
				2085	.elevator_put_req_fn = as_put_request,
				2086	.elevator_may_queue_fn = as_may_queue,
				2087	.elevator_init_fn = as_init_queue,
				2088	.elevator_exit_fn = as_exit_queue,
				2089	},
				2090
				2091	.elevator_ktype = &as_ktype,
				2092	.elevator_name = "anticipatory",
				2093	.elevator_owner = THIS_MODULE,
				2094	};
				2095
				2096	static int __init as_init(void)
				2097	{
				2098	int ret;
				2099
				2100	arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
				2101	0, 0, NULL, NULL);
				2102	if (!arq_pool)
				2103	return -ENOMEM;
				2104
				2105	ret = elv_register(&iosched_as);
				2106	if (!ret) {
				2107	/*
				2108	* don't allow AS to get unregistered, since we would have
				2109	* to browse all tasks in the system and release their
				2110	* as_io_context first
				2111	*/
				2112	__module_get(THIS_MODULE);
				2113	return 0;
				2114	}
				2115
				2116	kmem_cache_destroy(arq_pool);
				2117	return ret;
				2118	}
				2119
				2120	static void __exit as_exit(void)
				2121	{
				2122	kmem_cache_destroy(arq_pool);
				2123	elv_unregister(&iosched_as);
				2124	}
				2125
				2126	module_init(as_init);
				2127	module_exit(as_exit);
				2128
				2129	MODULE_AUTHOR("Nick Piggin");
				2130	MODULE_LICENSE("GPL");
				2131	MODULE_DESCRIPTION("anticipatory IO scheduler");