Blame - drivers/block/as-iosched.c - kernel/msm-4.19

blob: 5d20e4bcbc78f02b389abc1640d8da9fdc30a9da [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/drivers/block/as-iosched.c
				3	*
				4	* Anticipatory & deadline i/o scheduler.
				5	*
				6	* Copyright (C) 2002 Jens Axboe <axboe@suse.de>
				7	* Nick Piggin <piggin@cyberone.com.au>
				8	*
				9	*/
				10	#include <linux/kernel.h>
				11	#include <linux/fs.h>
				12	#include <linux/blkdev.h>
				13	#include <linux/elevator.h>
				14	#include <linux/bio.h>
				15	#include <linux/config.h>
				16	#include <linux/module.h>
				17	#include <linux/slab.h>
				18	#include <linux/init.h>
				19	#include <linux/compiler.h>
				20	#include <linux/hash.h>
				21	#include <linux/rbtree.h>
				22	#include <linux/interrupt.h>
				23
				24	#define REQ_SYNC 1
				25	#define REQ_ASYNC 0
				26
				27	/*
				28	* See Documentation/block/as-iosched.txt
				29	*/
				30
				31	/*
				32	* max time before a read is submitted.
				33	*/
				34	#define default_read_expire (HZ / 8)
				35
				36	/*
				37	* ditto for writes, these limits are not hard, even
				38	* if the disk is capable of satisfying them.
				39	*/
				40	#define default_write_expire (HZ / 4)
				41
				42	/*
				43	* read_batch_expire describes how long we will allow a stream of reads to
				44	* persist before looking to see whether it is time to switch over to writes.
				45	*/
				46	#define default_read_batch_expire (HZ / 2)
				47
				48	/*
				49	* write_batch_expire describes how long we want a stream of writes to run for.
				50	* This is not a hard limit, but a target we set for the auto-tuning thingy.
				51	* See, the problem is: we can send a lot of writes to disk cache / TCQ in
				52	* a short amount of time...
				53	*/
				54	#define default_write_batch_expire (HZ / 8)
				55
				56	/*
				57	* max time we may wait to anticipate a read (default around 6ms)
				58	*/
				59	#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
				60
				61	/*
				62	* Keep track of up to 20ms thinktimes. We can go as big as we like here,
				63	* however huge values tend to interfere and not decay fast enough. A program
				64	* might be in a non-io phase of operation. Waiting on user input for example,
				65	* or doing a lengthy computation. A small penalty can be justified there, and
				66	* will still catch out those processes that constantly have large thinktimes.
				67	*/
				68	#define MAX_THINKTIME (HZ/50UL)
				69
				70	/* Bits in as_io_context.state */
				71	enum as_io_states {
				72	AS_TASK_RUNNING=0, /* Process has not exitted */
				73	AS_TASK_IOSTARTED, /* Process has started some IO */
				74	AS_TASK_IORUNNING, /* Process has completed some IO */
				75	};
				76
				77	enum anticipation_status {
				78	ANTIC_OFF=0, /* Not anticipating (normal operation) */
				79	ANTIC_WAIT_REQ, /* The last read has not yet completed */
				80	ANTIC_WAIT_NEXT, /* Currently anticipating a request vs
				81	last read (which has completed) */
				82	ANTIC_FINISHED, /* Anticipating but have found a candidate
				83	* or timed out */
				84	};
				85
				86	struct as_data {
				87	/*
				88	* run time data
				89	*/
				90
				91	struct request_queue q; / the "owner" queue */
				92
				93	/*
				94	* requests (as_rq s) are present on both sort_list and fifo_list
				95	*/
				96	struct rb_root sort_list[2];
				97	struct list_head fifo_list[2];
				98
				99	struct as_rq next_arq[2]; / next in sort order */
				100	sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	101	struct list_head hash; / request hash */
				102
				103	unsigned long exit_prob; /* probability a task will exit while
				104	being waited on */
				105	unsigned long new_ttime_total; /* mean thinktime on new proc */
				106	unsigned long new_ttime_mean;
				107	u64 new_seek_total; /* mean seek on new proc */
				108	sector_t new_seek_mean;
				109
				110	unsigned long current_batch_expires;
				111	unsigned long last_check_fifo[2];
				112	int changed_batch; /* 1: waiting for old batch to end */
				113	int new_batch; /* 1: waiting on first read complete */
				114	int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */
				115	int write_batch_count; /* max # of reqs in a write batch */
				116	int current_write_count; /* how many requests left this batch */
				117	int write_batch_idled; /* has the write batch gone idle? */
				118	mempool_t *arq_pool;
				119
				120	enum anticipation_status antic_status;
				121	unsigned long antic_start; /* jiffies: when it started */
				122	struct timer_list antic_timer; /* anticipatory scheduling timer */
				123	struct work_struct antic_work; /* Deferred unplugging */
				124	struct io_context io_context; / Identify the expected process */
				125	int ioc_finished; /* IO associated with io_context is finished */
				126	int nr_dispatched;
				127
				128	/*
				129	* settings that change how the i/o scheduler behaves
				130	*/
				131	unsigned long fifo_expire[2];
				132	unsigned long batch_expire[2];
				133	unsigned long antic_expire;
				134	};
				135
				136	#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
				137
				138	/*
				139	* per-request data.
				140	*/
				141	enum arq_state {
				142	AS_RQ_NEW=0, /* New - not referenced and not on any lists */
				143	AS_RQ_QUEUED, /* In the request queue. It belongs to the
				144	scheduler */
				145	AS_RQ_DISPATCHED, /* On the dispatch list. It belongs to the
				146	driver now */
				147	AS_RQ_PRESCHED, /* Debug poisoning for requests being used */
				148	AS_RQ_REMOVED,
				149	AS_RQ_MERGED,
				150	AS_RQ_POSTSCHED, /* when they shouldn't be */
				151	};
				152
				153	struct as_rq {
				154	/*
				155	* rbtree index, key is the starting offset
				156	*/
				157	struct rb_node rb_node;
				158	sector_t rb_key;
				159
				160	struct request *request;
				161
				162	struct io_context io_context; / The submitting task */
				163
				164	/*
				165	* request hash, key is the ending offset (for back merge lookup)
				166	*/
				167	struct list_head hash;
				168	unsigned int on_hash;
				169
				170	/*
				171	* expire fifo
				172	*/
				173	struct list_head fifo;
				174	unsigned long expires;
				175
				176	unsigned int is_sync;
				177	enum arq_state state;
				178	};
				179
				180	#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
				181
				182	static kmem_cache_t *arq_pool;
				183
				184	/*
				185	* IO Context helper functions
				186	*/
				187
				188	/* Called to deallocate the as_io_context */
				189	static void free_as_io_context(struct as_io_context *aic)
				190	{
				191	kfree(aic);
				192	}
				193
				194	/* Called when the task exits */
				195	static void exit_as_io_context(struct as_io_context *aic)
				196	{
				197	WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
				198	clear_bit(AS_TASK_RUNNING, &aic->state);
				199	}
				200
				201	static struct as_io_context *alloc_as_io_context(void)
				202	{
				203	struct as_io_context *ret;
				204
				205	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
				206	if (ret) {
				207	ret->dtor = free_as_io_context;
				208	ret->exit = exit_as_io_context;
				209	ret->state = 1 << AS_TASK_RUNNING;
				210	atomic_set(&ret->nr_queued, 0);
				211	atomic_set(&ret->nr_dispatched, 0);
				212	spin_lock_init(&ret->lock);
				213	ret->ttime_total = 0;
				214	ret->ttime_samples = 0;
				215	ret->ttime_mean = 0;
				216	ret->seek_total = 0;
				217	ret->seek_samples = 0;
				218	ret->seek_mean = 0;
				219	}
				220
				221	return ret;
				222	}
				223
				224	/*
				225	* If the current task has no AS IO context then create one and initialise it.
				226	* Then take a ref on the task's io context and return it.
				227	*/
				228	static struct io_context *as_get_io_context(void)
				229	{
				230	struct io_context *ioc = get_io_context(GFP_ATOMIC);
				231	if (ioc && !ioc->aic) {
				232	ioc->aic = alloc_as_io_context();
				233	if (!ioc->aic) {
				234	put_io_context(ioc);
				235	ioc = NULL;
				236	}
				237	}
				238	return ioc;
				239	}
				240
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	241	static void as_put_io_context(struct as_rq *arq)
				242	{
				243	struct as_io_context *aic;
				244
				245	if (unlikely(!arq->io_context))
				246	return;
				247
				248	aic = arq->io_context->aic;
				249
				250	if (arq->is_sync == REQ_SYNC && aic) {
				251	spin_lock(&aic->lock);
				252	set_bit(AS_TASK_IORUNNING, &aic->state);
				253	aic->last_end_request = jiffies;
				254	spin_unlock(&aic->lock);
				255	}
				256
				257	put_io_context(arq->io_context);
				258	}
				259
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	260	/*
				261	* the back merge hash support functions
				262	*/
				263	static const int as_hash_shift = 6;
				264	#define AS_HASH_BLOCK(sec) ((sec) >> 3)
				265	#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
				266	#define AS_HASH_ENTRIES (1 << as_hash_shift)
				267	#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
				268	#define list_entry_hash(ptr) list_entry((ptr), struct as_rq, hash)
				269
				270	static inline void __as_del_arq_hash(struct as_rq *arq)
				271	{
				272	arq->on_hash = 0;
				273	list_del_init(&arq->hash);
				274	}
				275
				276	static inline void as_del_arq_hash(struct as_rq *arq)
				277	{
				278	if (arq->on_hash)
				279	__as_del_arq_hash(arq);
				280	}
				281
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	282	static void as_add_arq_hash(struct as_data ad, struct as_rq arq)
				283	{
				284	struct request *rq = arq->request;
				285
				286	BUG_ON(arq->on_hash);
				287
				288	arq->on_hash = 1;
				289	list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
				290	}
				291
				292	/*
				293	* move hot entry to front of chain
				294	*/
				295	static inline void as_hot_arq_hash(struct as_data ad, struct as_rq arq)
				296	{
				297	struct request *rq = arq->request;
				298	struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
				299
				300	if (!arq->on_hash) {
				301	WARN_ON(1);
				302	return;
				303	}
				304
				305	if (arq->hash.prev != head) {
				306	list_del(&arq->hash);
				307	list_add(&arq->hash, head);
				308	}
				309	}
				310
				311	static struct request as_find_arq_hash(struct as_data ad, sector_t offset)
				312	{
				313	struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
				314	struct list_head entry, next = hash_list->next;
				315
				316	while ((entry = next) != hash_list) {
				317	struct as_rq *arq = list_entry_hash(entry);
				318	struct request *__rq = arq->request;
				319
				320	next = entry->next;
				321
				322	BUG_ON(!arq->on_hash);
				323
				324	if (!rq_mergeable(__rq)) {
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame^]	325	as_del_arq_hash(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	326	continue;
				327	}
				328
				329	if (rq_hash_key(__rq) == offset)
				330	return __rq;
				331	}
				332
				333	return NULL;
				334	}
				335
				336	/*
				337	* rb tree support functions
				338	*/
				339	#define RB_NONE (2)
				340	#define RB_EMPTY(root) ((root)->rb_node == NULL)
				341	#define ON_RB(node) ((node)->rb_color != RB_NONE)
				342	#define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
				343	#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
				344	#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
				345	#define rq_rb_key(rq) (rq)->sector
				346
				347	/*
				348	* as_find_first_arq finds the first (lowest sector numbered) request
				349	* for the specified data_dir. Used to sweep back to the start of the disk
				350	* (1-way elevator) after we process the last (highest sector) request.
				351	*/
				352	static struct as_rq as_find_first_arq(struct as_data ad, int data_dir)
				353	{
				354	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				355
				356	if (n == NULL)
				357	return NULL;
				358
				359	for (;;) {
				360	if (n->rb_left == NULL)
				361	return rb_entry_arq(n);
				362
				363	n = n->rb_left;
				364	}
				365	}
				366
				367	/*
				368	* Add the request to the rb tree if it is unique. If there is an alias (an
				369	* existing request against the same sector), which can happen when using
				370	* direct IO, then return the alias.
				371	*/
				372	static struct as_rq as_add_arq_rb(struct as_data ad, struct as_rq *arq)
				373	{
				374	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
				375	struct rb_node *parent = NULL;
				376	struct as_rq *__arq;
				377	struct request *rq = arq->request;
				378
				379	arq->rb_key = rq_rb_key(rq);
				380
				381	while (*p) {
				382	parent = *p;
				383	__arq = rb_entry_arq(parent);
				384
				385	if (arq->rb_key < __arq->rb_key)
				386	p = &(*p)->rb_left;
				387	else if (arq->rb_key > __arq->rb_key)
				388	p = &(*p)->rb_right;
				389	else
				390	return __arq;
				391	}
				392
				393	rb_link_node(&arq->rb_node, parent, p);
				394	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				395
				396	return NULL;
				397	}
				398
				399	static inline void as_del_arq_rb(struct as_data ad, struct as_rq arq)
				400	{
				401	if (!ON_RB(&arq->rb_node)) {
				402	WARN_ON(1);
				403	return;
				404	}
				405
				406	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
				407	RB_CLEAR(&arq->rb_node);
				408	}
				409
				410	static struct request *
				411	as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
				412	{
				413	struct rb_node *n = ad->sort_list[data_dir].rb_node;
				414	struct as_rq *arq;
				415
				416	while (n) {
				417	arq = rb_entry_arq(n);
				418
				419	if (sector < arq->rb_key)
				420	n = n->rb_left;
				421	else if (sector > arq->rb_key)
				422	n = n->rb_right;
				423	else
				424	return arq->request;
				425	}
				426
				427	return NULL;
				428	}
				429
				430	/*
				431	* IO Scheduler proper
				432	*/
				433
				434	#define MAXBACK (1024 * 1024) /*
				435	* Maximum distance the disk will go backward
				436	* for a request.
				437	*/
				438
				439	#define BACK_PENALTY 2
				440
				441	/*
				442	* as_choose_req selects the preferred one of two requests of the same data_dir
				443	* ignoring time - eg. timeouts, which is the job of as_dispatch_request
				444	*/
				445	static struct as_rq *
				446	as_choose_req(struct as_data ad, struct as_rq arq1, struct as_rq *arq2)
				447	{
				448	int data_dir;
				449	sector_t last, s1, s2, d1, d2;
				450	int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
				451	const sector_t maxback = MAXBACK;
				452
				453	if (arq1 == NULL \|\| arq1 == arq2)
				454	return arq2;
				455	if (arq2 == NULL)
				456	return arq1;
				457
				458	data_dir = arq1->is_sync;
				459
				460	last = ad->last_sector[data_dir];
				461	s1 = arq1->request->sector;
				462	s2 = arq2->request->sector;
				463
				464	BUG_ON(data_dir != arq2->is_sync);
				465
				466	/*
				467	* Strict one way elevator _except_ in the case where we allow
				468	* short backward seeks which are biased as twice the cost of a
				469	* similar forward seek.
				470	*/
				471	if (s1 >= last)
				472	d1 = s1 - last;
				473	else if (s1+maxback >= last)
				474	d1 = (last - s1)*BACK_PENALTY;
				475	else {
				476	r1_wrap = 1;
				477	d1 = 0; /* shut up, gcc */
				478	}
				479
				480	if (s2 >= last)
				481	d2 = s2 - last;
				482	else if (s2+maxback >= last)
				483	d2 = (last - s2)*BACK_PENALTY;
				484	else {
				485	r2_wrap = 1;
				486	d2 = 0;
				487	}
				488
				489	/* Found required data */
				490	if (!r1_wrap && r2_wrap)
				491	return arq1;
				492	else if (!r2_wrap && r1_wrap)
				493	return arq2;
				494	else if (r1_wrap && r2_wrap) {
				495	/* both behind the head */
				496	if (s1 <= s2)
				497	return arq1;
				498	else
				499	return arq2;
				500	}
				501
				502	/* Both requests in front of the head */
				503	if (d1 < d2)
				504	return arq1;
				505	else if (d2 < d1)
				506	return arq2;
				507	else {
				508	if (s1 >= s2)
				509	return arq1;
				510	else
				511	return arq2;
				512	}
				513	}
				514
				515	/*
				516	* as_find_next_arq finds the next request after @prev in elevator order.
				517	* this with as_choose_req form the basis for how the scheduler chooses
				518	* what request to process next. Anticipation works on top of this.
				519	*/
				520	static struct as_rq as_find_next_arq(struct as_data ad, struct as_rq *last)
				521	{
				522	const int data_dir = last->is_sync;
				523	struct as_rq *ret;
				524	struct rb_node *rbnext = rb_next(&last->rb_node);
				525	struct rb_node *rbprev = rb_prev(&last->rb_node);
				526	struct as_rq arq_next, arq_prev;
				527
				528	BUG_ON(!ON_RB(&last->rb_node));
				529
				530	if (rbprev)
				531	arq_prev = rb_entry_arq(rbprev);
				532	else
				533	arq_prev = NULL;
				534
				535	if (rbnext)
				536	arq_next = rb_entry_arq(rbnext);
				537	else {
				538	arq_next = as_find_first_arq(ad, data_dir);
				539	if (arq_next == last)
				540	arq_next = NULL;
				541	}
				542
				543	ret = as_choose_req(ad, arq_next, arq_prev);
				544
				545	return ret;
				546	}
				547
				548	/*
				549	* anticipatory scheduling functions follow
				550	*/
				551
				552	/*
				553	* as_antic_expired tells us when we have anticipated too long.
				554	* The funny "absolute difference" math on the elapsed time is to handle
				555	* jiffy wraps, and disks which have been idle for 0x80000000 jiffies.
				556	*/
				557	static int as_antic_expired(struct as_data *ad)
				558	{
				559	long delta_jif;
				560
				561	delta_jif = jiffies - ad->antic_start;
				562	if (unlikely(delta_jif < 0))
				563	delta_jif = -delta_jif;
				564	if (delta_jif < ad->antic_expire)
				565	return 0;
				566
				567	return 1;
				568	}
				569
				570	/*
				571	* as_antic_waitnext starts anticipating that a nice request will soon be
				572	* submitted. See also as_antic_waitreq
				573	*/
				574	static void as_antic_waitnext(struct as_data *ad)
				575	{
				576	unsigned long timeout;
				577
				578	BUG_ON(ad->antic_status != ANTIC_OFF
				579	&& ad->antic_status != ANTIC_WAIT_REQ);
				580
				581	timeout = ad->antic_start + ad->antic_expire;
				582
				583	mod_timer(&ad->antic_timer, timeout);
				584
				585	ad->antic_status = ANTIC_WAIT_NEXT;
				586	}
				587
				588	/*
				589	* as_antic_waitreq starts anticipating. We don't start timing the anticipation
				590	* until the request that we're anticipating on has finished. This means we
				591	* are timing from when the candidate process wakes up hopefully.
				592	*/
				593	static void as_antic_waitreq(struct as_data *ad)
				594	{
				595	BUG_ON(ad->antic_status == ANTIC_FINISHED);
				596	if (ad->antic_status == ANTIC_OFF) {
				597	if (!ad->io_context \|\| ad->ioc_finished)
				598	as_antic_waitnext(ad);
				599	else
				600	ad->antic_status = ANTIC_WAIT_REQ;
				601	}
				602	}
				603
				604	/*
				605	* This is called directly by the functions in this file to stop anticipation.
				606	* We kill the timer and schedule a call to the request_fn asap.
				607	*/
				608	static void as_antic_stop(struct as_data *ad)
				609	{
				610	int status = ad->antic_status;
				611
				612	if (status == ANTIC_WAIT_REQ \|\| status == ANTIC_WAIT_NEXT) {
				613	if (status == ANTIC_WAIT_NEXT)
				614	del_timer(&ad->antic_timer);
				615	ad->antic_status = ANTIC_FINISHED;
				616	/* see as_work_handler */
				617	kblockd_schedule_work(&ad->antic_work);
				618	}
				619	}
				620
				621	/*
				622	* as_antic_timeout is the timer function set by as_antic_waitnext.
				623	*/
				624	static void as_antic_timeout(unsigned long data)
				625	{
				626	struct request_queue q = (struct request_queue )data;
				627	struct as_data *ad = q->elevator->elevator_data;
				628	unsigned long flags;
				629
				630	spin_lock_irqsave(q->queue_lock, flags);
				631	if (ad->antic_status == ANTIC_WAIT_REQ
				632	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				633	struct as_io_context *aic = ad->io_context->aic;
				634
				635	ad->antic_status = ANTIC_FINISHED;
				636	kblockd_schedule_work(&ad->antic_work);
				637
				638	if (aic->ttime_samples == 0) {
				639	/* process anticipated on has exitted or timed out*/
				640	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				641	}
				642	}
				643	spin_unlock_irqrestore(q->queue_lock, flags);
				644	}
				645
				646	/*
				647	* as_close_req decides if one request is considered "close" to the
				648	* previous one issued.
				649	*/
				650	static int as_close_req(struct as_data ad, struct as_rq arq)
				651	{
				652	unsigned long delay; /* milliseconds */
				653	sector_t last = ad->last_sector[ad->batch_data_dir];
				654	sector_t next = arq->request->sector;
				655	sector_t delta; /* acceptable close offset (in sectors) */
				656
				657	if (ad->antic_status == ANTIC_OFF \|\| !ad->ioc_finished)
				658	delay = 0;
				659	else
				660	delay = ((jiffies - ad->antic_start) * 1000) / HZ;
				661
				662	if (delay <= 1)
				663	delta = 64;
				664	else if (delay <= 20 && delay <= ad->antic_expire)
				665	delta = 64 << (delay-1);
				666	else
				667	return 1;
				668
				669	return (last - (delta>>1) <= next) && (next <= last + delta);
				670	}
				671
				672	/*
				673	* as_can_break_anticipation returns true if we have been anticipating this
				674	* request.
				675	*
				676	* It also returns true if the process against which we are anticipating
				677	* submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
				678	* dispatch it ASAP, because we know that application will not be submitting
				679	* any new reads.
				680	*
				681	* If the task which has submitted the request has exitted, break anticipation.
				682	*
				683	* If this task has queued some other IO, do not enter enticipation.
				684	*/
				685	static int as_can_break_anticipation(struct as_data ad, struct as_rq arq)
				686	{
				687	struct io_context *ioc;
				688	struct as_io_context *aic;
				689	sector_t s;
				690
				691	ioc = ad->io_context;
				692	BUG_ON(!ioc);
				693
				694	if (arq && ioc == arq->io_context) {
				695	/* request from same process */
				696	return 1;
				697	}
				698
				699	if (ad->ioc_finished && as_antic_expired(ad)) {
				700	/*
				701	* In this situation status should really be FINISHED,
				702	* however the timer hasn't had the chance to run yet.
				703	*/
				704	return 1;
				705	}
				706
				707	aic = ioc->aic;
				708	if (!aic)
				709	return 0;
				710
				711	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
				712	/* process anticipated on has exitted */
				713	if (aic->ttime_samples == 0)
				714	ad->exit_prob = (7*ad->exit_prob + 256)/8;
				715	return 1;
				716	}
				717
				718	if (atomic_read(&aic->nr_queued) > 0) {
				719	/* process has more requests queued */
				720	return 1;
				721	}
				722
				723	if (atomic_read(&aic->nr_dispatched) > 0) {
				724	/* process has more requests dispatched */
				725	return 1;
				726	}
				727
				728	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) {
				729	/*
				730	* Found a close request that is not one of ours.
				731	*
				732	* This makes close requests from another process reset
				733	* our thinktime delay. Is generally useful when there are
				734	* two or more cooperating processes working in the same
				735	* area.
				736	*/
				737	spin_lock(&aic->lock);
				738	aic->last_end_request = jiffies;
				739	spin_unlock(&aic->lock);
				740	return 1;
				741	}
				742
				743
				744	if (aic->ttime_samples == 0) {
				745	if (ad->new_ttime_mean > ad->antic_expire)
				746	return 1;
				747	if (ad->exit_prob > 128)
				748	return 1;
				749	} else if (aic->ttime_mean > ad->antic_expire) {
				750	/* the process thinks too much between requests */
				751	return 1;
				752	}
				753
				754	if (!arq)
				755	return 0;
				756
				757	if (ad->last_sector[REQ_SYNC] < arq->request->sector)
				758	s = arq->request->sector - ad->last_sector[REQ_SYNC];
				759	else
				760	s = ad->last_sector[REQ_SYNC] - arq->request->sector;
				761
				762	if (aic->seek_samples == 0) {
				763	/*
				764	* Process has just started IO. Use past statistics to
				765	* guage success possibility
				766	*/
				767	if (ad->new_seek_mean > s) {
				768	/* this request is better than what we're expecting */
				769	return 1;
				770	}
				771
				772	} else {
				773	if (aic->seek_mean > s) {
				774	/* this request is better than what we're expecting */
				775	return 1;
				776	}
				777	}
				778
				779	return 0;
				780	}
				781
				782	/*
				783	* as_can_anticipate indicates weather we should either run arq
				784	* or keep anticipating a better request.
				785	*/
				786	static int as_can_anticipate(struct as_data ad, struct as_rq arq)
				787	{
				788	if (!ad->io_context)
				789	/*
				790	* Last request submitted was a write
				791	*/
				792	return 0;
				793
				794	if (ad->antic_status == ANTIC_FINISHED)
				795	/*
				796	* Don't restart if we have just finished. Run the next request
				797	*/
				798	return 0;
				799
				800	if (as_can_break_anticipation(ad, arq))
				801	/*
				802	* This request is a good candidate. Don't keep anticipating,
				803	* run it.
				804	*/
				805	return 0;
				806
				807	/*
				808	* OK from here, we haven't finished, and don't have a decent request!
				809	* Status is either ANTIC_OFF so start waiting,
				810	* ANTIC_WAIT_REQ so continue waiting for request to finish
				811	* or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
				812	*
				813	*/
				814
				815	return 1;
				816	}
				817
				818	static void as_update_thinktime(struct as_data ad, struct as_io_context aic, unsigned long ttime)
				819	{
				820	/* fixed point: 1.0 == 1<<8 */
				821	if (aic->ttime_samples == 0) {
				822	ad->new_ttime_total = (7ad->new_ttime_total + 256ttime) / 8;
				823	ad->new_ttime_mean = ad->new_ttime_total / 256;
				824
				825	ad->exit_prob = (7*ad->exit_prob)/8;
				826	}
				827	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
				828	aic->ttime_total = (7aic->ttime_total + 256ttime) / 8;
				829	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
				830	}
				831
				832	static void as_update_seekdist(struct as_data ad, struct as_io_context aic, sector_t sdist)
				833	{
				834	u64 total;
				835
				836	if (aic->seek_samples == 0) {
				837	ad->new_seek_total = (7ad->new_seek_total + 256(u64)sdist)/8;
				838	ad->new_seek_mean = ad->new_seek_total / 256;
				839	}
				840
				841	/*
				842	* Don't allow the seek distance to get too large from the
				843	* odd fragment, pagein, etc
				844	*/
				845	if (aic->seek_samples <= 60) /* second&third seek */
				846	sdist = min(sdist, (aic->seek_mean * 4) + 210241024);
				847	else
				848	sdist = min(sdist, (aic->seek_mean * 4) + 2102464);
				849
				850	aic->seek_samples = (7*aic->seek_samples + 256) / 8;
				851	aic->seek_total = (7aic->seek_total + (u64)256sdist) / 8;
				852	total = aic->seek_total + (aic->seek_samples/2);
				853	do_div(total, aic->seek_samples);
				854	aic->seek_mean = (sector_t)total;
				855	}
				856
				857	/*
				858	* as_update_iohist keeps a decaying histogram of IO thinktimes, and
				859	* updates @aic->ttime_mean based on that. It is called when a new
				860	* request is queued.
				861	*/
				862	static void as_update_iohist(struct as_data ad, struct as_io_context aic, struct request *rq)
				863	{
				864	struct as_rq *arq = RQ_DATA(rq);
				865	int data_dir = arq->is_sync;
				866	unsigned long thinktime;
				867	sector_t seek_dist;
				868
				869	if (aic == NULL)
				870	return;
				871
				872	if (data_dir == REQ_SYNC) {
				873	unsigned long in_flight = atomic_read(&aic->nr_queued)
				874	+ atomic_read(&aic->nr_dispatched);
				875	spin_lock(&aic->lock);
				876	if (test_bit(AS_TASK_IORUNNING, &aic->state) \|\|
				877	test_bit(AS_TASK_IOSTARTED, &aic->state)) {
				878	/* Calculate read -> read thinktime */
				879	if (test_bit(AS_TASK_IORUNNING, &aic->state)
				880	&& in_flight == 0) {
				881	thinktime = jiffies - aic->last_end_request;
				882	thinktime = min(thinktime, MAX_THINKTIME-1);
				883	} else
				884	thinktime = 0;
				885	as_update_thinktime(ad, aic, thinktime);
				886
				887	/* Calculate read -> read seek distance */
				888	if (aic->last_request_pos < rq->sector)
				889	seek_dist = rq->sector - aic->last_request_pos;
				890	else
				891	seek_dist = aic->last_request_pos - rq->sector;
				892	as_update_seekdist(ad, aic, seek_dist);
				893	}
				894	aic->last_request_pos = rq->sector + rq->nr_sectors;
				895	set_bit(AS_TASK_IOSTARTED, &aic->state);
				896	spin_unlock(&aic->lock);
				897	}
				898	}
				899
				900	/*
				901	* as_update_arq must be called whenever a request (arq) is added to
				902	* the sort_list. This function keeps caches up to date, and checks if the
				903	* request might be one we are "anticipating"
				904	*/
				905	static void as_update_arq(struct as_data ad, struct as_rq arq)
				906	{
				907	const int data_dir = arq->is_sync;
				908
				909	/* keep the next_arq cache up to date */
				910	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
				911
				912	/*
				913	* have we been anticipating this request?
				914	* or does it come from the same process as the one we are anticipating
				915	* for?
				916	*/
				917	if (ad->antic_status == ANTIC_WAIT_REQ
				918	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				919	if (as_can_break_anticipation(ad, arq))
				920	as_antic_stop(ad);
				921	}
				922	}
				923
				924	/*
				925	* Gathers timings and resizes the write batch automatically
				926	*/
				927	static void update_write_batch(struct as_data *ad)
				928	{
				929	unsigned long batch = ad->batch_expire[REQ_ASYNC];
				930	long write_time;
				931
				932	write_time = (jiffies - ad->current_batch_expires) + batch;
				933	if (write_time < 0)
				934	write_time = 0;
				935
				936	if (write_time > batch && !ad->write_batch_idled) {
				937	if (write_time > batch * 3)
				938	ad->write_batch_count /= 2;
				939	else
				940	ad->write_batch_count--;
				941	} else if (write_time < batch && ad->current_write_count == 0) {
				942	if (batch > write_time * 3)
				943	ad->write_batch_count *= 2;
				944	else
				945	ad->write_batch_count++;
				946	}
				947
				948	if (ad->write_batch_count < 1)
				949	ad->write_batch_count = 1;
				950	}
				951
				952	/*
				953	* as_completed_request is to be called when a request has completed and
				954	* returned something to the requesting process, be it an error or data.
				955	*/
				956	static void as_completed_request(request_queue_t q, struct request rq)
				957	{
				958	struct as_data *ad = q->elevator->elevator_data;
				959	struct as_rq *arq = RQ_DATA(rq);
				960
				961	WARN_ON(!list_empty(&rq->queuelist));
				962
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	963	if (arq->state != AS_RQ_REMOVED) {
				964	printk("arq->state %d\n", arq->state);
				965	WARN_ON(1);
				966	goto out;
				967	}
				968
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	969	if (ad->changed_batch && ad->nr_dispatched == 1) {
				970	kblockd_schedule_work(&ad->antic_work);
				971	ad->changed_batch = 0;
				972
				973	if (ad->batch_data_dir == REQ_SYNC)
				974	ad->new_batch = 1;
				975	}
				976	WARN_ON(ad->nr_dispatched == 0);
				977	ad->nr_dispatched--;
				978
				979	/*
				980	* Start counting the batch from when a request of that direction is
				981	* actually serviced. This should help devices with big TCQ windows
				982	* and writeback caches
				983	*/
				984	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
				985	update_write_batch(ad);
				986	ad->current_batch_expires = jiffies +
				987	ad->batch_expire[REQ_SYNC];
				988	ad->new_batch = 0;
				989	}
				990
				991	if (ad->io_context == arq->io_context && ad->io_context) {
				992	ad->antic_start = jiffies;
				993	ad->ioc_finished = 1;
				994	if (ad->antic_status == ANTIC_WAIT_REQ) {
				995	/*
				996	* We were waiting on this request, now anticipate
				997	* the next one
				998	*/
				999	as_antic_waitnext(ad);
				1000	}
				1001	}
				1002
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1003	as_put_io_context(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1004	out:
				1005	arq->state = AS_RQ_POSTSCHED;
				1006	}
				1007
				1008	/*
				1009	* as_remove_queued_request removes a request from the pre dispatch queue
				1010	* without updating refcounts. It is expected the caller will drop the
				1011	* reference unless it replaces the request at somepart of the elevator
				1012	* (ie. the dispatch queue)
				1013	*/
				1014	static void as_remove_queued_request(request_queue_t q, struct request rq)
				1015	{
				1016	struct as_rq *arq = RQ_DATA(rq);
				1017	const int data_dir = arq->is_sync;
				1018	struct as_data *ad = q->elevator->elevator_data;
				1019
				1020	WARN_ON(arq->state != AS_RQ_QUEUED);
				1021
				1022	if (arq->io_context && arq->io_context->aic) {
				1023	BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
				1024	atomic_dec(&arq->io_context->aic->nr_queued);
				1025	}
				1026
				1027	/*
				1028	* Update the "next_arq" cache if we are about to remove its
				1029	* entry
				1030	*/
				1031	if (ad->next_arq[data_dir] == arq)
				1032	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1033
				1034	list_del_init(&arq->fifo);
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame^]	1035	as_del_arq_hash(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1036	as_del_arq_rb(ad, arq);
				1037	}
				1038
				1039	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1040	* as_fifo_expired returns 0 if there are no expired reads on the fifo,
				1041	* 1 otherwise. It is ratelimited so that we only perform the check once per
				1042	* `fifo_expire' interval. Otherwise a large number of expired requests
				1043	* would create a hopeless seekstorm.
				1044	*
				1045	* See as_antic_expired comment.
				1046	*/
				1047	static int as_fifo_expired(struct as_data *ad, int adir)
				1048	{
				1049	struct as_rq *arq;
				1050	long delta_jif;
				1051
				1052	delta_jif = jiffies - ad->last_check_fifo[adir];
				1053	if (unlikely(delta_jif < 0))
				1054	delta_jif = -delta_jif;
				1055	if (delta_jif < ad->fifo_expire[adir])
				1056	return 0;
				1057
				1058	ad->last_check_fifo[adir] = jiffies;
				1059
				1060	if (list_empty(&ad->fifo_list[adir]))
				1061	return 0;
				1062
				1063	arq = list_entry_fifo(ad->fifo_list[adir].next);
				1064
				1065	return time_after(jiffies, arq->expires);
				1066	}
				1067
				1068	/*
				1069	* as_batch_expired returns true if the current batch has expired. A batch
				1070	* is a set of reads or a set of writes.
				1071	*/
				1072	static inline int as_batch_expired(struct as_data *ad)
				1073	{
				1074	if (ad->changed_batch \|\| ad->new_batch)
				1075	return 0;
				1076
				1077	if (ad->batch_data_dir == REQ_SYNC)
				1078	/* TODO! add a check so a complete fifo gets written? */
				1079	return time_after(jiffies, ad->current_batch_expires);
				1080
				1081	return time_after(jiffies, ad->current_batch_expires)
				1082	\|\| ad->current_write_count == 0;
				1083	}
				1084
				1085	/*
				1086	* move an entry to dispatch queue
				1087	*/
				1088	static void as_move_to_dispatch(struct as_data ad, struct as_rq arq)
				1089	{
				1090	struct request *rq = arq->request;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1091	const int data_dir = arq->is_sync;
				1092
				1093	BUG_ON(!ON_RB(&arq->rb_node));
				1094
				1095	as_antic_stop(ad);
				1096	ad->antic_status = ANTIC_OFF;
				1097
				1098	/*
				1099	* This has to be set in order to be correctly updated by
				1100	* as_find_next_arq
				1101	*/
				1102	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
				1103
				1104	if (data_dir == REQ_SYNC) {
				1105	/* In case we have to anticipate after this */
				1106	copy_io_context(&ad->io_context, &arq->io_context);
				1107	} else {
				1108	if (ad->io_context) {
				1109	put_io_context(ad->io_context);
				1110	ad->io_context = NULL;
				1111	}
				1112
				1113	if (ad->current_write_count != 0)
				1114	ad->current_write_count--;
				1115	}
				1116	ad->ioc_finished = 0;
				1117
				1118	ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
				1119
				1120	/*
				1121	* take it off the sort and fifo list, add to dispatch queue
				1122	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1123	while (!list_empty(&rq->queuelist)) {
				1124	struct request *__rq = list_entry_rq(rq->queuelist.next);
				1125	struct as_rq *__arq = RQ_DATA(__rq);
				1126
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1127	list_del(&__rq->queuelist);
				1128
				1129	elv_dispatch_add_tail(ad->q, __rq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1130
				1131	if (__arq->io_context && __arq->io_context->aic)
				1132	atomic_inc(&__arq->io_context->aic->nr_dispatched);
				1133
				1134	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1135	__arq->state = AS_RQ_DISPATCHED;
				1136
				1137	ad->nr_dispatched++;
				1138	}
				1139
				1140	as_remove_queued_request(ad->q, rq);
				1141	WARN_ON(arq->state != AS_RQ_QUEUED);
				1142
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1143	elv_dispatch_sort(ad->q, rq);
				1144
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1145	arq->state = AS_RQ_DISPATCHED;
				1146	if (arq->io_context && arq->io_context->aic)
				1147	atomic_inc(&arq->io_context->aic->nr_dispatched);
				1148	ad->nr_dispatched++;
				1149	}
				1150
				1151	/*
				1152	* as_dispatch_request selects the best request according to
				1153	* read/write expire, batch expire, etc, and moves it to the dispatch
				1154	* queue. Returns 1 if a request was found, 0 otherwise.
				1155	*/
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1156	static int as_dispatch_request(request_queue_t *q, int force)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1157	{
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1158	struct as_data *ad = q->elevator->elevator_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1159	struct as_rq *arq;
				1160	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
				1161	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
				1162
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1163	if (unlikely(force)) {
				1164	/*
				1165	* Forced dispatch, accounting is useless. Reset
				1166	* accounting states and dump fifo_lists. Note that
				1167	* batch_data_dir is reset to REQ_SYNC to avoid
				1168	* screwing write batch accounting as write batch
				1169	* accounting occurs on W->R transition.
				1170	*/
				1171	int dispatched = 0;
				1172
				1173	ad->batch_data_dir = REQ_SYNC;
				1174	ad->changed_batch = 0;
				1175	ad->new_batch = 0;
				1176
				1177	while (ad->next_arq[REQ_SYNC]) {
				1178	as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
				1179	dispatched++;
				1180	}
				1181	ad->last_check_fifo[REQ_SYNC] = jiffies;
				1182
				1183	while (ad->next_arq[REQ_ASYNC]) {
				1184	as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
				1185	dispatched++;
				1186	}
				1187	ad->last_check_fifo[REQ_ASYNC] = jiffies;
				1188
				1189	return dispatched;
				1190	}
				1191
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1192	/* Signal that the write batch was uncontended, so we can't time it */
				1193	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
				1194	if (ad->current_write_count == 0 \|\| !writes)
				1195	ad->write_batch_idled = 1;
				1196	}
				1197
				1198	if (!(reads \|\| writes)
				1199	\|\| ad->antic_status == ANTIC_WAIT_REQ
				1200	\|\| ad->antic_status == ANTIC_WAIT_NEXT
				1201	\|\| ad->changed_batch)
				1202	return 0;
				1203
				1204	if (!(reads && writes && as_batch_expired(ad)) ) {
				1205	/*
				1206	* batch is still running or no reads or no writes
				1207	*/
				1208	arq = ad->next_arq[ad->batch_data_dir];
				1209
				1210	if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
				1211	if (as_fifo_expired(ad, REQ_SYNC))
				1212	goto fifo_expired;
				1213
				1214	if (as_can_anticipate(ad, arq)) {
				1215	as_antic_waitreq(ad);
				1216	return 0;
				1217	}
				1218	}
				1219
				1220	if (arq) {
				1221	/* we have a "next request" */
				1222	if (reads && !writes)
				1223	ad->current_batch_expires =
				1224	jiffies + ad->batch_expire[REQ_SYNC];
				1225	goto dispatch_request;
				1226	}
				1227	}
				1228
				1229	/*
				1230	* at this point we are not running a batch. select the appropriate
				1231	* data direction (read / write)
				1232	*/
				1233
				1234	if (reads) {
				1235	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC]));
				1236
				1237	if (writes && ad->batch_data_dir == REQ_SYNC)
				1238	/*
				1239	* Last batch was a read, switch to writes
				1240	*/
				1241	goto dispatch_writes;
				1242
				1243	if (ad->batch_data_dir == REQ_ASYNC) {
				1244	WARN_ON(ad->new_batch);
				1245	ad->changed_batch = 1;
				1246	}
				1247	ad->batch_data_dir = REQ_SYNC;
				1248	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1249	ad->last_check_fifo[ad->batch_data_dir] = jiffies;
				1250	goto dispatch_request;
				1251	}
				1252
				1253	/*
				1254	* the last batch was a read
				1255	*/
				1256
				1257	if (writes) {
				1258	dispatch_writes:
				1259	BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC]));
				1260
				1261	if (ad->batch_data_dir == REQ_SYNC) {
				1262	ad->changed_batch = 1;
				1263
				1264	/*
				1265	* new_batch might be 1 when the queue runs out of
				1266	* reads. A subsequent submission of a write might
				1267	* cause a change of batch before the read is finished.
				1268	*/
				1269	ad->new_batch = 0;
				1270	}
				1271	ad->batch_data_dir = REQ_ASYNC;
				1272	ad->current_write_count = ad->write_batch_count;
				1273	ad->write_batch_idled = 0;
				1274	arq = ad->next_arq[ad->batch_data_dir];
				1275	goto dispatch_request;
				1276	}
				1277
				1278	BUG();
				1279	return 0;
				1280
				1281	dispatch_request:
				1282	/*
				1283	* If a request has expired, service it.
				1284	*/
				1285
				1286	if (as_fifo_expired(ad, ad->batch_data_dir)) {
				1287	fifo_expired:
				1288	arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
				1289	BUG_ON(arq == NULL);
				1290	}
				1291
				1292	if (ad->changed_batch) {
				1293	WARN_ON(ad->new_batch);
				1294
				1295	if (ad->nr_dispatched)
				1296	return 0;
				1297
				1298	if (ad->batch_data_dir == REQ_ASYNC)
				1299	ad->current_batch_expires = jiffies +
				1300	ad->batch_expire[REQ_ASYNC];
				1301	else
				1302	ad->new_batch = 1;
				1303
				1304	ad->changed_batch = 0;
				1305	}
				1306
				1307	/*
				1308	* arq is the selected appropriate request.
				1309	*/
				1310	as_move_to_dispatch(ad, arq);
				1311
				1312	return 1;
				1313	}
				1314
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1315	/*
				1316	* Add arq to a list behind alias
				1317	*/
				1318	static inline void
				1319	as_add_aliased_request(struct as_data ad, struct as_rq arq, struct as_rq *alias)
				1320	{
				1321	struct request *req = arq->request;
				1322	struct list_head *insert = alias->request->queuelist.prev;
				1323
				1324	/*
				1325	* Transfer list of aliases
				1326	*/
				1327	while (!list_empty(&req->queuelist)) {
				1328	struct request *__rq = list_entry_rq(req->queuelist.next);
				1329	struct as_rq *__arq = RQ_DATA(__rq);
				1330
				1331	list_move_tail(&__rq->queuelist, &alias->request->queuelist);
				1332
				1333	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1334	}
				1335
				1336	/*
				1337	* Another request with the same start sector on the rbtree.
				1338	* Link this request to that sector. They are untangled in
				1339	* as_move_to_dispatch
				1340	*/
				1341	list_add(&arq->request->queuelist, insert);
				1342
				1343	/*
				1344	* Don't want to have to handle merges.
				1345	*/
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame^]	1346	as_del_arq_hash(arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1347	}
				1348
				1349	/*
				1350	* add arq to rbtree and fifo
				1351	*/
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1352	static void as_add_request(request_queue_t q, struct request rq)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1353	{
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1354	struct as_data *ad = q->elevator->elevator_data;
				1355	struct as_rq *arq = RQ_DATA(rq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1356	struct as_rq *alias;
				1357	int data_dir;
				1358
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1359	if (arq->state != AS_RQ_PRESCHED) {
				1360	printk("arq->state: %d\n", arq->state);
				1361	WARN_ON(1);
				1362	}
				1363	arq->state = AS_RQ_NEW;
				1364
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1365	if (rq_data_dir(arq->request) == READ
				1366	\|\| current->flags&PF_SYNCWRITE)
				1367	arq->is_sync = 1;
				1368	else
				1369	arq->is_sync = 0;
				1370	data_dir = arq->is_sync;
				1371
				1372	arq->io_context = as_get_io_context();
				1373
				1374	if (arq->io_context) {
				1375	as_update_iohist(ad, arq->io_context->aic, arq->request);
				1376	atomic_inc(&arq->io_context->aic->nr_queued);
				1377	}
				1378
				1379	alias = as_add_arq_rb(ad, arq);
				1380	if (!alias) {
				1381	/*
				1382	* set expire time (only used for reads) and add to fifo list
				1383	*/
				1384	arq->expires = jiffies + ad->fifo_expire[data_dir];
				1385	list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
				1386
Tejun Heo	98b1147	2005-10-20 16:46:54 +0200	[diff] [blame^]	1387	if (rq_mergeable(arq->request))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1388	as_add_arq_hash(ad, arq);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1389	as_update_arq(ad, arq); /* keep state machine up to date */
				1390
				1391	} else {
				1392	as_add_aliased_request(ad, arq, alias);
				1393
				1394	/*
				1395	* have we been anticipating this request?
				1396	* or does it come from the same process as the one we are
				1397	* anticipating for?
				1398	*/
				1399	if (ad->antic_status == ANTIC_WAIT_REQ
				1400	\|\| ad->antic_status == ANTIC_WAIT_NEXT) {
				1401	if (as_can_break_anticipation(ad, arq))
				1402	as_antic_stop(ad);
				1403	}
				1404	}
				1405
				1406	arq->state = AS_RQ_QUEUED;
				1407	}
				1408
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1409	static void as_activate_request(request_queue_t q, struct request rq)
				1410	{
				1411	struct as_rq *arq = RQ_DATA(rq);
				1412
				1413	WARN_ON(arq->state != AS_RQ_DISPATCHED);
				1414	arq->state = AS_RQ_REMOVED;
				1415	if (arq->io_context && arq->io_context->aic)
				1416	atomic_dec(&arq->io_context->aic->nr_dispatched);
				1417	}
				1418
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1419	static void as_deactivate_request(request_queue_t q, struct request rq)
				1420	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1421	struct as_rq *arq = RQ_DATA(rq);
				1422
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1423	WARN_ON(arq->state != AS_RQ_REMOVED);
				1424	arq->state = AS_RQ_DISPATCHED;
				1425	if (arq->io_context && arq->io_context->aic)
				1426	atomic_inc(&arq->io_context->aic->nr_dispatched);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1427	}
				1428
				1429	/*
				1430	* as_queue_empty tells us if there are requests left in the device. It may
				1431	* not be the case that a driver can get the next request even if the queue
				1432	* is not empty - it is used in the block layer to check for plugging and
				1433	* merging opportunities
				1434	*/
				1435	static int as_queue_empty(request_queue_t *q)
				1436	{
				1437	struct as_data *ad = q->elevator->elevator_data;
				1438
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1439	return list_empty(&ad->fifo_list[REQ_ASYNC])
				1440	&& list_empty(&ad->fifo_list[REQ_SYNC]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1441	}
				1442
				1443	static struct request *
				1444	as_former_request(request_queue_t q, struct request rq)
				1445	{
				1446	struct as_rq *arq = RQ_DATA(rq);
				1447	struct rb_node *rbprev = rb_prev(&arq->rb_node);
				1448	struct request *ret = NULL;
				1449
				1450	if (rbprev)
				1451	ret = rb_entry_arq(rbprev)->request;
				1452
				1453	return ret;
				1454	}
				1455
				1456	static struct request *
				1457	as_latter_request(request_queue_t q, struct request rq)
				1458	{
				1459	struct as_rq *arq = RQ_DATA(rq);
				1460	struct rb_node *rbnext = rb_next(&arq->rb_node);
				1461	struct request *ret = NULL;
				1462
				1463	if (rbnext)
				1464	ret = rb_entry_arq(rbnext)->request;
				1465
				1466	return ret;
				1467	}
				1468
				1469	static int
				1470	as_merge(request_queue_t q, struct request req, struct bio bio)
				1471	{
				1472	struct as_data *ad = q->elevator->elevator_data;
				1473	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
				1474	struct request *__rq;
				1475	int ret;
				1476
				1477	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1478	* see if the merge hash can satisfy a back merge
				1479	*/
				1480	__rq = as_find_arq_hash(ad, bio->bi_sector);
				1481	if (__rq) {
				1482	BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
				1483
				1484	if (elv_rq_merge_ok(__rq, bio)) {
				1485	ret = ELEVATOR_BACK_MERGE;
				1486	goto out;
				1487	}
				1488	}
				1489
				1490	/*
				1491	* check for front merge
				1492	*/
				1493	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
				1494	if (__rq) {
				1495	BUG_ON(rb_key != rq_rb_key(__rq));
				1496
				1497	if (elv_rq_merge_ok(__rq, bio)) {
				1498	ret = ELEVATOR_FRONT_MERGE;
				1499	goto out;
				1500	}
				1501	}
				1502
				1503	return ELEVATOR_NO_MERGE;
				1504	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1505	if (ret) {
				1506	if (rq_mergeable(__rq))
				1507	as_hot_arq_hash(ad, RQ_DATA(__rq));
				1508	}
				1509	*req = __rq;
				1510	return ret;
				1511	}
				1512
				1513	static void as_merged_request(request_queue_t q, struct request req)
				1514	{
				1515	struct as_data *ad = q->elevator->elevator_data;
				1516	struct as_rq *arq = RQ_DATA(req);
				1517
				1518	/*
				1519	* hash always needs to be repositioned, key is end sector
				1520	*/
				1521	as_del_arq_hash(arq);
				1522	as_add_arq_hash(ad, arq);
				1523
				1524	/*
				1525	* if the merge was a front merge, we need to reposition request
				1526	*/
				1527	if (rq_rb_key(req) != arq->rb_key) {
				1528	struct as_rq alias, next_arq = NULL;
				1529
				1530	if (ad->next_arq[arq->is_sync] == arq)
				1531	next_arq = as_find_next_arq(ad, arq);
				1532
				1533	/*
				1534	* Note! We should really be moving any old aliased requests
				1535	* off this request and try to insert them into the rbtree. We
				1536	* currently don't bother. Ditto the next function.
				1537	*/
				1538	as_del_arq_rb(ad, arq);
				1539	if ((alias = as_add_arq_rb(ad, arq)) ) {
				1540	list_del_init(&arq->fifo);
				1541	as_add_aliased_request(ad, arq, alias);
				1542	if (next_arq)
				1543	ad->next_arq[arq->is_sync] = next_arq;
				1544	}
				1545	/*
				1546	* Note! At this stage of this and the next function, our next
				1547	* request may not be optimal - eg the request may have "grown"
				1548	* behind the disk head. We currently don't bother adjusting.
				1549	*/
				1550	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1551	}
				1552
				1553	static void
				1554	as_merged_requests(request_queue_t q, struct request req,
				1555	struct request *next)
				1556	{
				1557	struct as_data *ad = q->elevator->elevator_data;
				1558	struct as_rq *arq = RQ_DATA(req);
				1559	struct as_rq *anext = RQ_DATA(next);
				1560
				1561	BUG_ON(!arq);
				1562	BUG_ON(!anext);
				1563
				1564	/*
				1565	* reposition arq (this is the merged request) in hash, and in rbtree
				1566	* in case of a front merge
				1567	*/
				1568	as_del_arq_hash(arq);
				1569	as_add_arq_hash(ad, arq);
				1570
				1571	if (rq_rb_key(req) != arq->rb_key) {
				1572	struct as_rq alias, next_arq = NULL;
				1573
				1574	if (ad->next_arq[arq->is_sync] == arq)
				1575	next_arq = as_find_next_arq(ad, arq);
				1576
				1577	as_del_arq_rb(ad, arq);
				1578	if ((alias = as_add_arq_rb(ad, arq)) ) {
				1579	list_del_init(&arq->fifo);
				1580	as_add_aliased_request(ad, arq, alias);
				1581	if (next_arq)
				1582	ad->next_arq[arq->is_sync] = next_arq;
				1583	}
				1584	}
				1585
				1586	/*
				1587	* if anext expires before arq, assign its expire time to arq
				1588	* and move into anext position (anext will be deleted) in fifo
				1589	*/
				1590	if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
				1591	if (time_before(anext->expires, arq->expires)) {
				1592	list_move(&arq->fifo, &anext->fifo);
				1593	arq->expires = anext->expires;
				1594	/*
				1595	* Don't copy here but swap, because when anext is
				1596	* removed below, it must contain the unused context
				1597	*/
				1598	swap_io_context(&arq->io_context, &anext->io_context);
				1599	}
				1600	}
				1601
				1602	/*
				1603	* Transfer list of aliases
				1604	*/
				1605	while (!list_empty(&next->queuelist)) {
				1606	struct request *__rq = list_entry_rq(next->queuelist.next);
				1607	struct as_rq *__arq = RQ_DATA(__rq);
				1608
				1609	list_move_tail(&__rq->queuelist, &req->queuelist);
				1610
				1611	WARN_ON(__arq->state != AS_RQ_QUEUED);
				1612	}
				1613
				1614	/*
				1615	* kill knowledge of next, this one is a goner
				1616	*/
				1617	as_remove_queued_request(q, next);
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1618	as_put_io_context(anext);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1619
				1620	anext->state = AS_RQ_MERGED;
				1621	}
				1622
				1623	/*
				1624	* This is executed in a "deferred" process context, by kblockd. It calls the
				1625	* driver's request_fn so the driver can submit that request.
				1626	*
				1627	* IMPORTANT! This guy will reenter the elevator, so set up all queue global
				1628	* state before calling, and don't rely on any state over calls.
				1629	*
				1630	* FIXME! dispatch queue is not a queue at all!
				1631	*/
				1632	static void as_work_handler(void *data)
				1633	{
				1634	struct request_queue *q = data;
				1635	unsigned long flags;
				1636
				1637	spin_lock_irqsave(q->queue_lock, flags);
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1638	if (!as_queue_empty(q))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1639	q->request_fn(q);
				1640	spin_unlock_irqrestore(q->queue_lock, flags);
				1641	}
				1642
				1643	static void as_put_request(request_queue_t q, struct request rq)
				1644	{
				1645	struct as_data *ad = q->elevator->elevator_data;
				1646	struct as_rq *arq = RQ_DATA(rq);
				1647
				1648	if (!arq) {
				1649	WARN_ON(1);
				1650	return;
				1651	}
				1652
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1653	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
				1654	arq->state != AS_RQ_PRESCHED &&
				1655	arq->state != AS_RQ_MERGED)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1656	printk("arq->state %d\n", arq->state);
				1657	WARN_ON(1);
				1658	}
				1659
				1660	mempool_free(arq, ad->arq_pool);
				1661	rq->elevator_private = NULL;
				1662	}
				1663
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1664	static int as_set_request(request_queue_t q, struct request rq,
				1665	struct bio *bio, int gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1666	{
				1667	struct as_data *ad = q->elevator->elevator_data;
				1668	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
				1669
				1670	if (arq) {
				1671	memset(arq, 0, sizeof(*arq));
				1672	RB_CLEAR(&arq->rb_node);
				1673	arq->request = rq;
				1674	arq->state = AS_RQ_PRESCHED;
				1675	arq->io_context = NULL;
				1676	INIT_LIST_HEAD(&arq->hash);
				1677	arq->on_hash = 0;
				1678	INIT_LIST_HEAD(&arq->fifo);
				1679	rq->elevator_private = arq;
				1680	return 0;
				1681	}
				1682
				1683	return 1;
				1684	}
				1685
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	1686	static int as_may_queue(request_queue_t q, int rw, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1687	{
				1688	int ret = ELV_MQUEUE_MAY;
				1689	struct as_data *ad = q->elevator->elevator_data;
				1690	struct io_context *ioc;
				1691	if (ad->antic_status == ANTIC_WAIT_REQ \|\|
				1692	ad->antic_status == ANTIC_WAIT_NEXT) {
				1693	ioc = as_get_io_context();
				1694	if (ad->io_context == ioc)
				1695	ret = ELV_MQUEUE_MUST;
				1696	put_io_context(ioc);
				1697	}
				1698
				1699	return ret;
				1700	}
				1701
				1702	static void as_exit_queue(elevator_t *e)
				1703	{
				1704	struct as_data *ad = e->elevator_data;
				1705
				1706	del_timer_sync(&ad->antic_timer);
				1707	kblockd_flush();
				1708
				1709	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
				1710	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
				1711
				1712	mempool_destroy(ad->arq_pool);
				1713	put_io_context(ad->io_context);
				1714	kfree(ad->hash);
				1715	kfree(ad);
				1716	}
				1717
				1718	/*
				1719	* initialize elevator private data (as_data), and alloc a arq for
				1720	* each request on the free lists
				1721	*/
				1722	static int as_init_queue(request_queue_t q, elevator_t e)
				1723	{
				1724	struct as_data *ad;
				1725	int i;
				1726
				1727	if (!arq_pool)
				1728	return -ENOMEM;
				1729
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1730	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1731	if (!ad)
				1732	return -ENOMEM;
				1733	memset(ad, 0, sizeof(*ad));
				1734
				1735	ad->q = q; /* Identify what queue the data belongs to */
				1736
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1737	ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES,
				1738	GFP_KERNEL, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1739	if (!ad->hash) {
				1740	kfree(ad);
				1741	return -ENOMEM;
				1742	}
				1743
Christoph Lameter	1946089	2005-06-23 00:08:19 -0700	[diff] [blame]	1744	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				1745	mempool_free_slab, arq_pool, q->node);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1746	if (!ad->arq_pool) {
				1747	kfree(ad->hash);
				1748	kfree(ad);
				1749	return -ENOMEM;
				1750	}
				1751
				1752	/* anticipatory scheduling helpers */
				1753	ad->antic_timer.function = as_antic_timeout;
				1754	ad->antic_timer.data = (unsigned long)q;
				1755	init_timer(&ad->antic_timer);
				1756	INIT_WORK(&ad->antic_work, as_work_handler, q);
				1757
				1758	for (i = 0; i < AS_HASH_ENTRIES; i++)
				1759	INIT_LIST_HEAD(&ad->hash[i]);
				1760
				1761	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
				1762	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
				1763	ad->sort_list[REQ_SYNC] = RB_ROOT;
				1764	ad->sort_list[REQ_ASYNC] = RB_ROOT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1765	ad->fifo_expire[REQ_SYNC] = default_read_expire;
				1766	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
				1767	ad->antic_expire = default_antic_expire;
				1768	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
				1769	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
				1770	e->elevator_data = ad;
				1771
				1772	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
				1773	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
				1774	if (ad->write_batch_count < 2)
				1775	ad->write_batch_count = 2;
				1776
				1777	return 0;
				1778	}
				1779
				1780	/*
				1781	* sysfs parts below
				1782	*/
				1783	struct as_fs_entry {
				1784	struct attribute attr;
				1785	ssize_t (show)(struct as_data , char *);
				1786	ssize_t (store)(struct as_data , const char *, size_t);
				1787	};
				1788
				1789	static ssize_t
				1790	as_var_show(unsigned int var, char *page)
				1791	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1792	return sprintf(page, "%d\n", var);
				1793	}
				1794
				1795	static ssize_t
				1796	as_var_store(unsigned long var, const char page, size_t count)
				1797	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1798	char p = (char ) page;
				1799
Jens Axboe	c9b3ad6	2005-07-27 11:43:37 -0700	[diff] [blame]	1800	*var = simple_strtoul(p, &p, 10);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1801	return count;
				1802	}
				1803
				1804	static ssize_t as_est_show(struct as_data ad, char page)
				1805	{
				1806	int pos = 0;
				1807
				1808	pos += sprintf(page+pos, "%lu %% exit probability\n", 100*ad->exit_prob/256);
				1809	pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean);
				1810	pos += sprintf(page+pos, "%llu sectors new seek distance\n", (unsigned long long)ad->new_seek_mean);
				1811
				1812	return pos;
				1813	}
				1814
				1815	#define SHOW_FUNCTION(__FUNC, __VAR) \
				1816	static ssize_t __FUNC(struct as_data ad, char page) \
				1817	{ \
				1818	return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
				1819	}
				1820	SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]);
				1821	SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]);
				1822	SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire);
				1823	SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]);
				1824	SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]);
				1825	#undef SHOW_FUNCTION
				1826
				1827	#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
				1828	static ssize_t __FUNC(struct as_data ad, const char page, size_t count) \
				1829	{ \
				1830	int ret = as_var_store(__PTR, (page), count); \
				1831	if (*(__PTR) < (MIN)) \
				1832	*(__PTR) = (MIN); \
				1833	else if (*(__PTR) > (MAX)) \
				1834	*(__PTR) = (MAX); \
				1835	(__PTR) = msecs_to_jiffies((__PTR)); \
				1836	return ret; \
				1837	}
				1838	STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
				1839	STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
				1840	STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX);
				1841	STORE_FUNCTION(as_read_batchexpire_store,
				1842	&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
				1843	STORE_FUNCTION(as_write_batchexpire_store,
				1844	&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
				1845	#undef STORE_FUNCTION
				1846
				1847	static struct as_fs_entry as_est_entry = {
				1848	.attr = {.name = "est_time", .mode = S_IRUGO },
				1849	.show = as_est_show,
				1850	};
				1851	static struct as_fs_entry as_readexpire_entry = {
				1852	.attr = {.name = "read_expire", .mode = S_IRUGO \| S_IWUSR },
				1853	.show = as_readexpire_show,
				1854	.store = as_readexpire_store,
				1855	};
				1856	static struct as_fs_entry as_writeexpire_entry = {
				1857	.attr = {.name = "write_expire", .mode = S_IRUGO \| S_IWUSR },
				1858	.show = as_writeexpire_show,
				1859	.store = as_writeexpire_store,
				1860	};
				1861	static struct as_fs_entry as_anticexpire_entry = {
				1862	.attr = {.name = "antic_expire", .mode = S_IRUGO \| S_IWUSR },
				1863	.show = as_anticexpire_show,
				1864	.store = as_anticexpire_store,
				1865	};
				1866	static struct as_fs_entry as_read_batchexpire_entry = {
				1867	.attr = {.name = "read_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				1868	.show = as_read_batchexpire_show,
				1869	.store = as_read_batchexpire_store,
				1870	};
				1871	static struct as_fs_entry as_write_batchexpire_entry = {
				1872	.attr = {.name = "write_batch_expire", .mode = S_IRUGO \| S_IWUSR },
				1873	.show = as_write_batchexpire_show,
				1874	.store = as_write_batchexpire_store,
				1875	};
				1876
				1877	static struct attribute *default_attrs[] = {
				1878	&as_est_entry.attr,
				1879	&as_readexpire_entry.attr,
				1880	&as_writeexpire_entry.attr,
				1881	&as_anticexpire_entry.attr,
				1882	&as_read_batchexpire_entry.attr,
				1883	&as_write_batchexpire_entry.attr,
				1884	NULL,
				1885	};
				1886
				1887	#define to_as(atr) container_of((atr), struct as_fs_entry, attr)
				1888
				1889	static ssize_t
				1890	as_attr_show(struct kobject kobj, struct attribute attr, char *page)
				1891	{
				1892	elevator_t *e = container_of(kobj, elevator_t, kobj);
				1893	struct as_fs_entry *entry = to_as(attr);
				1894
				1895	if (!entry->show)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	1896	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1897
				1898	return entry->show(e->elevator_data, page);
				1899	}
				1900
				1901	static ssize_t
				1902	as_attr_store(struct kobject kobj, struct attribute attr,
				1903	const char *page, size_t length)
				1904	{
				1905	elevator_t *e = container_of(kobj, elevator_t, kobj);
				1906	struct as_fs_entry *entry = to_as(attr);
				1907
				1908	if (!entry->store)
Dmitry Torokhov	6c1852a	2005-04-29 01:26:06 -0500	[diff] [blame]	1909	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1910
				1911	return entry->store(e->elevator_data, page, length);
				1912	}
				1913
				1914	static struct sysfs_ops as_sysfs_ops = {
				1915	.show = as_attr_show,
				1916	.store = as_attr_store,
				1917	};
				1918
				1919	static struct kobj_type as_ktype = {
				1920	.sysfs_ops = &as_sysfs_ops,
				1921	.default_attrs = default_attrs,
				1922	};
				1923
				1924	static struct elevator_type iosched_as = {
				1925	.ops = {
				1926	.elevator_merge_fn = as_merge,
				1927	.elevator_merged_fn = as_merged_request,
				1928	.elevator_merge_req_fn = as_merged_requests,
Jens Axboe	b4878f2	2005-10-20 16:42:29 +0200	[diff] [blame]	1929	.elevator_dispatch_fn = as_dispatch_request,
				1930	.elevator_add_req_fn = as_add_request,
				1931	.elevator_activate_req_fn = as_activate_request,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1932	.elevator_deactivate_req_fn = as_deactivate_request,
				1933	.elevator_queue_empty_fn = as_queue_empty,
				1934	.elevator_completed_req_fn = as_completed_request,
				1935	.elevator_former_req_fn = as_former_request,
				1936	.elevator_latter_req_fn = as_latter_request,
				1937	.elevator_set_req_fn = as_set_request,
				1938	.elevator_put_req_fn = as_put_request,
				1939	.elevator_may_queue_fn = as_may_queue,
				1940	.elevator_init_fn = as_init_queue,
				1941	.elevator_exit_fn = as_exit_queue,
				1942	},
				1943
				1944	.elevator_ktype = &as_ktype,
				1945	.elevator_name = "anticipatory",
				1946	.elevator_owner = THIS_MODULE,
				1947	};
				1948
				1949	static int __init as_init(void)
				1950	{
				1951	int ret;
				1952
				1953	arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
				1954	0, 0, NULL, NULL);
				1955	if (!arq_pool)
				1956	return -ENOMEM;
				1957
				1958	ret = elv_register(&iosched_as);
				1959	if (!ret) {
				1960	/*
				1961	* don't allow AS to get unregistered, since we would have
				1962	* to browse all tasks in the system and release their
				1963	* as_io_context first
				1964	*/
				1965	__module_get(THIS_MODULE);
				1966	return 0;
				1967	}
				1968
				1969	kmem_cache_destroy(arq_pool);
				1970	return ret;
				1971	}
				1972
				1973	static void __exit as_exit(void)
				1974	{
				1975	kmem_cache_destroy(arq_pool);
				1976	elv_unregister(&iosched_as);
				1977	}
				1978
				1979	module_init(as_init);
				1980	module_exit(as_exit);
				1981
				1982	MODULE_AUTHOR("Nick Piggin");
				1983	MODULE_LICENSE("GPL");
				1984	MODULE_DESCRIPTION("anticipatory IO scheduler");