/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/llite/rw.c
 *
 * Lustre Lite I/O page cache routines shared by different kernel revs
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/writeback.h>
#include <linux/uaccess.h>

#include <linux/fs.h>
#include <linux/pagemap.h>
/* current_is_kswapd() */
#include <linux/swap.h>

#define DEBUG_SUBSYSTEM S_LLITE

#include "../include/lustre_lite.h"
#include "../include/obd_cksum.h"
#include "llite_internal.h"
#include "../include/linux/lustre_compat25.h"

/**
 * Finalizes cl-data before exiting typical address_space operation. Dual to
 * ll_cl_init().
 */
void ll_cl_fini(struct ll_cl_context *lcc)
{
        struct lu_env *env = lcc->lcc_env;
        struct cl_io *io = lcc->lcc_io;
        struct cl_page *page = lcc->lcc_page;

        LASSERT(lcc->lcc_cookie == current);
        LASSERT(env);

        if (page) {
                lu_ref_del(&page->cp_reference, "cl_io", io);
                cl_page_put(env, page);
        }

        cl_env_put(env, &lcc->lcc_refcheck);
}

/**
 * Initializes common cl-data at the typical address_space operation entry
 * point.
 */
struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage)
{
        struct ll_cl_context *lcc;
        struct lu_env *env;
        struct cl_io *io;
        struct cl_object *clob;
        struct vvp_io *vio;

        int refcheck;
        int result = 0;

        clob = ll_i2info(file_inode(file))->lli_clob;
        LASSERT(clob);

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return ERR_CAST(env);

        lcc = &ll_env_info(env)->lti_io_ctx;
        memset(lcc, 0, sizeof(*lcc));
        lcc->lcc_env = env;
        lcc->lcc_refcheck = refcheck;
        lcc->lcc_cookie = current;

        vio = vvp_env_io(env);
        io = vio->vui_cl.cis_io;
        lcc->lcc_io = io;
        if (!io)
                result = -EIO;

        if (result == 0 && vmpage) {
                struct cl_page *page;

                LASSERT(io->ci_state == CIS_IO_GOING);
                LASSERT(vio->vui_fd == LUSTRE_FPRIVATE(file));
                page = cl_page_find(env, clob, vmpage->index, vmpage,
                                    CPT_CACHEABLE);
                if (!IS_ERR(page)) {
                        lcc->lcc_page = page;
                        lu_ref_add(&page->cp_reference, "cl_io", io);
                        result = 0;
                } else {
                        result = PTR_ERR(page);
                }
        }
        if (result) {
                ll_cl_fini(lcc);
                lcc = ERR_PTR(result);
        }

        return lcc;
}

static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);

/**
 * Get readahead pages from the filesystem readahead pool of the client for a
 * thread.
 *
 * \param sbi	superblock for filesystem readahead state ll_ra_info
 * \param ria	per-thread readahead state
 * \param pages	number of pages requested for readahead for the thread.
 *
 * WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
 * It should work well if ra_max_pages is much greater than the single
 * file's read-ahead window, and there are not too many threads contending
 * for these readahead pages.
 *
 * TODO: There may be a 'global sync problem' if many threads are trying
 * to get an ra budget that is larger than the remaining readahead pages
 * and reach here at exactly the same time. They will compute \a ret to
 * consume the remaining pages, but will fail at atomic_add_return() and
 * get a zero ra window, although there is still ra space remaining. - Jay
 */
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
                                     struct ra_io_arg *ria,
                                     unsigned long pages, unsigned long min)
{
        struct ll_ra_info *ra = &sbi->ll_ra_info;
        long ret;

        /* If the read-ahead pages left are fewer than 1M worth, do not do
         * read-ahead; otherwise they will form small read RPCs (< 1M), which
         * hurt server performance a lot.
         */
        ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
        if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) {
                ret = 0;
                goto out;
        }

        /* If the non-strided (ria_pages == 0) readahead window
         * (ria_start + ret) has grown across an RPC boundary, then trim
         * readahead size by the amount beyond the RPC so it ends on an
         * RPC boundary. If the readahead window is already ending on
         * an RPC boundary (beyond_rpc == 0), or smaller than a full
         * RPC (beyond_rpc < ret) the readahead size is unchanged.
         * The (beyond_rpc != 0) check is skipped since the conditional
         * branch is more expensive than subtracting zero from the result.
         *
         * Strided read is left unaligned to avoid small fragments beyond
         * the RPC boundary from needing an extra read RPC.
         */
        if (ria->ria_pages == 0) {
                long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;

                if (/* beyond_rpc != 0 && */ beyond_rpc < ret)
                        ret -= beyond_rpc;
        }

        if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
                atomic_sub(ret, &ra->ra_cur_pages);
                ret = 0;
        }

out:
        if (ret < min) {
                /* override ra limit for maximum performance */
                atomic_add(min - ret, &ra->ra_cur_pages);
                ret = min;
        }
        return ret;
}

void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
{
        struct ll_ra_info *ra = &sbi->ll_ra_info;

        atomic_sub(len, &ra->ra_cur_pages);
}

static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
{
        LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
        lprocfs_counter_incr(sbi->ll_ra_stats, which);
}

void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
{
        struct ll_sb_info *sbi = ll_i2sbi(inode);

        ll_ra_stats_inc_sbi(sbi, which);
}

#define RAS_CDEBUG(ras) \
        CDEBUG(D_READA, \
               "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu " \
               "csr %lu sf %lu sp %lu sl %lu\n", \
               ras->ras_last_readpage, ras->ras_consecutive_requests, \
               ras->ras_consecutive_pages, ras->ras_window_start, \
               ras->ras_window_len, ras->ras_next_readahead, \
               ras->ras_requests, ras->ras_request_index, \
               ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
               ras->ras_stride_pages, ras->ras_stride_length)
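
/*
 * Key to the RAS_CDEBUG fields: lrp = last readpage, cr = consecutive
 * requests, cp = consecutive pages, ws = window start, wl = window length,
 * nra = next readahead, r = requests, ri = request index, csr = consecutive
 * stride requests, sf = stride offset, sp = stride pages, sl = stride length.
 */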

static int index_in_window(unsigned long index, unsigned long point,
                           unsigned long before, unsigned long after)
{
        unsigned long start = point - before, end = point + after;

        if (start > point)
                start = 0;
        if (end < point)
                end = ~0;

        return start <= index && index <= end;
}
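
/*
 * Illustrative example (not from the original source): with point == 100,
 * before == 8 and after == 8 the window is [92, 108].  If "point - before"
 * underflows (e.g. point == 3, before == 8), start wraps above point and is
 * clamped to 0; likewise an overflowing "point + after" is clamped to ~0.
 */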

void ll_ras_enter(struct file *f)
{
        struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
        struct ll_readahead_state *ras = &fd->fd_ras;

        spin_lock(&ras->ras_lock);
        ras->ras_requests++;
        ras->ras_request_index = 0;
        ras->ras_consecutive_requests++;
        spin_unlock(&ras->ras_lock);
}

static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
                              struct cl_page_list *queue, struct cl_page *page,
                              struct cl_object *clob, pgoff_t *max_index)
{
        struct page *vmpage = page->cp_vmpage;
        struct vvp_page *vpg;
        int rc;

        rc = 0;
        cl_page_assume(env, io, page);
        lu_ref_add(&page->cp_reference, "ra", current);
        vpg = cl2vvp_page(cl_object_page_slice(clob, page));
        if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
                CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
                       vvp_index(vpg), *max_index);
                if (*max_index == 0 || vvp_index(vpg) > *max_index)
                        rc = cl_page_is_under_lock(env, io, page, max_index);
                if (rc == 0) {
                        vpg->vpg_defer_uptodate = 1;
                        vpg->vpg_ra_used = 0;
                        cl_page_list_add(queue, page);
                        rc = 1;
                } else {
                        cl_page_discard(env, io, page);
                        rc = -ENOLCK;
                }
        } else {
                /* skip completed pages */
                cl_page_unassume(env, io, page);
        }
        lu_ref_del(&page->cp_reference, "ra", current);
        cl_page_put(env, page);
        return rc;
}

/**
 * Initiates read-ahead of a page with given index.
 *
 * \retval +ve: page was added to \a queue.
 *
 * \retval -ENOLCK: there is no extent lock for this part of a file, stop
 *                  read-ahead.
 *
 * \retval -ve, 0: page wasn't added to \a queue for other reason.
 */
static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
                              struct cl_page_list *queue,
                              pgoff_t index, pgoff_t *max_index)
{
        struct cl_object *clob = io->ci_obj;
        struct inode *inode = vvp_object_inode(clob);
        struct page *vmpage;
        struct cl_page *page;
        enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
        int rc = 0;
        const char *msg = NULL;

        vmpage = grab_cache_page_nowait(inode->i_mapping, index);
        if (vmpage) {
                /* Check if vmpage was truncated or reclaimed */
                if (vmpage->mapping == inode->i_mapping) {
                        page = cl_page_find(env, clob, vmpage->index,
                                            vmpage, CPT_CACHEABLE);
                        if (!IS_ERR(page)) {
                                rc = cl_read_ahead_page(env, io, queue,
                                                        page, clob, max_index);
                                if (rc == -ENOLCK) {
                                        which = RA_STAT_FAILED_MATCH;
                                        msg = "lock match failed";
                                }
                        } else {
                                which = RA_STAT_FAILED_GRAB_PAGE;
                                msg = "cl_page_find failed";
                        }
                } else {
                        which = RA_STAT_WRONG_GRAB_PAGE;
                        msg = "g_c_p_n returned invalid page";
                }
                if (rc != 1)
                        unlock_page(vmpage);
                put_page(vmpage);
        } else {
                which = RA_STAT_FAILED_GRAB_PAGE;
                msg = "g_c_p_n failed";
        }
        if (msg) {
                ll_ra_stats_inc(inode, which);
                CDEBUG(D_READA, "%s\n", msg);
        }
        return rc;
}

#define RIA_DEBUG(ria) \
        CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
               ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length, \
               ria->ria_pages)

/* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't
 * know what the actual RPC size is.  If this needs to change, it makes more
 * sense to tune the i_blkbits value for the file based on the OSTs it is
 * striped over, rather than having a constant value for all files here.
 */

/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)).
 * It is temporarily set to 1MB.  Once 4MB RPCs are enabled by default, this
 * should be adjusted in line with max_read_ahead_mb and
 * max_read_ahead_per_file_mb; otherwise the readahead budget can be used up
 * quickly, which affects read performance significantly.  See LU-2816.
 */
#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT)
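/* Note: with 4KB pages (an assumption for illustration) this evaluates to
 * 256 pages; the inode argument is currently unused, so the step does not
 * depend on the file's blocksize.
 */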

static inline int stride_io_mode(struct ll_readahead_state *ras)
{
        return ras->ras_consecutive_stride_requests > 1;
}

/* The function calculates how many pages will be read in
 * [off, off + length], in such a stride IO area, where
 * stride_offset = st_off, stride_length = st_len,
 * stride_pages = st_pgs
 *
 *   |------------------|*****|------------------|*****|------------|*****|....
 *   st_off
 *   |--- st_pgs ---|
 *   |-------- st_len --------|
 *
 *   How many pages it should read in such a pattern:
 *   |-------------------------------------------------------------|
 *   off
 *   |<------                length                        ------->|
 *
 *   =   |<----->|  +  |-------------------------------------|  +  |---|
 *       start_left              st_pgs * i                      end_left
 */
static unsigned long
stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
                unsigned long off, unsigned long length)
{
        __u64 start = off > st_off ? off - st_off : 0;
        __u64 end = off + length > st_off ? off + length - st_off : 0;
        unsigned long start_left = 0;
        unsigned long end_left = 0;
        unsigned long pg_count;

        if (st_len == 0 || length == 0 || end == 0)
                return length;

        start_left = do_div(start, st_len);
        if (start_left < st_pgs)
                start_left = st_pgs - start_left;
        else
                start_left = 0;

        end_left = do_div(end, st_len);
        if (end_left > st_pgs)
                end_left = st_pgs;

        CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n",
               start, end, start_left, end_left);

        if (start == end)
                pg_count = end_left - (st_pgs - start_left);
        else
                pg_count = start_left + st_pgs * (end - start - 1) + end_left;

        CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu pgcount %lu\n",
               st_off, st_len, st_pgs, off, length, pg_count);

        return pg_count;
}
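
/*
 * Worked example (illustrative, not from the original source): with
 * st_off = 0, st_len = 16, st_pgs = 4, off = 2 and length = 30, the range
 * covers pages 2-3 of the first stride and pages 16-19 of the second,
 * so stride_pg_count() returns 2 + 4 * 1 + 0 = 6.
 */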

static int ria_page_count(struct ra_io_arg *ria)
{
        __u64 length = ria->ria_end >= ria->ria_start ?
                       ria->ria_end - ria->ria_start + 1 : 0;

        return stride_pg_count(ria->ria_stoff, ria->ria_length,
                               ria->ria_pages, ria->ria_start,
                               length);
}

/* Check whether the index is in the defined ra-window */
static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
{
        /* If ria_length == ria_pages, it means non-stride I/O mode;
         * idx should always be inside the read-ahead window in this case.
         * For stride I/O mode, just check whether the idx is inside
         * the ria_pages.
         */
        return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
               (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
                ria->ria_length < ria->ria_pages);
}
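
/*
 * Illustrative example (not from the original source): with ria_stoff = 0,
 * ria_length = 16 and ria_pages = 4, idx = 18 gives (18 - 0) % 16 = 2 < 4,
 * so the index lies inside the stride window; idx = 21 gives 5 >= 4 and is
 * skipped.
 */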

static int ll_read_ahead_pages(const struct lu_env *env,
                               struct cl_io *io, struct cl_page_list *queue,
                               struct ra_io_arg *ria,
                               unsigned long *reserved_pages,
                               unsigned long *ra_end)
{
        int rc, count = 0;
        bool stride_ria;
        pgoff_t page_idx;
        pgoff_t max_index = 0;

        LASSERT(ria);
        RIA_DEBUG(ria);

        stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
        for (page_idx = ria->ria_start;
             page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) {
                if (ras_inside_ra_window(page_idx, ria)) {
                        /* If the page is inside the read-ahead window */
                        rc = ll_read_ahead_page(env, io, queue,
                                                page_idx, &max_index);
                        if (rc == 1) {
                                (*reserved_pages)--;
                                count++;
                        } else if (rc == -ENOLCK) {
                                break;
                        }
                } else if (stride_ria) {
                        /* If it is not in the read-ahead window and we are in
                         * stride I/O mode, check whether it should skip
                         * the stride gap.
                         */
                        pgoff_t offset;
                        /* FIXME: This assertion is only valid for forward
                         * read-ahead; it will be fixed when backward
                         * read-ahead is implemented.
                         */
                        LASSERTF(page_idx > ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
                                 page_idx,
                                 ria->ria_start, ria->ria_end, ria->ria_stoff,
                                 ria->ria_length, ria->ria_pages);
                        offset = page_idx - ria->ria_stoff;
                        offset = offset % (ria->ria_length);
                        if (offset > ria->ria_pages) {
                                page_idx += ria->ria_length - offset;
                                CDEBUG(D_READA, "i %lu skip %lu\n", page_idx,
                                       ria->ria_length - offset);
                                continue;
                        }
                }
        }
        *ra_end = page_idx;
        return count;
}

int ll_readahead(const struct lu_env *env, struct cl_io *io,
                 struct cl_page_list *queue, struct ll_readahead_state *ras,
                 bool hit)
{
        struct vvp_io *vio = vvp_env_io(env);
        struct ll_thread_info *lti = ll_env_info(env);
        struct cl_attr *attr = vvp_env_thread_attr(env);
        unsigned long start = 0, end = 0, reserved;
        unsigned long ra_end, len, mlen = 0;
        struct inode *inode;
        struct ra_io_arg *ria = &lti->lti_ria;
        struct cl_object *clob;
        int ret = 0;
        __u64 kms;

        clob = io->ci_obj;
        inode = vvp_object_inode(clob);

        memset(ria, 0, sizeof(*ria));

        cl_object_attr_lock(clob);
        ret = cl_object_attr_get(env, clob, attr);
        cl_object_attr_unlock(clob);

        if (ret != 0)
                return ret;
        kms = attr->cat_kms;
        if (kms == 0) {
                ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
                return 0;
        }

        spin_lock(&ras->ras_lock);

        /* Enlarge the RA window to encompass the full read */
        if (vio->vui_ra_valid &&
            ras->ras_window_start + ras->ras_window_len <
            vio->vui_ra_start + vio->vui_ra_count) {
                ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
                                      ras->ras_window_start;
        }

        /* Reserve a part of the read-ahead window that we'll be issuing */
        if (ras->ras_window_len) {
                start = ras->ras_next_readahead;
                end = ras->ras_window_start + ras->ras_window_len - 1;
        }
        if (end != 0) {
                unsigned long rpc_boundary;
                /*
                 * Align RA window to an optimal boundary.
                 *
                 * XXX This would be better to align to cl_max_pages_per_rpc
                 * instead of PTLRPC_MAX_BRW_PAGES, because the RPC size may
                 * be aligned to the RAID stripe size in the future and that
                 * is more important than the RPC size.
                 */
                /* Note: we only trim the RPC, instead of extending it to the
                 * boundary, to avoid reading too many pages during random
                 * reads.
                 */
                rpc_boundary = (end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1));
                if (rpc_boundary > 0)
                        rpc_boundary--;

                if (rpc_boundary > start)
                        end = rpc_boundary;

                /* Truncate RA window to end of file */
                end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT));

                ras->ras_next_readahead = max(end, end + 1);
                RAS_CDEBUG(ras);
        }
        ria->ria_start = start;
        ria->ria_end = end;
        /* If stride I/O mode is detected, get the stride window */
        if (stride_io_mode(ras)) {
                ria->ria_stoff = ras->ras_stride_offset;
                ria->ria_length = ras->ras_stride_length;
                ria->ria_pages = ras->ras_stride_pages;
        }
        spin_unlock(&ras->ras_lock);

        if (end == 0) {
                ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
                return 0;
        }
        len = ria_page_count(ria);
        if (len == 0) {
                ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
                return 0;
        }

        CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
               PFID(lu_object_fid(&clob->co_lu)),
               ria->ria_start, ria->ria_end,
               vio->vui_ra_valid ? vio->vui_ra_start : 0,
               vio->vui_ra_valid ? vio->vui_ra_count : 0,
               hit);

        /* At least extend the readahead window to cover the current read */
        if (!hit && vio->vui_ra_valid &&
            vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
                /* to the end of the current read window. */
                mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
                /* trim to RPC boundary */
                start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
                mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
        }

        reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
        if (reserved < len)
                ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);

        CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
               reserved, len, mlen,
               atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
               ll_i2sbi(inode)->ll_ra_info.ra_max_pages);

        ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);

        if (reserved != 0)
                ll_ra_count_put(ll_i2sbi(inode), reserved);

        if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
                ll_ra_stats_inc(inode, RA_STAT_EOF);

        /* If we didn't get to the end of the region we reserved from the ras,
         * we need to go back and update the ras so that the next read-ahead
         * tries from where we left off.  We only do so if the region we
         * failed to issue read-ahead on is still ahead of the app and behind
         * the next index to start read-ahead from.
         */
        CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu\n",
               ra_end, end, ria->ria_end);

        if (ra_end != end + 1) {
                ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
                spin_lock(&ras->ras_lock);
                if (ra_end < ras->ras_next_readahead &&
                    index_in_window(ra_end, ras->ras_window_start, 0,
                                    ras->ras_window_len)) {
                        ras->ras_next_readahead = ra_end;
                        RAS_CDEBUG(ras);
                }
                spin_unlock(&ras->ras_lock);
        }

        return ret;
}

static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
                          unsigned long index)
{
        ras->ras_window_start = index & (~(RAS_INCREASE_STEP(inode) - 1));
}
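
/*
 * Illustrative example (not from the original source): with 4KB pages,
 * RAS_INCREASE_STEP() is 256 pages, so index 300 rounds the window start
 * down to 256 (300 & ~255).
 */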

/* called with the ras_lock held or from places where it doesn't matter */
static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
                      unsigned long index)
{
        ras->ras_last_readpage = index;
        ras->ras_consecutive_requests = 0;
        ras->ras_consecutive_pages = 0;
        ras->ras_window_len = 0;
        ras_set_start(inode, ras, index);
        ras->ras_next_readahead = max(ras->ras_window_start, index);

        RAS_CDEBUG(ras);
}

/* called with the ras_lock held or from places where it doesn't matter */
static void ras_stride_reset(struct ll_readahead_state *ras)
{
        ras->ras_consecutive_stride_requests = 0;
        ras->ras_stride_length = 0;
        ras->ras_stride_pages = 0;
        RAS_CDEBUG(ras);
}

void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
{
        spin_lock_init(&ras->ras_lock);
        ras_reset(inode, ras, 0);
        ras->ras_requests = 0;
}

/*
 * Check whether the read request is in the stride window.
 * If it is in the stride window, return 1, otherwise return 0.
 */
static int index_in_stride_window(struct ll_readahead_state *ras,
                                  unsigned long index)
{
        unsigned long stride_gap;

        if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 ||
            ras->ras_stride_pages == ras->ras_stride_length)
                return 0;

        stride_gap = index - ras->ras_last_readpage - 1;

        /* If it is a contiguous read */
        if (stride_gap == 0)
                return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;

        /* Otherwise check the stride by itself */
        return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
               ras->ras_consecutive_pages == ras->ras_stride_pages;
}
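
/*
 * Illustrative example (not from the original source): with
 * ras_stride_length = 16, ras_stride_pages = 4, ras_last_readpage = 3 and
 * ras_consecutive_pages = 4, a read at index 16 gives stride_gap = 12,
 * which equals 16 - 4, so the read falls inside the stride window.
 */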

static void ras_update_stride_detector(struct ll_readahead_state *ras,
                                       unsigned long index)
{
        unsigned long stride_gap = index - ras->ras_last_readpage - 1;

        if (!stride_io_mode(ras) && (stride_gap != 0 ||
            ras->ras_consecutive_stride_requests == 0)) {
                ras->ras_stride_pages = ras->ras_consecutive_pages;
                ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
        }
        LASSERT(ras->ras_request_index == 0);
        LASSERT(ras->ras_consecutive_stride_requests == 0);

        if (index <= ras->ras_last_readpage) {
                /* Reset stride window for forward read */
                ras_stride_reset(ras);
                return;
        }

        ras->ras_stride_pages = ras->ras_consecutive_pages;
        ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;

        RAS_CDEBUG(ras);
}

/* The stride read-ahead window will be increased by inc_len according to
 * the stride I/O pattern.
 */
static void ras_stride_increase_window(struct ll_readahead_state *ras,
                                       struct ll_ra_info *ra,
                                       unsigned long inc_len)
{
        unsigned long left, step, window_len;
        unsigned long stride_len;

        LASSERT(ras->ras_stride_length > 0);
        LASSERTF(ras->ras_window_start + ras->ras_window_len
                 >= ras->ras_stride_offset, "window_start %lu, window_len %lu stride_offset %lu\n",
                 ras->ras_window_start,
                 ras->ras_window_len, ras->ras_stride_offset);

        stride_len = ras->ras_window_start + ras->ras_window_len -
                     ras->ras_stride_offset;

        left = stride_len % ras->ras_stride_length;
        window_len = ras->ras_window_len - left;

        if (left < ras->ras_stride_pages)
                left += inc_len;
        else
                left = ras->ras_stride_pages + inc_len;

        LASSERT(ras->ras_stride_pages != 0);

        step = left / ras->ras_stride_pages;
        left %= ras->ras_stride_pages;

        window_len += step * ras->ras_stride_length + left;

        if (stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
                            ras->ras_stride_pages, ras->ras_stride_offset,
                            window_len) <= ra->ra_max_pages_per_file)
                ras->ras_window_len = window_len;

        RAS_CDEBUG(ras);
}

static void ras_increase_window(struct inode *inode,
                                struct ll_readahead_state *ras,
                                struct ll_ra_info *ra)
{
        /* The stretch of the ra-window should be aligned with the max
         * rpc_size, but the current clio architecture does not support
         * retrieving such information from the lower layer.  FIXME later
         */
        if (stride_io_mode(ras))
                ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP(inode));
        else
                ras->ras_window_len = min(ras->ras_window_len +
                                          RAS_INCREASE_STEP(inode),
                                          ra->ra_max_pages_per_file);
}

void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                struct ll_readahead_state *ras, unsigned long index,
                unsigned hit)
{
        struct ll_ra_info *ra = &sbi->ll_ra_info;
        int zero = 0, stride_detect = 0, ra_miss = 0;

        spin_lock(&ras->ras_lock);

        ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);

        /* Reset the read-ahead window in two cases.  First when the app seeks
         * or reads to some other part of the file.  Secondly if we get a
         * read-ahead miss that we think we've previously issued.  This can
         * be a symptom of there being so many read-ahead pages that the VM
         * is reclaiming them before we get to them.
         */
        if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
                zero = 1;
                ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
        } else if (!hit && ras->ras_window_len &&
                   index < ras->ras_next_readahead &&
                   index_in_window(index, ras->ras_window_start, 0,
                                   ras->ras_window_len)) {
                ra_miss = 1;
                ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
        }

        /* On the second access to a file smaller than the tunable
         * ra_max_read_ahead_whole_pages, trigger RA on all pages in the
         * file up to ra_max_pages_per_file.  This is simply a best effort
         * and only occurs once per open file.  Normal RA behavior is reverted
         * to for subsequent IO.  The mmap case does not increment
         * ras_requests and thus can never trigger this behavior.
         */
        if (ras->ras_requests == 2 && !ras->ras_request_index) {
                __u64 kms_pages;

                kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
                            PAGE_SHIFT;

                CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
                       ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages_per_file);

                if (kms_pages &&
                    kms_pages <= ra->ra_max_read_ahead_whole_pages) {
                        ras->ras_window_start = 0;
                        ras->ras_last_readpage = 0;
                        ras->ras_next_readahead = 0;
                        ras->ras_window_len = min(ra->ra_max_pages_per_file,
                                ra->ra_max_read_ahead_whole_pages);
                        goto out_unlock;
                }
        }
        if (zero) {
                /* Check whether it is in stride I/O mode */
                if (!index_in_stride_window(ras, index)) {
                        if (ras->ras_consecutive_stride_requests == 0 &&
                            ras->ras_request_index == 0) {
                                ras_update_stride_detector(ras, index);
                                ras->ras_consecutive_stride_requests++;
                        } else {
                                ras_stride_reset(ras);
                        }
                        ras_reset(inode, ras, index);
                        ras->ras_consecutive_pages++;
                        goto out_unlock;
                } else {
                        ras->ras_consecutive_pages = 0;
                        ras->ras_consecutive_requests = 0;
                        if (++ras->ras_consecutive_stride_requests > 1)
                                stride_detect = 1;
                        RAS_CDEBUG(ras);
                }
        } else {
                if (ra_miss) {
                        if (index_in_stride_window(ras, index) &&
                            stride_io_mode(ras)) {
                                /* If stride-RA hit a cache miss, the stride
                                 * detector is not reset, to avoid the overhead
                                 * of re-detecting the read-ahead mode.
                                 */
                                if (index != ras->ras_last_readpage + 1)
                                        ras->ras_consecutive_pages = 0;
                                ras_reset(inode, ras, index);
                                RAS_CDEBUG(ras);
                        } else {
                                /* Reset both the stride window and the normal
                                 * RA window.
                                 */
                                ras_reset(inode, ras, index);
                                ras->ras_consecutive_pages++;
                                ras_stride_reset(ras);
                                goto out_unlock;
                        }
                } else if (stride_io_mode(ras)) {
                        /* If this is a contiguous read but we are currently
                         * in stride I/O mode, check whether the stride step
                         * is still valid; if not, reset the stride ra window.
                         */
                        if (!index_in_stride_window(ras, index)) {
                                /* Shrink stride read-ahead window to be zero */
                                ras_stride_reset(ras);
                                ras->ras_window_len = 0;
                                ras->ras_next_readahead = index;
                        }
                }
        }
        ras->ras_consecutive_pages++;
        ras->ras_last_readpage = index;
        ras_set_start(inode, ras, index);

        if (stride_io_mode(ras)) {
                /* Since stride readahead is sensitive to the read-ahead
                 * offset, we use the original offset here instead of
                 * ras_window_start, which is RPC aligned.
                 */
                ras->ras_next_readahead = max(index, ras->ras_next_readahead);
        } else {
                if (ras->ras_next_readahead < ras->ras_window_start)
                        ras->ras_next_readahead = ras->ras_window_start;
                if (!hit)
                        ras->ras_next_readahead = index + 1;
        }
        RAS_CDEBUG(ras);

        /* Trigger RA in the mmap case, where ras_consecutive_requests
         * is not incremented and thus can't be used to trigger RA.
         */
        if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
                ras->ras_window_len = RAS_INCREASE_STEP(inode);
                goto out_unlock;
        }

        /* Initially reset the stride window offset to next_readahead */
        if (ras->ras_consecutive_stride_requests == 2 && stride_detect) {
                /* Once stride IO mode is detected, next_readahead should be
                 * reset to make sure next_readahead > stride offset.
                 */
                ras->ras_next_readahead = max(index, ras->ras_next_readahead);
                ras->ras_stride_offset = index;
                ras->ras_window_len = RAS_INCREASE_STEP(inode);
        }

        /* The initial ras_window_len is set to the request size.  To avoid
         * uselessly reading and discarding pages for random IO, the window
         * is only increased once per consecutive request received.
         */
        if ((ras->ras_consecutive_requests > 1 || stride_detect) &&
            !ras->ras_request_index)
                ras_increase_window(inode, ras, ra);
out_unlock:
        RAS_CDEBUG(ras);
        ras->ras_request_index++;
        spin_unlock(&ras->ras_lock);
        return;
}

int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
{
        struct inode *inode = vmpage->mapping->host;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lu_env *env;
        struct cl_io *io;
        struct cl_page *page;
        struct cl_object *clob;
        struct cl_env_nest nest;
        bool redirtied = false;
        bool unlocked = false;
        int result;

        LASSERT(PageLocked(vmpage));
        LASSERT(!PageWriteback(vmpage));

        LASSERT(ll_i2dtexp(inode));

        env = cl_env_nested_get(&nest);
        if (IS_ERR(env)) {
                result = PTR_ERR(env);
                goto out;
        }

        clob = ll_i2info(inode)->lli_clob;
        LASSERT(clob);

        io = vvp_env_thread_io(env);
        io->ci_obj = clob;
        io->ci_ignore_layout = 1;
        result = cl_io_init(env, io, CIT_MISC, clob);
        if (result == 0) {
                page = cl_page_find(env, clob, vmpage->index,
                                    vmpage, CPT_CACHEABLE);
                if (!IS_ERR(page)) {
                        lu_ref_add(&page->cp_reference, "writepage",
                                   current);
                        cl_page_assume(env, io, page);
                        result = cl_page_flush(env, io, page);
                        if (result != 0) {
                                /*
                                 * Re-dirty page on error so it retries write,
                                 * but not in case when IO has actually
                                 * occurred and completed with an error.
                                 */
                                if (!PageError(vmpage)) {
                                        redirty_page_for_writepage(wbc, vmpage);
                                        result = 0;
                                        redirtied = true;
                                }
                        }
                        cl_page_disown(env, io, page);
                        unlocked = true;
                        lu_ref_del(&page->cp_reference,
                                   "writepage", current);
                        cl_page_put(env, page);
                } else {
                        result = PTR_ERR(page);
                }
        }
        cl_io_fini(env, io);

        if (redirtied && wbc->sync_mode == WB_SYNC_ALL) {
                loff_t offset = cl_offset(clob, vmpage->index);

                /* Flushing the page failed because the extent is being
                 * written out.  Wait for the write of the extent to finish to
                 * avoid breaking the kernel, which assumes ->writepage should
                 * mark PageWriteback or clean the page.
                 */
                result = cl_sync_file_range(inode, offset,
                                            offset + PAGE_SIZE - 1,
                                            CL_FSYNC_LOCAL, 1);
                if (result > 0) {
                        /* Actually we may have written more than one page.
                         * Decrease nr_to_write by the extra pages, because
                         * the caller will count this page itself.
                         */
                        wbc->nr_to_write -= result - 1;
                        result = 0;
                }
        }

        cl_env_nested_put(&nest, env);
        goto out;

out:
        if (result < 0) {
                if (!lli->lli_async_rc)
                        lli->lli_async_rc = result;
                SetPageError(vmpage);
                if (!unlocked)
                        unlock_page(vmpage);
        }
        return result;
}

int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
        struct inode *inode = mapping->host;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        loff_t start;
        loff_t end;
        enum cl_fsync_mode mode;
        int range_whole = 0;
        int result;
        int ignore_layout = 0;

        if (wbc->range_cyclic) {
                start = mapping->writeback_index << PAGE_SHIFT;
                end = OBD_OBJECT_EOF;
        } else {
                start = wbc->range_start;
                end = wbc->range_end;
                if (end == LLONG_MAX) {
                        end = OBD_OBJECT_EOF;
                        range_whole = start == 0;
                }
        }

        mode = CL_FSYNC_NONE;
        if (wbc->sync_mode == WB_SYNC_ALL)
                mode = CL_FSYNC_LOCAL;

        if (sbi->ll_umounting)
                /* if the mountpoint is being umounted, all pages have to be
                 * evicted to avoid hitting LBUG when truncate_inode_pages()
                 * is called later on.
                 */
                ignore_layout = 1;
        result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
        if (result > 0) {
                wbc->nr_to_write -= result;
                result = 0;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
                if (end == OBD_OBJECT_EOF)
                        mapping->writeback_index = 0;
                else
                        mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
        }
        return result;
}

int ll_readpage(struct file *file, struct page *vmpage)
{
        struct ll_cl_context *lcc;
        int result;

        lcc = ll_cl_init(file, vmpage);
        if (!IS_ERR(lcc)) {
                struct lu_env *env = lcc->lcc_env;
                struct cl_io *io = lcc->lcc_io;
                struct cl_page *page = lcc->lcc_page;

                LASSERT(page->cp_type == CPT_CACHEABLE);
                if (likely(!PageUptodate(vmpage))) {
                        cl_page_assume(env, io, page);
                        result = cl_io_read_page(env, io, page);
                } else {
                        /* Page from a non-object file. */
                        unlock_page(vmpage);
                        result = 0;
                }
                ll_cl_fini(lcc);
        } else {
                unlock_page(vmpage);
                result = PTR_ERR(lcc);
        }
        return result;
}

int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
                    struct cl_page *page, enum cl_req_type crt)
{
        struct cl_2queue *queue;
        int result;

        LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);

        queue = &io->ci_queue;
        cl_2queue_init_page(queue, page);

        result = cl_io_submit_sync(env, io, crt, queue, 0);
        LASSERT(cl_page_is_owned(page, io));

        if (crt == CRT_READ)
                /*
                 * in CRT_WRITE case page is left locked even in case of
                 * error.
                 */
                cl_page_list_disown(env, io, &queue->c2_qin);
        cl_2queue_fini(env, queue);

        return result;
}