/*
 * Extracted from a blame view of fs/nfs/blocklayout/blocklayout.c
 * (kernel/msm-4.9, blob 2e373826db802907313ecb21214e5d698b824337).
 */

/*
 *  linux/fs/nfs/blocklayout/blocklayout.c
 *
 *  Module for the NFSv4.1 pNFS block layout driver.
 *
 *  Copyright (c) 2006 The Regents of the University of Michigan.
 *  All rights reserved.
 *
 *  Andy Adamson <andros@citi.umich.edu>
 *  Fred Isaman <iisaman@umich.edu>
 *
 * permission is granted to use, copy, create derivative works and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the university of michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization.  if
 * the above copyright notice or any other identification of the
 * university of michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * this software is provided as is, without representation from the
 * university of michigan as to its fitness for any purpose, and without
 * warranty by the university of michigan of any kind, either express
 * or implied, including without limitation the implied warranties of
 * merchantability and fitness for a particular purpose.  the regents
 * of the university of michigan shall not be liable for any damages,
 * including special, indirect, incidental, or consequential damages,
 * with respect to any claim arising out or in connection with the use
 * of the software, even if it has been or is hereafter advised of the
 * possibility of such damages.
 */
Fred Isaman	9549ec0	2011-07-30 20:52:53 -0400	[diff] [blame]	32
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	33	#include <linux/module.h>
				34	#include <linux/init.h>
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	35	#include <linux/mount.h>
				36	#include <linux/namei.h>
Fred Isaman	9549ec0	2011-07-30 20:52:53 -0400	[diff] [blame]	37	#include <linux/bio.h> /* struct bio */
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	38
				39	#include "blocklayout.h"
				40
				41	#define NFSDBG_FACILITY NFSDBG_PNFS_LD
				42
				43	MODULE_LICENSE("GPL");
				44	MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
				45	MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
				46
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	47	struct dentry *bl_device_pipe;
				48	wait_queue_head_t bl_wq;
				49
/* Debug aid: emit the tested state flags of @page, one dprintk per flag. */
static void print_page(struct page *page)
{
	dprintk("PRINTPAGE page %p\n", page);

	/* One line per page-flag predicate, in a fixed order. */
	dprintk(" PagePrivate %d\n", PagePrivate(page));
	dprintk(" PageUptodate %d\n", PageUptodate(page));
	dprintk(" PageError %d\n", PageError(page));
	dprintk(" PageDirty %d\n", PageDirty(page));
	dprintk(" PageReferenced %d\n", PageReferenced(page));
	dprintk(" PageLocked %d\n", PageLocked(page));
	dprintk(" PageWriteback %d\n", PageWriteback(page));
	dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page));

	dprintk("\n");
}

				64	/* Given the be associated with isect, determine if page data needs to be
				65	* initialized.
				66	*/
				67	static int is_hole(struct pnfs_block_extent *be, sector_t isect)
				68	{
				69	if (be->be_state == PNFS_BLOCK_NONE_DATA)
				70	return 1;
				71	else if (be->be_state != PNFS_BLOCK_INVALID_DATA)
				72	return 0;
				73	else
				74	return !bl_is_sector_init(be->be_inval, isect);
				75	}
				76
Fred Isaman	650e2d3	2011-07-30 20:52:54 -0400	[diff] [blame^]	77	/* Given the be associated with isect, determine if page data can be
				78	* written to disk.
				79	*/
				80	static int is_writable(struct pnfs_block_extent *be, sector_t isect)
				81	{
				82	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
				83	return 1;
				84	else if (be->be_state != PNFS_BLOCK_INVALID_DATA)
				85	return 0;
				86	else
				87	return bl_is_sector_init(be->be_inval, isect);
				88	}
				89
Fred Isaman	9549ec0	2011-07-30 20:52:53 -0400	[diff] [blame]	90	/* The data we are handed might be spread across several bios. We need
				91	* to track when the last one is finished.
				92	*/
				93	struct parallel_io {
				94	struct kref refcnt;
				95	struct rpc_call_ops call_ops;
				96	void (pnfs_callback) (void data);
				97	void *data;
				98	};
				99
				100	static inline struct parallel_io alloc_parallel(void data)
				101	{
				102	struct parallel_io *rv;
				103
				104	rv = kmalloc(sizeof(*rv), GFP_NOFS);
				105	if (rv) {
				106	rv->data = data;
				107	kref_init(&rv->refcnt);
				108	}
				109	return rv;
				110	}
				111
				112	static inline void get_parallel(struct parallel_io *p)
				113	{
				114	kref_get(&p->refcnt);
				115	}
				116
				117	static void destroy_parallel(struct kref *kref)
				118	{
				119	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
				120
				121	dprintk("%s enter\n", __func__);
				122	p->pnfs_callback(p->data);
				123	kfree(p);
				124	}
				125
				126	static inline void put_parallel(struct parallel_io *p)
				127	{
				128	kref_put(&p->refcnt, destroy_parallel);
				129	}
				130
				131	static struct bio *
				132	bl_submit_bio(int rw, struct bio *bio)
				133	{
				134	if (bio) {
				135	get_parallel(bio->bi_private);
				136	dprintk("%s submitting %s bio %u@%llu\n", __func__,
				137	rw == READ ? "read" : "write",
				138	bio->bi_size, (unsigned long long)bio->bi_sector);
				139	submit_bio(rw, bio);
				140	}
				141	return NULL;
				142	}
				143
				144	static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
				145	struct pnfs_block_extent *be,
				146	void (end_io)(struct bio , int err),
				147	struct parallel_io *par)
				148	{
				149	struct bio *bio;
				150
				151	bio = bio_alloc(GFP_NOIO, npg);
				152	if (!bio)
				153	return NULL;
				154
				155	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
				156	bio->bi_bdev = be->be_mdev;
				157	bio->bi_end_io = end_io;
				158	bio->bi_private = par;
				159	return bio;
				160	}
				161
				162	static struct bio bl_add_page_to_bio(struct bio bio, int npg, int rw,
				163	sector_t isect, struct page *page,
				164	struct pnfs_block_extent *be,
				165	void (end_io)(struct bio , int err),
				166	struct parallel_io *par)
				167	{
				168	retry:
				169	if (!bio) {
				170	bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
				171	if (!bio)
				172	return ERR_PTR(-ENOMEM);
				173	}
				174	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
				175	bio = bl_submit_bio(rw, bio);
				176	goto retry;
				177	}
				178	return bio;
				179	}
				180
				181	static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
				182	{
				183	if (lseg->pls_range.iomode == IOMODE_RW) {
				184	dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
				185	set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
				186	} else {
				187	dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
				188	set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
				189	}
				190	}
				191
				192	/* This is basically copied from mpage_end_io_read */
				193	static void bl_end_io_read(struct bio *bio, int err)
				194	{
				195	struct parallel_io *par = bio->bi_private;
				196	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
				197	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
				198	struct nfs_read_data rdata = (struct nfs_read_data )par->data;
				199
				200	do {
				201	struct page *page = bvec->bv_page;
				202
				203	if (--bvec >= bio->bi_io_vec)
				204	prefetchw(&bvec->bv_page->flags);
				205	if (uptodate)
				206	SetPageUptodate(page);
				207	} while (bvec >= bio->bi_io_vec);
				208	if (!uptodate) {
				209	if (!rdata->pnfs_error)
				210	rdata->pnfs_error = -EIO;
				211	bl_set_lo_fail(rdata->lseg);
				212	}
				213	bio_put(bio);
				214	put_parallel(par);
				215	}
				216
				217	static void bl_read_cleanup(struct work_struct *work)
				218	{
				219	struct rpc_task *task;
				220	struct nfs_read_data *rdata;
				221	dprintk("%s enter\n", __func__);
				222	task = container_of(work, struct rpc_task, u.tk_work);
				223	rdata = container_of(task, struct nfs_read_data, task);
				224	pnfs_ld_read_done(rdata);
				225	}
				226
				227	static void
				228	bl_end_par_io_read(void *data)
				229	{
				230	struct nfs_read_data *rdata = data;
				231
				232	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
				233	schedule_work(&rdata->task.u.tk_work);
				234	}
				235
				236	/* We don't want normal .rpc_call_done callback used, so we replace it
				237	* with this stub.
				238	*/
				239	static void bl_rpc_do_nothing(struct rpc_task task, void calldata)
				240	{
				241	return;
				242	}
				243
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	244	static enum pnfs_try_status
				245	bl_read_pagelist(struct nfs_read_data *rdata)
				246	{
Fred Isaman	9549ec0	2011-07-30 20:52:53 -0400	[diff] [blame]	247	int i, hole;
				248	struct bio *bio = NULL;
				249	struct pnfs_block_extent be = NULL, cow_read = NULL;
				250	sector_t isect, extent_length = 0;
				251	struct parallel_io *par;
				252	loff_t f_offset = rdata->args.offset;
				253	size_t count = rdata->args.count;
				254	struct page **pages = rdata->args.pages;
				255	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
				256
				257	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
				258	rdata->npages, f_offset, count);
				259
				260	par = alloc_parallel(rdata);
				261	if (!par)
				262	goto use_mds;
				263	par->call_ops = *rdata->mds_ops;
				264	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
				265	par->pnfs_callback = bl_end_par_io_read;
				266	/* At this point, we can no longer jump to use_mds */
				267
				268	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
				269	/* Code assumes extents are page-aligned */
				270	for (i = pg_index; i < rdata->npages; i++) {
				271	if (!extent_length) {
				272	/* We've used up the previous extent */
				273	bl_put_extent(be);
				274	bl_put_extent(cow_read);
				275	bio = bl_submit_bio(READ, bio);
				276	/* Get the next one */
				277	be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
				278	isect, &cow_read);
				279	if (!be) {
				280	rdata->pnfs_error = -EIO;
				281	goto out;
				282	}
				283	extent_length = be->be_length -
				284	(isect - be->be_f_offset);
				285	if (cow_read) {
				286	sector_t cow_length = cow_read->be_length -
				287	(isect - cow_read->be_f_offset);
				288	extent_length = min(extent_length, cow_length);
				289	}
				290	}
				291	hole = is_hole(be, isect);
				292	if (hole && !cow_read) {
				293	bio = bl_submit_bio(READ, bio);
				294	/* Fill hole w/ zeroes w/o accessing device */
				295	dprintk("%s Zeroing page for hole\n", __func__);
				296	zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
				297	print_page(pages[i]);
				298	SetPageUptodate(pages[i]);
				299	} else {
				300	struct pnfs_block_extent *be_read;
				301
				302	be_read = (hole && cow_read) ? cow_read : be;
				303	bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
				304	isect, pages[i], be_read,
				305	bl_end_io_read, par);
				306	if (IS_ERR(bio)) {
				307	rdata->pnfs_error = PTR_ERR(bio);
				308	goto out;
				309	}
				310	}
				311	isect += PAGE_CACHE_SECTORS;
				312	extent_length -= PAGE_CACHE_SECTORS;
				313	}
				314	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
				315	rdata->res.eof = 1;
				316	rdata->res.count = rdata->inode->i_size - f_offset;
				317	} else {
				318	rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
				319	}
				320	out:
				321	bl_put_extent(be);
				322	bl_put_extent(cow_read);
				323	bl_submit_bio(READ, bio);
				324	put_parallel(par);
				325	return PNFS_ATTEMPTED;
				326
				327	use_mds:
				328	dprintk("Giving up and using normal NFS\n");
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	329	return PNFS_NOT_ATTEMPTED;
				330	}
				331
Fred Isaman	650e2d3	2011-07-30 20:52:54 -0400	[diff] [blame^]	332	/* This is basically copied from mpage_end_io_read */
				333	static void bl_end_io_write(struct bio *bio, int err)
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	334	{
Fred Isaman	650e2d3	2011-07-30 20:52:54 -0400	[diff] [blame^]	335	struct parallel_io *par = bio->bi_private;
				336	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
				337	struct nfs_write_data wdata = (struct nfs_write_data )par->data;
				338
				339	if (!uptodate) {
				340	if (!wdata->pnfs_error)
				341	wdata->pnfs_error = -EIO;
				342	bl_set_lo_fail(wdata->lseg);
				343	}
				344	bio_put(bio);
				345	put_parallel(par);
				346	}
				347
				348	/* Function scheduled for call during bl_end_par_io_write,
				349	* it marks sectors as written and extends the commitlist.
				350	*/
				351	static void bl_write_cleanup(struct work_struct *work)
				352	{
				353	struct rpc_task *task;
				354	struct nfs_write_data *wdata;
				355	dprintk("%s enter\n", __func__);
				356	task = container_of(work, struct rpc_task, u.tk_work);
				357	wdata = container_of(task, struct nfs_write_data, task);
				358	pnfs_ld_write_done(wdata);
				359	}
				360
				361	/* Called when last of bios associated with a bl_write_pagelist call finishes */
				362	static void
				363	bl_end_par_io_write(void *data)
				364	{
				365	struct nfs_write_data *wdata = data;
				366
				367	/* STUB - ignoring error handling */
				368	wdata->task.tk_status = 0;
				369	wdata->verf.committed = NFS_FILE_SYNC;
				370	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
				371	schedule_work(&wdata->task.u.tk_work);
				372	}
				373
				374	static enum pnfs_try_status
				375	bl_write_pagelist(struct nfs_write_data *wdata, int sync)
				376	{
				377	int i;
				378	struct bio *bio = NULL;
				379	struct pnfs_block_extent *be = NULL;
				380	sector_t isect, extent_length = 0;
				381	struct parallel_io *par;
				382	loff_t offset = wdata->args.offset;
				383	size_t count = wdata->args.count;
				384	struct page **pages = wdata->args.pages;
				385	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
				386
				387	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
				388	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
				389	* We want to write each, and if there is an error remove it from
				390	* list and call
				391	* nfs_retry_request(req) to have it redone using nfs.
				392	* QUEST? Do as block or per req? Think have to do per block
				393	* as part of end_bio
				394	*/
				395	par = alloc_parallel(wdata);
				396	if (!par)
				397	return PNFS_NOT_ATTEMPTED;
				398	par->call_ops = *wdata->mds_ops;
				399	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
				400	par->pnfs_callback = bl_end_par_io_write;
				401	/* At this point, have to be more careful with error handling */
				402
				403	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
				404	for (i = pg_index; i < wdata->npages ; i++) {
				405	if (!extent_length) {
				406	/* We've used up the previous extent */
				407	bl_put_extent(be);
				408	bio = bl_submit_bio(WRITE, bio);
				409	/* Get the next one */
				410	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
				411	isect, NULL);
				412	if (!be \|\| !is_writable(be, isect)) {
				413	wdata->pnfs_error = -ENOMEM;
				414	goto out;
				415	}
				416	extent_length = be->be_length -
				417	(isect - be->be_f_offset);
				418	}
				419	for (;;) {
				420	if (!bio) {
				421	bio = bio_alloc(GFP_NOIO, wdata->npages - i);
				422	if (!bio) {
				423	wdata->pnfs_error = -ENOMEM;
				424	goto out;
				425	}
				426	bio->bi_sector = isect - be->be_f_offset +
				427	be->be_v_offset;
				428	bio->bi_bdev = be->be_mdev;
				429	bio->bi_end_io = bl_end_io_write;
				430	bio->bi_private = par;
				431	}
				432	if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
				433	break;
				434	bio = bl_submit_bio(WRITE, bio);
				435	}
				436	isect += PAGE_CACHE_SECTORS;
				437	extent_length -= PAGE_CACHE_SECTORS;
				438	}
				439	wdata->res.count = (isect << SECTOR_SHIFT) - (offset);
				440	if (count < wdata->res.count)
				441	wdata->res.count = count;
				442	out:
				443	bl_put_extent(be);
				444	bl_submit_bio(WRITE, bio);
				445	put_parallel(par);
				446	return PNFS_ATTEMPTED;
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	447	}
				448
Fred Isaman	9e69296	2011-07-30 20:52:41 -0400	[diff] [blame]	449	/* FIXME - range ignored */
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	450	static void
Fred Isaman	9e69296	2011-07-30 20:52:41 -0400	[diff] [blame]	451	release_extents(struct pnfs_block_layout bl, struct pnfs_layout_range range)
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	452	{
Fred Isaman	9e69296	2011-07-30 20:52:41 -0400	[diff] [blame]	453	int i;
				454	struct pnfs_block_extent *be;
				455
				456	spin_lock(&bl->bl_ext_lock);
				457	for (i = 0; i < EXTENT_LISTS; i++) {
				458	while (!list_empty(&bl->bl_extents[i])) {
				459	be = list_first_entry(&bl->bl_extents[i],
				460	struct pnfs_block_extent,
				461	be_node);
				462	list_del(&be->be_node);
				463	bl_put_extent(be);
				464	}
				465	}
				466	spin_unlock(&bl->bl_ext_lock);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	467	}
				468
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	469	static void
				470	release_inval_marks(struct pnfs_inval_markings *marks)
				471	{
Fred Isaman	c1c2a4c	2011-07-30 20:52:49 -0400	[diff] [blame]	472	struct pnfs_inval_tracking pos, temp;
				473
				474	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
				475	list_del(&pos->it_link);
				476	kfree(pos);
				477	}
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	478	return;
				479	}
				480
				481	static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
				482	{
				483	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
				484
				485	dprintk("%s enter\n", __func__);
				486	release_extents(bl, NULL);
				487	release_inval_marks(&bl->bl_inval);
				488	kfree(bl);
				489	}
				490
				491	static struct pnfs_layout_hdr bl_alloc_layout_hdr(struct inode inode,
				492	gfp_t gfp_flags)
				493	{
				494	struct pnfs_block_layout *bl;
				495
				496	dprintk("%s enter\n", __func__);
				497	bl = kzalloc(sizeof(*bl), gfp_flags);
				498	if (!bl)
				499	return NULL;
				500	spin_lock_init(&bl->bl_ext_lock);
				501	INIT_LIST_HEAD(&bl->bl_extents[0]);
				502	INIT_LIST_HEAD(&bl->bl_extents[1]);
				503	INIT_LIST_HEAD(&bl->bl_commit);
				504	INIT_LIST_HEAD(&bl->bl_committing);
				505	bl->bl_count = 0;
				506	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT;
				507	BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
				508	return &bl->bl_layout;
				509	}
				510
/*
 * Layout-driver hook: free a layout segment.  Only the segment itself is
 * freed here; this function does not touch any extents.
 */
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);

	kfree(lseg);
}

Fred Isaman	a60d2eb	2011-07-30 20:52:44 -0400	[diff] [blame]	517	/* We pretty much ignore lseg, and store all data layout wide, so we
				518	* can correctly merge.
				519	*/
				520	static struct pnfs_layout_segment bl_alloc_lseg(struct pnfs_layout_hdr lo,
				521	struct nfs4_layoutget_res *lgr,
				522	gfp_t gfp_flags)
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	523	{
Fred Isaman	a60d2eb	2011-07-30 20:52:44 -0400	[diff] [blame]	524	struct pnfs_layout_segment *lseg;
				525	int status;
				526
				527	dprintk("%s enter\n", __func__);
				528	lseg = kzalloc(sizeof(*lseg), gfp_flags);
				529	if (!lseg)
				530	return ERR_PTR(-ENOMEM);
				531	status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags);
				532	if (status) {
				533	/* We don't want to call the full-blown bl_free_lseg,
				534	* since on error extents were not touched.
				535	*/
				536	kfree(lseg);
				537	return ERR_PTR(status);
				538	}
				539	return lseg;
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	540	}
				541
				542	static void
				543	bl_encode_layoutcommit(struct pnfs_layout_hdr lo, struct xdr_stream xdr,
				544	const struct nfs4_layoutcommit_args *arg)
				545	{
Fred Isaman	90ace12	2011-07-30 20:52:51 -0400	[diff] [blame]	546	dprintk("%s enter\n", __func__);
				547	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	548	}
				549
				550	static void
				551	bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
				552	{
Fred Isaman	b2be781	2011-07-30 20:52:52 -0400	[diff] [blame]	553	struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;
				554
				555	dprintk("%s enter\n", __func__);
				556	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	557	}
				558
Fred Isaman	2f9fd18	2011-07-30 20:52:46 -0400	[diff] [blame]	559	static void free_blk_mountid(struct block_mount_id *mid)
				560	{
				561	if (mid) {
				562	struct pnfs_block_dev *dev;
				563	spin_lock(&mid->bm_lock);
				564	while (!list_empty(&mid->bm_devlist)) {
				565	dev = list_first_entry(&mid->bm_devlist,
				566	struct pnfs_block_dev,
				567	bm_node);
				568	list_del(&dev->bm_node);
				569	bl_free_block_dev(dev);
				570	}
				571	spin_unlock(&mid->bm_lock);
				572	kfree(mid);
				573	}
				574	}
				575
				576	/* This is mostly copied from the filelayout's get_device_info function.
				577	* It seems much of this should be at the generic pnfs level.
				578	*/
				579	static struct pnfs_block_dev *
				580	nfs4_blk_get_deviceinfo(struct nfs_server server, const struct nfs_fh fh,
				581	struct nfs4_deviceid *d_id)
				582	{
				583	struct pnfs_device *dev;
				584	struct pnfs_block_dev *rv = NULL;
				585	u32 max_resp_sz;
				586	int max_pages;
				587	struct page **pages = NULL;
				588	int i, rc;
				589
				590	/*
				591	* Use the session max response size as the basis for setting
				592	* GETDEVICEINFO's maxcount
				593	*/
				594	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
				595	max_pages = max_resp_sz >> PAGE_SHIFT;
				596	dprintk("%s max_resp_sz %u max_pages %d\n",
				597	__func__, max_resp_sz, max_pages);
				598
				599	dev = kmalloc(sizeof(*dev), GFP_NOFS);
				600	if (!dev) {
				601	dprintk("%s kmalloc failed\n", __func__);
				602	return NULL;
				603	}
				604
				605	pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
				606	if (pages == NULL) {
				607	kfree(dev);
				608	return NULL;
				609	}
				610	for (i = 0; i < max_pages; i++) {
				611	pages[i] = alloc_page(GFP_NOFS);
				612	if (!pages[i])
				613	goto out_free;
				614	}
				615
				616	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
				617	dev->layout_type = LAYOUT_BLOCK_VOLUME;
				618	dev->pages = pages;
				619	dev->pgbase = 0;
				620	dev->pglen = PAGE_SIZE * max_pages;
				621	dev->mincount = 0;
				622
				623	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
				624	rc = nfs4_proc_getdeviceinfo(server, dev);
				625	dprintk("%s getdevice info returns %d\n", __func__, rc);
				626	if (rc)
				627	goto out_free;
				628
				629	rv = nfs4_blk_decode_device(server, dev);
				630	out_free:
				631	for (i = 0; i < max_pages; i++)
				632	__free_page(pages[i]);
				633	kfree(pages);
				634	kfree(dev);
				635	return rv;
				636	}
				637
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	638	static int
				639	bl_set_layoutdriver(struct nfs_server server, const struct nfs_fh fh)
				640	{
Fred Isaman	2f9fd18	2011-07-30 20:52:46 -0400	[diff] [blame]	641	struct block_mount_id *b_mt_id = NULL;
				642	struct pnfs_devicelist *dlist = NULL;
				643	struct pnfs_block_dev *bdev;
				644	LIST_HEAD(block_disklist);
				645	int status = 0, i;
				646
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	647	dprintk("%s enter\n", __func__);
Fred Isaman	2f9fd18	2011-07-30 20:52:46 -0400	[diff] [blame]	648
				649	if (server->pnfs_blksize == 0) {
				650	dprintk("%s Server did not return blksize\n", __func__);
				651	return -EINVAL;
				652	}
				653	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
				654	if (!b_mt_id) {
				655	status = -ENOMEM;
				656	goto out_error;
				657	}
				658	/* Initialize nfs4 block layout mount id */
				659	spin_lock_init(&b_mt_id->bm_lock);
				660	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
				661
				662	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
				663	if (!dlist) {
				664	status = -ENOMEM;
				665	goto out_error;
				666	}
				667	dlist->eof = 0;
				668	while (!dlist->eof) {
				669	status = nfs4_proc_getdevicelist(server, fh, dlist);
				670	if (status)
				671	goto out_error;
				672	dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
				673	__func__, dlist->num_devs, dlist->eof);
				674	for (i = 0; i < dlist->num_devs; i++) {
				675	bdev = nfs4_blk_get_deviceinfo(server, fh,
				676	&dlist->dev_id[i]);
				677	if (!bdev) {
				678	status = -ENODEV;
				679	goto out_error;
				680	}
				681	spin_lock(&b_mt_id->bm_lock);
				682	list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
				683	spin_unlock(&b_mt_id->bm_lock);
				684	}
				685	}
				686	dprintk("%s SUCCESS\n", __func__);
				687	server->pnfs_ld_data = b_mt_id;
				688
				689	out_return:
				690	kfree(dlist);
				691	return status;
				692
				693	out_error:
				694	free_blk_mountid(b_mt_id);
				695	goto out_return;
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	696	}
				697
				698	static int
				699	bl_clear_layoutdriver(struct nfs_server *server)
				700	{
Fred Isaman	2f9fd18	2011-07-30 20:52:46 -0400	[diff] [blame]	701	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
				702
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	703	dprintk("%s enter\n", __func__);
Fred Isaman	2f9fd18	2011-07-30 20:52:46 -0400	[diff] [blame]	704	free_blk_mountid(b_mt_id);
				705	dprintk("%s RETURNS\n", __func__);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	706	return 0;
				707	}
				708
Benny Halevy	e9643fe	2011-07-30 20:52:40 -0400	[diff] [blame]	709	static const struct nfs_pageio_ops bl_pg_read_ops = {
				710	.pg_init = pnfs_generic_pg_init_read,
				711	.pg_test = pnfs_generic_pg_test,
				712	.pg_doio = pnfs_generic_pg_readpages,
				713	};
				714
				715	static const struct nfs_pageio_ops bl_pg_write_ops = {
				716	.pg_init = pnfs_generic_pg_init_write,
				717	.pg_test = pnfs_generic_pg_test,
				718	.pg_doio = pnfs_generic_pg_writepages,
				719	};
				720
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	721	static struct pnfs_layoutdriver_type blocklayout_type = {
				722	.id = LAYOUT_BLOCK_VOLUME,
				723	.name = "LAYOUT_BLOCK_VOLUME",
				724	.read_pagelist = bl_read_pagelist,
				725	.write_pagelist = bl_write_pagelist,
				726	.alloc_layout_hdr = bl_alloc_layout_hdr,
				727	.free_layout_hdr = bl_free_layout_hdr,
				728	.alloc_lseg = bl_alloc_lseg,
				729	.free_lseg = bl_free_lseg,
				730	.encode_layoutcommit = bl_encode_layoutcommit,
				731	.cleanup_layoutcommit = bl_cleanup_layoutcommit,
				732	.set_layoutdriver = bl_set_layoutdriver,
				733	.clear_layoutdriver = bl_clear_layoutdriver,
Benny Halevy	e9643fe	2011-07-30 20:52:40 -0400	[diff] [blame]	734	.pg_read_ops = &bl_pg_read_ops,
				735	.pg_write_ops = &bl_pg_write_ops,
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	736	};
				737
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	738	static const struct rpc_pipe_ops bl_upcall_ops = {
				739	.upcall = bl_pipe_upcall,
				740	.downcall = bl_pipe_downcall,
				741	.destroy_msg = bl_pipe_destroy_msg,
				742	};
				743
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	744	static int __init nfs4blocklayout_init(void)
				745	{
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	746	struct vfsmount *mnt;
				747	struct path path;
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	748	int ret;
				749
				750	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
				751
				752	ret = pnfs_register_layoutdriver(&blocklayout_type);
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	753	if (ret)
				754	goto out;
				755
				756	init_waitqueue_head(&bl_wq);
				757
				758	mnt = rpc_get_mount();
				759	if (IS_ERR(mnt)) {
				760	ret = PTR_ERR(mnt);
				761	goto out_remove;
				762	}
				763
				764	ret = vfs_path_lookup(mnt->mnt_root,
				765	mnt,
				766	NFS_PIPE_DIRNAME, 0, &path);
				767	if (ret)
				768	goto out_remove;
				769
				770	bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
				771	&bl_upcall_ops, 0);
				772	if (IS_ERR(bl_device_pipe)) {
				773	ret = PTR_ERR(bl_device_pipe);
				774	goto out_remove;
				775	}
				776	out:
				777	return ret;
				778
				779	out_remove:
				780	pnfs_unregister_layoutdriver(&blocklayout_type);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	781	return ret;
				782	}
				783
				784	static void __exit nfs4blocklayout_exit(void)
				785	{
				786	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
				787	__func__);
				788
				789	pnfs_unregister_layoutdriver(&blocklayout_type);
Jim Rees	fe0a9b7	2011-07-30 20:52:42 -0400	[diff] [blame]	790	rpc_unlink(bl_device_pipe);
Fred Isaman	155e752	2011-07-30 20:52:39 -0400	[diff] [blame]	791	}
				792
				793	MODULE_ALIAS("nfs-layouttype4-3");
				794
				795	module_init(nfs4blocklayout_init);
				796	module_exit(nfs4blocklayout_exit);