Blame - fs/exofs/inode.c - kernel/msm

blob: 01fa798e8fdb1056a8cd598ca16d3772f3fc65c5 [file] [log] [blame]

Boaz Harrosh	e806271	2008-10-27 18:37:02 +0200	[diff] [blame]	1	/*
				2	* Copyright (C) 2005, 2006
Boaz Harrosh	27d2e14	2009-06-14 17:23:09 +0300	[diff] [blame]	3	* Avishay Traeger (avishay@gmail.com)
Boaz Harrosh	e806271	2008-10-27 18:37:02 +0200	[diff] [blame]	4	* Copyright (C) 2008, 2009
				5	* Boaz Harrosh <bharrosh@panasas.com>
				6	*
				7	* Copyrights for code taken from ext2:
				8	* Copyright (C) 1992, 1993, 1994, 1995
				9	* Remy Card (card@masi.ibp.fr)
				10	* Laboratoire MASI - Institut Blaise Pascal
				11	* Universite Pierre et Marie Curie (Paris VI)
				12	* from
				13	* linux/fs/minix/inode.c
				14	* Copyright (C) 1991, 1992 Linus Torvalds
				15	*
				16	* This file is part of exofs.
				17	*
				18	* exofs is free software; you can redistribute it and/or modify
				19	* it under the terms of the GNU General Public License as published by
				20	* the Free Software Foundation. Since it is based on ext2, and the only
				21	* valid version of GPL for the Linux kernel is version 2, the only valid
				22	* version of GPL for exofs is version 2.
				23	*
				24	* exofs is distributed in the hope that it will be useful,
				25	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				26	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				27	* GNU General Public License for more details.
				28	*
				29	* You should have received a copy of the GNU General Public License
				30	* along with exofs; if not, write to the Free Software
				31	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				32	*/
				33
				34	#include <linux/writeback.h>
				35	#include <linux/buffer_head.h>
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	36	#include <scsi/scsi_device.h>
Boaz Harrosh	e806271	2008-10-27 18:37:02 +0200	[diff] [blame]	37
				38	#include "exofs.h"
				39
				40	#ifdef CONFIG_EXOFS_DEBUG
				41	# define EXOFS_DEBUG_OBJ_ISIZE 1
				42	#endif
				43
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	44	#define EXOFS_DBGMSG2(M...) do {} while (0)
				45
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	46	struct page_collect {
				47	struct exofs_sb_info *sbi;
				48	struct request_queue *req_q;
				49	struct inode *inode;
				50	unsigned expected_pages;
				51
				52	struct bio *bio;
				53	unsigned nr_pages;
				54	unsigned long length;
				55	loff_t pg_first; /* keep 64bit also in 32-arches */
				56	};
				57
				58	static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
				59	struct inode *inode)
				60	{
				61	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	62
				63	pcol->sbi = sbi;
Boaz Harrosh	fc2fac5	2009-05-24 20:04:43 +0300	[diff] [blame]	64	pcol->req_q = osd_request_queue(sbi->s_dev);
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	65	pcol->inode = inode;
				66	pcol->expected_pages = expected_pages;
				67
				68	pcol->bio = NULL;
				69	pcol->nr_pages = 0;
				70	pcol->length = 0;
				71	pcol->pg_first = -1;
				72
				73	EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
				74	expected_pages);
				75	}
				76
				77	static void _pcol_reset(struct page_collect *pcol)
				78	{
				79	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
				80
				81	pcol->bio = NULL;
				82	pcol->nr_pages = 0;
				83	pcol->length = 0;
				84	pcol->pg_first = -1;
				85	EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
				86	pcol->inode->i_ino, pcol->expected_pages);
				87
				88	/* this is probably the end of the loop but in writes
				89	* it might not end here. don't be left with nothing
				90	*/
				91	if (!pcol->expected_pages)
				92	pcol->expected_pages = 128;
				93	}
				94
				95	static int pcol_try_alloc(struct page_collect *pcol)
				96	{
				97	int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
				98
				99	for (; pages; pages >>= 1) {
				100	pcol->bio = bio_alloc(GFP_KERNEL, pages);
				101	if (likely(pcol->bio))
				102	return 0;
				103	}
				104
				105	EXOFS_ERR("Failed to kcalloc expected_pages=%u\n",
				106	pcol->expected_pages);
				107	return -ENOMEM;
				108	}
				109
				110	static void pcol_free(struct page_collect *pcol)
				111	{
				112	bio_put(pcol->bio);
				113	pcol->bio = NULL;
				114	}
				115
				116	static int pcol_add_page(struct page_collect pcol, struct page page,
				117	unsigned len)
				118	{
				119	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
				120	if (unlikely(len != added_len))
				121	return -ENOMEM;
				122
				123	++pcol->nr_pages;
				124	pcol->length += len;
				125	return 0;
				126	}
				127
				128	static int update_read_page(struct page *page, int ret)
				129	{
				130	if (ret == 0) {
				131	/* Everything is OK */
				132	SetPageUptodate(page);
				133	if (PageError(page))
				134	ClearPageError(page);
				135	} else if (ret == -EFAULT) {
				136	/* In this case we were trying to read something that wasn't on
				137	* disk yet - return a page full of zeroes. This should be OK,
				138	* because the object should be empty (if there was a write
				139	* before this read, the read would be waiting with the page
				140	* locked */
				141	clear_highpage(page);
				142
				143	SetPageUptodate(page);
				144	if (PageError(page))
				145	ClearPageError(page);
				146	ret = 0; /* recovered error */
				147	EXOFS_DBGMSG("recovered read error\n");
				148	} else /* Error */
				149	SetPageError(page);
				150
				151	return ret;
				152	}
				153
				154	static void update_write_page(struct page *page, int ret)
				155	{
				156	if (ret) {
				157	mapping_set_error(page->mapping, ret);
				158	SetPageError(page);
				159	}
				160	end_page_writeback(page);
				161	}
				162
				163	/* Called at the end of reads, to optionally unlock pages and update their
				164	* status.
				165	*/
				166	static int __readpages_done(struct osd_request or, struct page_collect pcol,
				167	bool do_unlock)
				168	{
				169	struct bio_vec *bvec;
				170	int i;
				171	u64 resid;
				172	u64 good_bytes;
				173	u64 length = 0;
				174	int ret = exofs_check_ok_resid(or, &resid, NULL);
				175
				176	osd_end_request(or);
				177
				178	if (likely(!ret))
				179	good_bytes = pcol->length;
				180	else if (!resid)
				181	good_bytes = 0;
				182	else
				183	good_bytes = pcol->length - resid;
				184
				185	EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
				186	" length=0x%lx nr_pages=%u\n",
				187	pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
				188	pcol->nr_pages);
				189
				190	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
				191	struct page *page = bvec->bv_page;
				192	struct inode *inode = page->mapping->host;
				193	int page_stat;
				194
				195	if (inode != pcol->inode)
				196	continue; /* osd might add more pages at end */
				197
				198	if (likely(length < good_bytes))
				199	page_stat = 0;
				200	else
				201	page_stat = ret;
				202
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	203	EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n",
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	204	inode->i_ino, page->index,
				205	page_stat ? "bad_bytes" : "good_bytes");
				206
				207	ret = update_read_page(page, page_stat);
				208	if (do_unlock)
				209	unlock_page(page);
				210	length += bvec->bv_len;
				211	}
				212
				213	pcol_free(pcol);
				214	EXOFS_DBGMSG("readpages_done END\n");
				215	return ret;
				216	}
				217
				218	/* callback of async reads */
				219	static void readpages_done(struct osd_request or, void p)
				220	{
				221	struct page_collect *pcol = p;
				222
				223	__readpages_done(or, pcol, true);
				224	atomic_dec(&pcol->sbi->s_curr_pending);
				225	kfree(p);
				226	}
				227
				228	static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
				229	{
				230	struct bio_vec *bvec;
				231	int i;
				232
				233	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
				234	struct page *page = bvec->bv_page;
				235
				236	if (rw == READ)
				237	update_read_page(page, ret);
				238	else
				239	update_write_page(page, ret);
				240
				241	unlock_page(page);
				242	}
				243	pcol_free(pcol);
				244	}
				245
				246	static int read_exec(struct page_collect *pcol, bool is_sync)
				247	{
				248	struct exofs_i_info *oi = exofs_i(pcol->inode);
				249	struct osd_obj_id obj = {pcol->sbi->s_pid,
				250	pcol->inode->i_ino + EXOFS_OBJ_OFF};
				251	struct osd_request *or = NULL;
				252	struct page_collect *pcol_copy = NULL;
				253	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
				254	int ret;
				255
				256	if (!pcol->bio)
				257	return 0;
				258
				259	/* see comment in _readpage() about sync reads */
				260	WARN_ON(is_sync && (pcol->nr_pages != 1));
				261
				262	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
				263	if (unlikely(!or)) {
				264	ret = -ENOMEM;
				265	goto err;
				266	}
				267
Boaz Harrosh	62f469b	2009-05-24 20:04:26 +0300	[diff] [blame]	268	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	269
				270	if (is_sync) {
				271	exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
				272	return __readpages_done(or, pcol, false);
				273	}
				274
				275	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
				276	if (!pcol_copy) {
				277	ret = -ENOMEM;
				278	goto err;
				279	}
				280
				281	pcol_copy = pcol;
				282	ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
				283	if (unlikely(ret))
				284	goto err;
				285
				286	atomic_inc(&pcol->sbi->s_curr_pending);
				287
				288	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
				289	obj.id, _LLU(i_start), pcol->length);
				290
				291	/* pages ownership was passed to pcol_copy */
				292	_pcol_reset(pcol);
				293	return 0;
				294
				295	err:
				296	if (!is_sync)
				297	_unlock_pcol_pages(pcol, ret, READ);
Boaz Harrosh	b76a3f9	2009-06-08 19:28:41 +0300	[diff] [blame]	298	else /* Pages unlocked by caller in sync mode only free bio */
				299	pcol_free(pcol);
				300
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	301	kfree(pcol_copy);
				302	if (or)
				303	osd_end_request(or);
				304	return ret;
				305	}
				306
				307	/* readpage_strip is called either directly from readpage() or by the VFS from
				308	* within read_cache_pages(), to add one more page to be read. It will try to
				309	* collect as many contiguous pages as posible. If a discontinuity is
				310	* encountered, or it runs out of resources, it will submit the previous segment
				311	* and will start a new collection. Eventually caller must submit the last
				312	* segment if present.
				313	*/
				314	static int readpage_strip(void data, struct page page)
				315	{
				316	struct page_collect *pcol = data;
				317	struct inode *inode = pcol->inode;
				318	struct exofs_i_info *oi = exofs_i(inode);
				319	loff_t i_size = i_size_read(inode);
				320	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
				321	size_t len;
				322	int ret;
				323
				324	/* FIXME: Just for debugging, will be removed */
				325	if (PageUptodate(page))
				326	EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
				327	page->index);
				328
				329	if (page->index < end_index)
				330	len = PAGE_CACHE_SIZE;
				331	else if (page->index == end_index)
				332	len = i_size & ~PAGE_CACHE_MASK;
				333	else
				334	len = 0;
				335
				336	if (!len \|\| !obj_created(oi)) {
				337	/* this will be out of bounds, or doesn't exist yet.
				338	* Current page is cleared and the request is split
				339	*/
				340	clear_highpage(page);
				341
				342	SetPageUptodate(page);
				343	if (PageError(page))
				344	ClearPageError(page);
				345
				346	unlock_page(page);
				347	EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
				348	" splitting\n", inode->i_ino, page->index);
				349
				350	return read_exec(pcol, false);
				351	}
				352
				353	try_again:
				354
				355	if (unlikely(pcol->pg_first == -1)) {
				356	pcol->pg_first = page->index;
				357	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
				358	page->index)) {
				359	/* Discontinuity detected, split the request */
				360	ret = read_exec(pcol, false);
				361	if (unlikely(ret))
				362	goto fail;
				363	goto try_again;
				364	}
				365
				366	if (!pcol->bio) {
				367	ret = pcol_try_alloc(pcol);
				368	if (unlikely(ret))
				369	goto fail;
				370	}
				371
				372	if (len != PAGE_CACHE_SIZE)
				373	zero_user(page, len, PAGE_CACHE_SIZE - len);
				374
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	375	EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	376	inode->i_ino, page->index, len);
				377
				378	ret = pcol_add_page(pcol, page, len);
				379	if (ret) {
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	380	EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	381	"this_len=0x%zx nr_pages=%u length=0x%lx\n",
				382	page, len, pcol->nr_pages, pcol->length);
				383
				384	/* split the request, and start again with current page */
				385	ret = read_exec(pcol, false);
				386	if (unlikely(ret))
				387	goto fail;
				388
				389	goto try_again;
				390	}
				391
				392	return 0;
				393
				394	fail:
				395	/* SetPageError(page); ??? */
				396	unlock_page(page);
				397	return ret;
				398	}
				399
				400	static int exofs_readpages(struct file file, struct address_space mapping,
				401	struct list_head *pages, unsigned nr_pages)
				402	{
				403	struct page_collect pcol;
				404	int ret;
				405
				406	_pcol_init(&pcol, nr_pages, mapping->host);
				407
				408	ret = read_cache_pages(mapping, pages, readpage_strip, &pcol);
				409	if (ret) {
				410	EXOFS_ERR("read_cache_pages => %d\n", ret);
				411	return ret;
				412	}
				413
				414	return read_exec(&pcol, false);
				415	}
				416
				417	static int _readpage(struct page *page, bool is_sync)
				418	{
				419	struct page_collect pcol;
				420	int ret;
				421
				422	_pcol_init(&pcol, 1, page->mapping->host);
				423
				424	/* readpage_strip might call read_exec(,async) inside at several places
				425	* but this is safe for is_async=0 since read_exec will not do anything
				426	* when we have a single page.
				427	*/
				428	ret = readpage_strip(&pcol, page);
				429	if (ret) {
				430	EXOFS_ERR("_readpage => %d\n", ret);
				431	return ret;
				432	}
				433
				434	return read_exec(&pcol, is_sync);
				435	}
				436
				437	/*
				438	* We don't need the file
				439	*/
				440	static int exofs_readpage(struct file file, struct page page)
				441	{
				442	return _readpage(page, false);
				443	}
				444
				445	/* Callback for osd_write. All writes are asynchronouse */
				446	static void writepages_done(struct osd_request or, void p)
				447	{
				448	struct page_collect *pcol = p;
				449	struct bio_vec *bvec;
				450	int i;
				451	u64 resid;
				452	u64 good_bytes;
				453	u64 length = 0;
				454
				455	int ret = exofs_check_ok_resid(or, NULL, &resid);
				456
				457	osd_end_request(or);
				458	atomic_dec(&pcol->sbi->s_curr_pending);
				459
				460	if (likely(!ret))
				461	good_bytes = pcol->length;
				462	else if (!resid)
				463	good_bytes = 0;
				464	else
				465	good_bytes = pcol->length - resid;
				466
				467	EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
				468	" length=0x%lx nr_pages=%u\n",
				469	pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
				470	pcol->nr_pages);
				471
				472	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
				473	struct page *page = bvec->bv_page;
				474	struct inode *inode = page->mapping->host;
				475	int page_stat;
				476
				477	if (inode != pcol->inode)
				478	continue; /* osd might add more pages to a bio */
				479
				480	if (likely(length < good_bytes))
				481	page_stat = 0;
				482	else
				483	page_stat = ret;
				484
				485	update_write_page(page, page_stat);
				486	unlock_page(page);
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	487	EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	488	inode->i_ino, page->index, page_stat);
				489
				490	length += bvec->bv_len;
				491	}
				492
				493	pcol_free(pcol);
				494	kfree(pcol);
				495	EXOFS_DBGMSG("writepages_done END\n");
				496	}
				497
				498	static int write_exec(struct page_collect *pcol)
				499	{
				500	struct exofs_i_info *oi = exofs_i(pcol->inode);
				501	struct osd_obj_id obj = {pcol->sbi->s_pid,
				502	pcol->inode->i_ino + EXOFS_OBJ_OFF};
				503	struct osd_request *or = NULL;
				504	struct page_collect *pcol_copy = NULL;
				505	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
				506	int ret;
				507
				508	if (!pcol->bio)
				509	return 0;
				510
				511	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
				512	if (unlikely(!or)) {
				513	EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
				514	ret = -ENOMEM;
				515	goto err;
				516	}
				517
				518	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
				519	if (!pcol_copy) {
				520	EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
				521	ret = -ENOMEM;
				522	goto err;
				523	}
				524
				525	pcol_copy = pcol;
				526
Boaz Harrosh	62f469b	2009-05-24 20:04:26 +0300	[diff] [blame]	527	pcol_copy->bio->bi_rw \|= (1 << BIO_RW); /* FIXME: bio_set_dir() */
				528	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	529	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
				530	if (unlikely(ret)) {
				531	EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
				532	goto err;
				533	}
				534
				535	atomic_inc(&pcol->sbi->s_curr_pending);
				536	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
				537	pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
				538	pcol->length);
				539	/* pages ownership was passed to pcol_copy */
				540	_pcol_reset(pcol);
				541	return 0;
				542
				543	err:
				544	_unlock_pcol_pages(pcol, ret, WRITE);
				545	kfree(pcol_copy);
				546	if (or)
				547	osd_end_request(or);
				548	return ret;
				549	}
				550
				551	/* writepage_strip is called either directly from writepage() or by the VFS from
				552	* within write_cache_pages(), to add one more page to be written to storage.
				553	* It will try to collect as many contiguous pages as possible. If a
				554	* discontinuity is encountered or it runs out of resources it will submit the
				555	* previous segment and will start a new collection.
				556	* Eventually caller must submit the last segment if present.
				557	*/
				558	static int writepage_strip(struct page *page,
				559	struct writeback_control wbc_unused, void data)
				560	{
				561	struct page_collect *pcol = data;
				562	struct inode *inode = pcol->inode;
				563	struct exofs_i_info *oi = exofs_i(inode);
				564	loff_t i_size = i_size_read(inode);
				565	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
				566	size_t len;
				567	int ret;
				568
				569	BUG_ON(!PageLocked(page));
				570
				571	ret = wait_obj_created(oi);
				572	if (unlikely(ret))
				573	goto fail;
				574
				575	if (page->index < end_index)
				576	/* in this case, the page is within the limits of the file */
				577	len = PAGE_CACHE_SIZE;
				578	else {
				579	len = i_size & ~PAGE_CACHE_MASK;
				580
				581	if (page->index > end_index \|\| !len) {
				582	/* in this case, the page is outside the limits
				583	* (truncate in progress)
				584	*/
				585	ret = write_exec(pcol);
				586	if (unlikely(ret))
				587	goto fail;
				588	if (PageError(page))
				589	ClearPageError(page);
				590	unlock_page(page);
				591	return 0;
				592	}
				593	}
				594
				595	try_again:
				596
				597	if (unlikely(pcol->pg_first == -1)) {
				598	pcol->pg_first = page->index;
				599	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
				600	page->index)) {
				601	/* Discontinuity detected, split the request */
				602	ret = write_exec(pcol);
				603	if (unlikely(ret))
				604	goto fail;
				605	goto try_again;
				606	}
				607
				608	if (!pcol->bio) {
				609	ret = pcol_try_alloc(pcol);
				610	if (unlikely(ret))
				611	goto fail;
				612	}
				613
Boaz Harrosh	fe33cc1	2009-11-01 18:28:14 +0200	[diff] [blame^]	614	EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
Boaz Harrosh	beaec07	2008-10-27 19:31:34 +0200	[diff] [blame]	615	inode->i_ino, page->index, len);
				616
				617	ret = pcol_add_page(pcol, page, len);
				618	if (unlikely(ret)) {
				619	EXOFS_DBGMSG("Failed pcol_add_page "
				620	"nr_pages=%u total_length=0x%lx\n",
				621	pcol->nr_pages, pcol->length);
				622
				623	/* split the request, next loop will start again */
				624	ret = write_exec(pcol);
				625	if (unlikely(ret)) {
				626	EXOFS_DBGMSG("write_exec faild => %d", ret);
				627	goto fail;
				628	}
				629
				630	goto try_again;
				631	}
				632
				633	BUG_ON(PageWriteback(page));
				634	set_page_writeback(page);
				635
				636	return 0;
				637
				638	fail:
				639	set_bit(AS_EIO, &page->mapping->flags);
				640	unlock_page(page);
				641	return ret;
				642	}
				643
				644	static int exofs_writepages(struct address_space *mapping,
				645	struct writeback_control *wbc)
				646	{
				647	struct page_collect pcol;
				648	long start, end, expected_pages;
				649	int ret;
				650
				651	start = wbc->range_start >> PAGE_CACHE_SHIFT;
				652	end = (wbc->range_end == LLONG_MAX) ?
				653	start + mapping->nrpages :
				654	wbc->range_end >> PAGE_CACHE_SHIFT;
				655
				656	if (start \|\| end)
				657	expected_pages = min(end - start + 1, 32L);
				658	else
				659	expected_pages = mapping->nrpages;
				660
				661	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
				662	" m->nrpages=%lu start=0x%lx end=0x%lx\n",
				663	mapping->host->i_ino, wbc->range_start, wbc->range_end,
				664	mapping->nrpages, start, end);
				665
				666	_pcol_init(&pcol, expected_pages, mapping->host);
				667
				668	ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
				669	if (ret) {
				670	EXOFS_ERR("write_cache_pages => %d\n", ret);
				671	return ret;
				672	}
				673
				674	return write_exec(&pcol);
				675	}
				676
				677	static int exofs_writepage(struct page page, struct writeback_control wbc)
				678	{
				679	struct page_collect pcol;
				680	int ret;
				681
				682	_pcol_init(&pcol, 1, page->mapping->host);
				683
				684	ret = writepage_strip(page, NULL, &pcol);
				685	if (ret) {
				686	EXOFS_ERR("exofs_writepage => %d\n", ret);
				687	return ret;
				688	}
				689
				690	return write_exec(&pcol);
				691	}
				692
				693	int exofs_write_begin(struct file file, struct address_space mapping,
				694	loff_t pos, unsigned len, unsigned flags,
				695	struct page pagep, void fsdata)
				696	{
				697	int ret = 0;
				698	struct page *page;
				699
				700	page = *pagep;
				701	if (page == NULL) {
				702	ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
				703	fsdata);
				704	if (ret) {
				705	EXOFS_DBGMSG("simple_write_begin faild\n");
				706	return ret;
				707	}
				708
				709	page = *pagep;
				710	}
				711
				712	/* read modify write */
				713	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
				714	ret = _readpage(page, true);
				715	if (ret) {
				716	/SetPageError was done by _readpage. Is it ok?/
				717	unlock_page(page);
				718	EXOFS_DBGMSG("__readpage_filler faild\n");
				719	}
				720	}
				721
				722	return ret;
				723	}
				724
				725	static int exofs_write_begin_export(struct file *file,
				726	struct address_space *mapping,
				727	loff_t pos, unsigned len, unsigned flags,
				728	struct page pagep, void fsdata)
				729	{
				730	*pagep = NULL;
				731
				732	return exofs_write_begin(file, mapping, pos, len, flags, pagep,
				733	fsdata);
				734	}
				735
				736	const struct address_space_operations exofs_aops = {
				737	.readpage = exofs_readpage,
				738	.readpages = exofs_readpages,
				739	.writepage = exofs_writepage,
				740	.writepages = exofs_writepages,
				741	.write_begin = exofs_write_begin_export,
				742	.write_end = simple_write_end,
				743	};
				744
Boaz Harrosh	e806271	2008-10-27 18:37:02 +0200	[diff] [blame]	745	/******************************************************************************
				746	* INODE OPERATIONS
				747	*****************************************************************************/
				748
				749	/*
				750	* Test whether an inode is a fast symlink.
				751	*/
				752	static inline int exofs_inode_is_fast_symlink(struct inode *inode)
				753	{
				754	struct exofs_i_info *oi = exofs_i(inode);
				755
				756	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
				757	}
				758
				759	/*
				760	* get_block_t - Fill in a buffer_head
				761	* An OSD takes care of block allocation so we just fake an allocation by
				762	* putting in the inode's sector_t in the buffer_head.
				763	* TODO: What about the case of create==0 and @iblock does not exist in the
				764	* object?
				765	*/
				766	static int exofs_get_block(struct inode *inode, sector_t iblock,
				767	struct buffer_head *bh_result, int create)
				768	{
				769	map_bh(bh_result, inode->i_sb, iblock);
				770	return 0;
				771	}
				772
				773	const struct osd_attr g_attr_logical_length = ATTR_DEF(
				774	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
				775
				776	/*
				777	* Truncate a file to the specified size - all we have to do is set the size
				778	* attribute. We make sure the object exists first.
				779	*/
				780	void exofs_truncate(struct inode *inode)
				781	{
				782	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
				783	struct exofs_i_info *oi = exofs_i(inode);
				784	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
				785	struct osd_request *or;
				786	struct osd_attr attr;
				787	loff_t isize = i_size_read(inode);
				788	__be64 newsize;
				789	int ret;
				790
				791	if (!(S_ISREG(inode->i_mode) \|\| S_ISDIR(inode->i_mode)
				792	\|\| S_ISLNK(inode->i_mode)))
				793	return;
				794	if (exofs_inode_is_fast_symlink(inode))
				795	return;
				796	if (IS_APPEND(inode) \|\| IS_IMMUTABLE(inode))
				797	return;
				798	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				799
				800	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
				801
				802	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
				803	if (unlikely(!or)) {
				804	EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
				805	goto fail;
				806	}
				807
				808	osd_req_set_attributes(or, &obj);
				809
				810	newsize = cpu_to_be64((u64)isize);
				811	attr = g_attr_logical_length;
				812	attr.val_ptr = &newsize;
				813	osd_req_add_set_attr_list(or, &attr, 1);
				814
				815	/* if we are about to truncate an object, and it hasn't been
				816	* created yet, wait
				817	*/
				818	if (unlikely(wait_obj_created(oi)))
				819	goto fail;
				820
				821	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
				822	osd_end_request(or);
				823	if (ret)
				824	goto fail;
				825
				826	out:
				827	mark_inode_dirty(inode);
				828	return;
				829	fail:
				830	make_bad_inode(inode);
				831	goto out;
				832	}
				833
				834	/*
				835	* Set inode attributes - just call generic functions.
				836	*/
				837	int exofs_setattr(struct dentry dentry, struct iattr iattr)
				838	{
				839	struct inode *inode = dentry->d_inode;
				840	int error;
				841
				842	error = inode_change_ok(inode, iattr);
				843	if (error)
				844	return error;
				845
				846	error = inode_setattr(inode, iattr);
				847	return error;
				848	}
Boaz Harrosh	e6af00f	2008-10-28 15:38:12 +0200	[diff] [blame]	849
				850	/*
				851	* Read an inode from the OSD, and return it as is. We also return the size
				852	* attribute in the 'sanity' argument if we got compiled with debugging turned
				853	* on.
				854	*/
				855	static int exofs_get_inode(struct super_block sb, struct exofs_i_info oi,
				856	struct exofs_fcb inode, uint64_t sanity)
				857	{
				858	struct exofs_sb_info *sbi = sb->s_fs_info;
				859	struct osd_request *or;
				860	struct osd_attr attr;
				861	struct osd_obj_id obj = {sbi->s_pid,
				862	oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
				863	int ret;
				864
				865	exofs_make_credential(oi->i_cred, &obj);
				866
				867	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
				868	if (unlikely(!or)) {
				869	EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
				870	return -ENOMEM;
				871	}
				872	osd_req_get_attributes(or, &obj);
				873
				874	/* we need the inode attribute */
				875	osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
				876
				877	#ifdef EXOFS_DEBUG_OBJ_ISIZE
				878	/* we get the size attributes to do a sanity check */
				879	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
				880	#endif
				881
				882	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
				883	if (ret)
				884	goto out;
				885
				886	attr = g_attr_inode_data;
				887	ret = extract_attr_from_req(or, &attr);
				888	if (ret) {
				889	EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
				890	goto out;
				891	}
				892
				893	WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
				894	memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
				895
				896	#ifdef EXOFS_DEBUG_OBJ_ISIZE
				897	attr = g_attr_logical_length;
				898	ret = extract_attr_from_req(or, &attr);
				899	if (ret) {
				900	EXOFS_ERR("ERROR: extract attr from or failed\n");
				901	goto out;
				902	}
				903	*sanity = get_unaligned_be64(attr.val_ptr);
				904	#endif
				905
				906	out:
				907	osd_end_request(or);
				908	return ret;
				909	}
				910
				911	/*
				912	* Fill in an inode read from the OSD and set it up for use
				913	*/
				914	struct inode exofs_iget(struct super_block sb, unsigned long ino)
				915	{
				916	struct exofs_i_info *oi;
				917	struct exofs_fcb fcb;
				918	struct inode *inode;
				919	uint64_t uninitialized_var(sanity);
				920	int ret;
				921
				922	inode = iget_locked(sb, ino);
				923	if (!inode)
				924	return ERR_PTR(-ENOMEM);
				925	if (!(inode->i_state & I_NEW))
				926	return inode;
				927	oi = exofs_i(inode);
				928
				929	/* read the inode from the osd */
				930	ret = exofs_get_inode(sb, oi, &fcb, &sanity);
				931	if (ret)
				932	goto bad_inode;
				933
				934	init_waitqueue_head(&oi->i_wq);
				935	set_obj_created(oi);
				936
				937	/* copy stuff from on-disk struct to in-memory struct */
				938	inode->i_mode = le16_to_cpu(fcb.i_mode);
				939	inode->i_uid = le32_to_cpu(fcb.i_uid);
				940	inode->i_gid = le32_to_cpu(fcb.i_gid);
				941	inode->i_nlink = le16_to_cpu(fcb.i_links_count);
				942	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
				943	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
				944	inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
				945	inode->i_ctime.tv_nsec =
				946	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
				947	oi->i_commit_size = le64_to_cpu(fcb.i_size);
				948	i_size_write(inode, oi->i_commit_size);
				949	inode->i_blkbits = EXOFS_BLKSHIFT;
				950	inode->i_generation = le32_to_cpu(fcb.i_generation);
				951
				952	#ifdef EXOFS_DEBUG_OBJ_ISIZE
				953	if ((inode->i_size != sanity) &&
				954	(!exofs_inode_is_fast_symlink(inode))) {
Boaz Harrosh	58311c4	2009-07-14 11:06:08 +0300	[diff] [blame]	955	EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
Boaz Harrosh	e6af00f	2008-10-28 15:38:12 +0200	[diff] [blame]	956	inode->i_size, _LLU(sanity));
				957	}
				958	#endif
				959
				960	oi->i_dir_start_lookup = 0;
				961
				962	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
				963	ret = -ESTALE;
				964	goto bad_inode;
				965	}
				966
				967	if (S_ISCHR(inode->i_mode) \|\| S_ISBLK(inode->i_mode)) {
				968	if (fcb.i_data[0])
				969	inode->i_rdev =
				970	old_decode_dev(le32_to_cpu(fcb.i_data[0]));
				971	else
				972	inode->i_rdev =
				973	new_decode_dev(le32_to_cpu(fcb.i_data[1]));
				974	} else {
				975	memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
				976	}
				977
				978	if (S_ISREG(inode->i_mode)) {
				979	inode->i_op = &exofs_file_inode_operations;
				980	inode->i_fop = &exofs_file_operations;
				981	inode->i_mapping->a_ops = &exofs_aops;
				982	} else if (S_ISDIR(inode->i_mode)) {
				983	inode->i_op = &exofs_dir_inode_operations;
				984	inode->i_fop = &exofs_dir_operations;
				985	inode->i_mapping->a_ops = &exofs_aops;
				986	} else if (S_ISLNK(inode->i_mode)) {
				987	if (exofs_inode_is_fast_symlink(inode))
				988	inode->i_op = &exofs_fast_symlink_inode_operations;
				989	else {
				990	inode->i_op = &exofs_symlink_inode_operations;
				991	inode->i_mapping->a_ops = &exofs_aops;
				992	}
				993	} else {
				994	inode->i_op = &exofs_special_inode_operations;
				995	if (fcb.i_data[0])
				996	init_special_inode(inode, inode->i_mode,
				997	old_decode_dev(le32_to_cpu(fcb.i_data[0])));
				998	else
				999	init_special_inode(inode, inode->i_mode,
				1000	new_decode_dev(le32_to_cpu(fcb.i_data[1])));
				1001	}
				1002
				1003	unlock_new_inode(inode);
				1004	return inode;
				1005
				1006	bad_inode:
				1007	iget_failed(inode);
				1008	return ERR_PTR(ret);
				1009	}
				1010
				1011	int __exofs_wait_obj_created(struct exofs_i_info *oi)
				1012	{
				1013	if (!obj_created(oi)) {
				1014	BUG_ON(!obj_2bcreated(oi));
				1015	wait_event(oi->i_wq, obj_created(oi));
				1016	}
				1017	return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
				1018	}
				1019	/*
				1020	* Callback function from exofs_new_inode(). The important thing is that we
				1021	* set the obj_created flag so that other methods know that the object exists on
				1022	* the OSD.
				1023	*/
				1024	static void create_done(struct osd_request or, void p)
				1025	{
				1026	struct inode *inode = p;
				1027	struct exofs_i_info *oi = exofs_i(inode);
				1028	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
				1029	int ret;
				1030
				1031	ret = exofs_check_ok(or);
				1032	osd_end_request(or);
				1033	atomic_dec(&sbi->s_curr_pending);
				1034
				1035	if (unlikely(ret)) {
				1036	EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
				1037	_LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF));
				1038	make_bad_inode(inode);
				1039	} else
				1040	set_obj_created(oi);
				1041
				1042	atomic_dec(&inode->i_count);
				1043	wake_up(&oi->i_wq);
				1044	}
				1045
				1046	/*
				1047	* Set up a new inode and create an object for it on the OSD
				1048	*/
				1049	struct inode exofs_new_inode(struct inode dir, int mode)
				1050	{
				1051	struct super_block *sb;
				1052	struct inode *inode;
				1053	struct exofs_i_info *oi;
				1054	struct exofs_sb_info *sbi;
				1055	struct osd_request *or;
				1056	struct osd_obj_id obj;
				1057	int ret;
				1058
				1059	sb = dir->i_sb;
				1060	inode = new_inode(sb);
				1061	if (!inode)
				1062	return ERR_PTR(-ENOMEM);
				1063
				1064	oi = exofs_i(inode);
				1065
				1066	init_waitqueue_head(&oi->i_wq);
				1067	set_obj_2bcreated(oi);
				1068
				1069	sbi = sb->s_fs_info;
				1070
				1071	sb->s_dirt = 1;
				1072	inode->i_uid = current->cred->fsuid;
				1073	if (dir->i_mode & S_ISGID) {
				1074	inode->i_gid = dir->i_gid;
				1075	if (S_ISDIR(mode))
				1076	mode \|= S_ISGID;
				1077	} else {
				1078	inode->i_gid = current->cred->fsgid;
				1079	}
				1080	inode->i_mode = mode;
				1081
				1082	inode->i_ino = sbi->s_nextid++;
				1083	inode->i_blkbits = EXOFS_BLKSHIFT;
				1084	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
				1085	oi->i_commit_size = inode->i_size = 0;
				1086	spin_lock(&sbi->s_next_gen_lock);
				1087	inode->i_generation = sbi->s_next_generation++;
				1088	spin_unlock(&sbi->s_next_gen_lock);
				1089	insert_inode_hash(inode);
				1090
				1091	mark_inode_dirty(inode);
				1092
				1093	obj.partition = sbi->s_pid;
				1094	obj.id = inode->i_ino + EXOFS_OBJ_OFF;
				1095	exofs_make_credential(oi->i_cred, &obj);
				1096
				1097	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
				1098	if (unlikely(!or)) {
				1099	EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
				1100	return ERR_PTR(-ENOMEM);
				1101	}
				1102
				1103	osd_req_create_object(or, &obj);
				1104
				1105	/* increment the refcount so that the inode will still be around when we
				1106	* reach the callback
				1107	*/
				1108	atomic_inc(&inode->i_count);
				1109
				1110	ret = exofs_async_op(or, create_done, inode, oi->i_cred);
				1111	if (ret) {
				1112	atomic_dec(&inode->i_count);
				1113	osd_end_request(or);
				1114	return ERR_PTR(-EIO);
				1115	}
				1116	atomic_inc(&sbi->s_curr_pending);
				1117
				1118	return inode;
				1119	}
Boaz Harrosh	ba9e5e9	2008-10-28 16:11:41 +0200	[diff] [blame]	1120
				1121	/*
				1122	* struct to pass two arguments to update_inode's callback
				1123	*/
				1124	struct updatei_args {
				1125	struct exofs_sb_info *sbi;
				1126	struct exofs_fcb fcb;
				1127	};
				1128
				1129	/*
				1130	* Callback function from exofs_update_inode().
				1131	*/
				1132	static void updatei_done(struct osd_request or, void p)
				1133	{
				1134	struct updatei_args *args = p;
				1135
				1136	osd_end_request(or);
				1137
				1138	atomic_dec(&args->sbi->s_curr_pending);
				1139
				1140	kfree(args);
				1141	}
				1142
				1143	/*
				1144	* Write the inode to the OSD. Just fill up the struct, and set the attribute
				1145	* synchronously or asynchronously depending on the do_sync flag.
				1146	*/
				1147	static int exofs_update_inode(struct inode *inode, int do_sync)
				1148	{
				1149	struct exofs_i_info *oi = exofs_i(inode);
				1150	struct super_block *sb = inode->i_sb;
				1151	struct exofs_sb_info *sbi = sb->s_fs_info;
				1152	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
				1153	struct osd_request *or;
				1154	struct osd_attr attr;
				1155	struct exofs_fcb *fcb;
				1156	struct updatei_args *args;
				1157	int ret;
				1158
				1159	args = kzalloc(sizeof(*args), GFP_KERNEL);
				1160	if (!args)
				1161	return -ENOMEM;
				1162
				1163	fcb = &args->fcb;
				1164
				1165	fcb->i_mode = cpu_to_le16(inode->i_mode);
				1166	fcb->i_uid = cpu_to_le32(inode->i_uid);
				1167	fcb->i_gid = cpu_to_le32(inode->i_gid);
				1168	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
				1169	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
				1170	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
				1171	fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
				1172	oi->i_commit_size = i_size_read(inode);
				1173	fcb->i_size = cpu_to_le64(oi->i_commit_size);
				1174	fcb->i_generation = cpu_to_le32(inode->i_generation);
				1175
				1176	if (S_ISCHR(inode->i_mode) \|\| S_ISBLK(inode->i_mode)) {
				1177	if (old_valid_dev(inode->i_rdev)) {
				1178	fcb->i_data[0] =
				1179	cpu_to_le32(old_encode_dev(inode->i_rdev));
				1180	fcb->i_data[1] = 0;
				1181	} else {
				1182	fcb->i_data[0] = 0;
				1183	fcb->i_data[1] =
				1184	cpu_to_le32(new_encode_dev(inode->i_rdev));
				1185	fcb->i_data[2] = 0;
				1186	}
				1187	} else
				1188	memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
				1189
				1190	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
				1191	if (unlikely(!or)) {
				1192	EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
				1193	ret = -ENOMEM;
				1194	goto free_args;
				1195	}
				1196
				1197	osd_req_set_attributes(or, &obj);
				1198
				1199	attr = g_attr_inode_data;
				1200	attr.val_ptr = fcb;
				1201	osd_req_add_set_attr_list(or, &attr, 1);
				1202
				1203	if (!obj_created(oi)) {
				1204	EXOFS_DBGMSG("!obj_created\n");
				1205	BUG_ON(!obj_2bcreated(oi));
				1206	wait_event(oi->i_wq, obj_created(oi));
				1207	EXOFS_DBGMSG("wait_event done\n");
				1208	}
				1209
				1210	if (do_sync) {
				1211	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
				1212	osd_end_request(or);
				1213	goto free_args;
				1214	} else {
				1215	args->sbi = sbi;
				1216
				1217	ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
				1218	if (ret) {
				1219	osd_end_request(or);
				1220	goto free_args;
				1221	}
				1222	atomic_inc(&sbi->s_curr_pending);
				1223	goto out; /* deallocation in updatei_done */
				1224	}
				1225
				1226	free_args:
				1227	kfree(args);
				1228	out:
				1229	EXOFS_DBGMSG("ret=>%d\n", ret);
				1230	return ret;
				1231	}
				1232
				1233	int exofs_write_inode(struct inode *inode, int wait)
				1234	{
				1235	return exofs_update_inode(inode, wait);
				1236	}
				1237
				1238	/*
				1239	* Callback function from exofs_delete_inode() - don't have much cleaning up to
				1240	* do.
				1241	*/
				1242	static void delete_done(struct osd_request or, void p)
				1243	{
				1244	struct exofs_sb_info *sbi;
				1245	osd_end_request(or);
				1246	sbi = p;
				1247	atomic_dec(&sbi->s_curr_pending);
				1248	}
				1249
				1250	/*
				1251	* Called when the refcount of an inode reaches zero. We remove the object
				1252	* from the OSD here. We make sure the object was created before we try and
				1253	* delete it.
				1254	*/
				1255	void exofs_delete_inode(struct inode *inode)
				1256	{
				1257	struct exofs_i_info *oi = exofs_i(inode);
				1258	struct super_block *sb = inode->i_sb;
				1259	struct exofs_sb_info *sbi = sb->s_fs_info;
				1260	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
				1261	struct osd_request *or;
				1262	int ret;
				1263
				1264	truncate_inode_pages(&inode->i_data, 0);
				1265
				1266	if (is_bad_inode(inode))
				1267	goto no_delete;
				1268
				1269	mark_inode_dirty(inode);
				1270	exofs_update_inode(inode, inode_needs_sync(inode));
				1271
				1272	inode->i_size = 0;
				1273	if (inode->i_blocks)
				1274	exofs_truncate(inode);
				1275
				1276	clear_inode(inode);
				1277
				1278	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
				1279	if (unlikely(!or)) {
				1280	EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
				1281	return;
				1282	}
				1283
				1284	osd_req_remove_object(or, &obj);
				1285
				1286	/* if we are deleting an obj that hasn't been created yet, wait */
				1287	if (!obj_created(oi)) {
				1288	BUG_ON(!obj_2bcreated(oi));
				1289	wait_event(oi->i_wq, obj_created(oi));
				1290	}
				1291
				1292	ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
				1293	if (ret) {
				1294	EXOFS_ERR(
				1295	"ERROR: @exofs_delete_inode exofs_async_op failed\n");
				1296	osd_end_request(or);
				1297	return;
				1298	}
				1299	atomic_inc(&sbi->s_curr_pending);
				1300
				1301	return;
				1302
				1303	no_delete:
				1304	clear_inode(inode);
				1305	}