Blame - fs/ocfs2/extent_map.c - kernel/msm-4.9

blob: b6ba292e9544000444718395aee0be5f39bf6de0 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* extent_map.c
				5	*
				6	* In-memory extent map for OCFS2. Man, this code was prettier in
				7	* the library.
				8	*
				9	* Copyright (C) 2004 Oracle. All rights reserved.
				10	*
				11	* This program is free software; you can redistribute it and/or
				12	* modify it under the terms of the GNU General Public
				13	* License, version 2, as published by the Free Software Foundation.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 02111-1307, USA.
				24	*/
				25
				26	#include <linux/fs.h>
				27	#include <linux/init.h>
				28	#include <linux/types.h>
				29	#include <linux/slab.h>
				30	#include <linux/rbtree.h>
				31
				32	#define MLOG_MASK_PREFIX ML_EXTENT_MAP
				33	#include <cluster/masklog.h>
				34
				35	#include "ocfs2.h"
				36
				37	#include "extent_map.h"
				38	#include "inode.h"
				39	#include "super.h"
				40
				41	#include "buffer_head_io.h"
				42
				43
				44	/*
				45	* SUCK SUCK SUCK
				46	* Our headers are so bad that struct ocfs2_extent_map is in ocfs.h
				47	*/
				48
				49	struct ocfs2_extent_map_entry {
				50	struct rb_node e_node;
				51	int e_tree_depth;
				52	struct ocfs2_extent_rec e_rec;
				53	};
				54
				55	struct ocfs2_em_insert_context {
				56	int need_left;
				57	int need_right;
				58	struct ocfs2_extent_map_entry *new_ent;
				59	struct ocfs2_extent_map_entry *old_ent;
				60	struct ocfs2_extent_map_entry *left_ent;
				61	struct ocfs2_extent_map_entry *right_ent;
				62	};
				63
				64	static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
				65
				66
				67	static struct ocfs2_extent_map_entry *
				68	ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				69	u32 cpos, u32 clusters,
				70	struct rb_node ***ret_p,
				71	struct rb_node **ret_parent);
				72	static int ocfs2_extent_map_insert(struct inode *inode,
				73	struct ocfs2_extent_rec *rec,
				74	int tree_depth);
				75	static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
				76	struct ocfs2_extent_map_entry *ent);
				77	static int ocfs2_extent_map_find_leaf(struct inode *inode,
				78	u32 cpos, u32 clusters,
				79	struct ocfs2_extent_list *el);
				80	static int ocfs2_extent_map_lookup_read(struct inode *inode,
				81	u32 cpos, u32 clusters,
				82	struct ocfs2_extent_map_entry **ret_ent);
				83	static int ocfs2_extent_map_try_insert(struct inode *inode,
				84	struct ocfs2_extent_rec *rec,
				85	int tree_depth,
				86	struct ocfs2_em_insert_context *ctxt);
				87
				88	/* returns 1 only if the rec contains all the given clusters -- that is that
				89	* rec's cpos is <= the cluster cpos and that the rec endpoint (cpos +
				90	* clusters) is >= the argument's endpoint */
				91	static int ocfs2_extent_rec_contains_clusters(struct ocfs2_extent_rec *rec,
				92	u32 cpos, u32 clusters)
				93	{
				94	if (le32_to_cpu(rec->e_cpos) > cpos)
				95	return 0;
				96	if (cpos + clusters > le32_to_cpu(rec->e_cpos) +
				97	le32_to_cpu(rec->e_clusters))
				98	return 0;
				99	return 1;
				100	}
				101
				102
				103	/*
				104	* Find an entry in the tree that intersects the region passed in.
				105	* Note that this will find straddled intervals, it is up to the
				106	* callers to enforce any boundary conditions.
				107	*
				108	* Callers must hold ip_lock. This lookup is not guaranteed to return
				109	* a tree_depth 0 match, and as such can race inserts if the lock
				110	* were not held.
				111	*
				112	* The rb_node garbage lets insertion share the search. Trivial
				113	* callers pass NULL.
				114	*/
				115	static struct ocfs2_extent_map_entry *
				116	ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				117	u32 cpos, u32 clusters,
				118	struct rb_node ***ret_p,
				119	struct rb_node **ret_parent)
				120	{
				121	struct rb_node **p = &em->em_extents.rb_node;
				122	struct rb_node *parent = NULL;
				123	struct ocfs2_extent_map_entry *ent = NULL;
				124
				125	while (*p)
				126	{
				127	parent = *p;
				128	ent = rb_entry(parent, struct ocfs2_extent_map_entry,
				129	e_node);
				130	if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.e_cpos)) {
				131	p = &(*p)->rb_left;
				132	ent = NULL;
				133	} else if (cpos >= (le32_to_cpu(ent->e_rec.e_cpos) +
				134	le32_to_cpu(ent->e_rec.e_clusters))) {
				135	p = &(*p)->rb_right;
				136	ent = NULL;
				137	} else
				138	break;
				139	}
				140
				141	if (ret_p != NULL)
				142	*ret_p = p;
				143	if (ret_parent != NULL)
				144	*ret_parent = parent;
				145	return ent;
				146	}
				147
				148	/*
				149	* Find the leaf containing the interval we want. While we're on our
				150	* way down the tree, fill in every record we see at any depth, because
				151	* we might want it later.
				152	*
				153	* Note that this code is run without ip_lock. That's because it
				154	* sleeps while reading. If someone is also filling the extent list at
				155	* the same time we are, we might have to restart.
				156	*/
				157	static int ocfs2_extent_map_find_leaf(struct inode *inode,
				158	u32 cpos, u32 clusters,
				159	struct ocfs2_extent_list *el)
				160	{
				161	int i, ret;
				162	struct buffer_head *eb_bh = NULL;
				163	u64 blkno;
				164	u32 rec_end;
				165	struct ocfs2_extent_block *eb;
				166	struct ocfs2_extent_rec *rec;
				167
				168	/*
				169	* The bh data containing the el cannot change here, because
				170	* we hold alloc_sem. So we can do this without other
				171	* locks.
				172	*/
				173	while (el->l_tree_depth)
				174	{
				175	blkno = 0;
				176	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
				177	rec = &el->l_recs[i];
				178	rec_end = (le32_to_cpu(rec->e_cpos) +
				179	le32_to_cpu(rec->e_clusters));
				180
				181	ret = -EBADR;
				182	if (rec_end > OCFS2_I(inode)->ip_clusters) {
				183	mlog_errno(ret);
				184	goto out_free;
				185	}
				186
				187	if (rec_end <= cpos) {
				188	ret = ocfs2_extent_map_insert(inode, rec,
				189	le16_to_cpu(el->l_tree_depth));
				190	if (ret && (ret != -EEXIST)) {
				191	mlog_errno(ret);
				192	goto out_free;
				193	}
				194	continue;
				195	}
				196	if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
				197	ret = ocfs2_extent_map_insert(inode, rec,
				198	le16_to_cpu(el->l_tree_depth));
				199	if (ret && (ret != -EEXIST)) {
				200	mlog_errno(ret);
				201	goto out_free;
				202	}
				203	continue;
				204	}
				205
				206	/*
				207	* We've found a record that matches our
				208	* interval. We don't insert it because we're
				209	* about to traverse it.
				210	*/
				211
				212	/* Check to see if we're stradling */
				213	ret = -ESRCH;
				214	if (!ocfs2_extent_rec_contains_clusters(rec,
				215	cpos,
				216	clusters)) {
				217	mlog_errno(ret);
				218	goto out_free;
				219	}
				220
				221	/*
				222	* If we've already found a record, the el has
				223	* two records covering the same interval.
				224	* EEEK!
				225	*/
				226	ret = -EBADR;
				227	if (blkno) {
				228	mlog_errno(ret);
				229	goto out_free;
				230	}
				231
				232	blkno = le64_to_cpu(rec->e_blkno);
				233	}
				234
				235	/*
				236	* We don't support holes, and we're still up
				237	* in the branches, so we'd better have found someone
				238	*/
				239	ret = -EBADR;
				240	if (!blkno) {
				241	mlog_errno(ret);
				242	goto out_free;
				243	}
				244
				245	if (eb_bh) {
				246	brelse(eb_bh);
				247	eb_bh = NULL;
				248	}
				249	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				250	blkno, &eb_bh, OCFS2_BH_CACHED,
				251	inode);
				252	if (ret) {
				253	mlog_errno(ret);
				254	goto out_free;
				255	}
				256	eb = (struct ocfs2_extent_block *)eb_bh->b_data;
				257	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
				258	OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
				259	ret = -EIO;
				260	goto out_free;
				261	}
				262	el = &eb->h_list;
				263	}
				264
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	265	BUG_ON(el->l_tree_depth);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	266
				267	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
				268	rec = &el->l_recs[i];
				269	ret = ocfs2_extent_map_insert(inode, rec,
				270	le16_to_cpu(el->l_tree_depth));
				271	if (ret) {
				272	mlog_errno(ret);
				273	goto out_free;
				274	}
				275	}
				276
				277	ret = 0;
				278
				279	out_free:
				280	if (eb_bh)
				281	brelse(eb_bh);
				282
				283	return ret;
				284	}
				285
				286	/*
				287	* This lookup actually will read from disk. It has one invariant:
				288	* It will never re-traverse blocks. This means that all inserts should
				289	* be new regions or more granular regions (both allowed by insert).
				290	*/
				291	static int ocfs2_extent_map_lookup_read(struct inode *inode,
				292	u32 cpos,
				293	u32 clusters,
				294	struct ocfs2_extent_map_entry **ret_ent)
				295	{
				296	int ret;
				297	u64 blkno;
				298	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				299	struct ocfs2_extent_map_entry *ent;
				300	struct buffer_head *bh = NULL;
				301	struct ocfs2_extent_block *eb;
				302	struct ocfs2_dinode *di;
				303	struct ocfs2_extent_list *el;
				304
				305	spin_lock(&OCFS2_I(inode)->ip_lock);
				306	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
				307	if (ent) {
				308	if (!ent->e_tree_depth) {
				309	spin_unlock(&OCFS2_I(inode)->ip_lock);
				310	*ret_ent = ent;
				311	return 0;
				312	}
				313	blkno = le64_to_cpu(ent->e_rec.e_blkno);
				314	spin_unlock(&OCFS2_I(inode)->ip_lock);
				315
				316	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
				317	OCFS2_BH_CACHED, inode);
				318	if (ret) {
				319	mlog_errno(ret);
				320	if (bh)
				321	brelse(bh);
				322	return ret;
				323	}
				324	eb = (struct ocfs2_extent_block *)bh->b_data;
				325	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
				326	OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
				327	brelse(bh);
				328	return -EIO;
				329	}
				330	el = &eb->h_list;
				331	} else {
				332	spin_unlock(&OCFS2_I(inode)->ip_lock);
				333
				334	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				335	OCFS2_I(inode)->ip_blkno, &bh,
				336	OCFS2_BH_CACHED, inode);
				337	if (ret) {
				338	mlog_errno(ret);
				339	if (bh)
				340	brelse(bh);
				341	return ret;
				342	}
				343	di = (struct ocfs2_dinode *)bh->b_data;
				344	if (!OCFS2_IS_VALID_DINODE(di)) {
				345	brelse(bh);
				346	OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
				347	return -EIO;
				348	}
				349	el = &di->id2.i_list;
				350	}
				351
				352	ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
				353	brelse(bh);
				354	if (ret) {
				355	mlog_errno(ret);
				356	return ret;
				357	}
				358
				359	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
				360	if (!ent) {
				361	ret = -ESRCH;
				362	mlog_errno(ret);
				363	return ret;
				364	}
				365
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	366	/* FIXME: Make sure this isn't a corruption */
				367	BUG_ON(ent->e_tree_depth);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	368
				369	*ret_ent = ent;
				370
				371	return 0;
				372	}
				373
				374	/*
				375	* Callers must hold ip_lock. This can insert pieces of the tree,
				376	* thus racing lookup if the lock weren't held.
				377	*/
				378	static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
				379	struct ocfs2_extent_map_entry *ent)
				380	{
				381	struct rb_node *p, parent;
				382	struct ocfs2_extent_map_entry *old_ent;
				383
				384	old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.e_cpos),
				385	le32_to_cpu(ent->e_rec.e_clusters),
				386	&p, &parent);
				387	if (old_ent)
				388	return -EEXIST;
				389
				390	rb_link_node(&ent->e_node, parent, p);
				391	rb_insert_color(&ent->e_node, &em->em_extents);
				392
				393	return 0;
				394	}
				395
				396
				397	/*
				398	* Simple rule: on any return code other than -EAGAIN, anything left
				399	* in the insert_context will be freed.
				400	*/
				401	static int ocfs2_extent_map_try_insert(struct inode *inode,
				402	struct ocfs2_extent_rec *rec,
				403	int tree_depth,
				404	struct ocfs2_em_insert_context *ctxt)
				405	{
				406	int ret;
				407	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				408	struct ocfs2_extent_map_entry *old_ent;
				409
				410	ctxt->need_left = 0;
				411	ctxt->need_right = 0;
				412	ctxt->old_ent = NULL;
				413
				414	spin_lock(&OCFS2_I(inode)->ip_lock);
				415	ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
				416	if (!ret) {
				417	ctxt->new_ent = NULL;
				418	goto out_unlock;
				419	}
				420
				421	old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos),
				422	le32_to_cpu(rec->e_clusters), NULL,
				423	NULL);
				424
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	425	BUG_ON(!old_ent);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	426
				427	ret = -EEXIST;
				428	if (old_ent->e_tree_depth < tree_depth)
				429	goto out_unlock;
				430
				431	if (old_ent->e_tree_depth == tree_depth) {
				432	if (!memcmp(rec, &old_ent->e_rec,
				433	sizeof(struct ocfs2_extent_rec)))
				434	ret = 0;
				435
				436	/* FIXME: Should this be ESRCH/EBADR??? */
				437	goto out_unlock;
				438	}
				439
				440	/*
				441	* We do it in this order specifically so that no actual tree
				442	* changes occur until we have all the pieces we need. We
				443	* don't want malloc failures to leave an inconsistent tree.
				444	* Whenever we drop the lock, another process could be
				445	* inserting. Also note that, if another process just beat us
				446	* to an insert, we might not need the same pieces we needed
				447	* the first go round. In the end, the pieces we need will
				448	* be used, and the pieces we don't will be freed.
				449	*/
				450	ctxt->need_left = !!(le32_to_cpu(rec->e_cpos) >
				451	le32_to_cpu(old_ent->e_rec.e_cpos));
				452	ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.e_cpos) +
				453	le32_to_cpu(old_ent->e_rec.e_clusters)) >
				454	(le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)));
				455	ret = -EAGAIN;
				456	if (ctxt->need_left) {
				457	if (!ctxt->left_ent)
				458	goto out_unlock;
				459	(ctxt->left_ent) = old_ent;
				460	ctxt->left_ent->e_rec.e_clusters =
				461	cpu_to_le32(le32_to_cpu(rec->e_cpos) -
				462	le32_to_cpu(ctxt->left_ent->e_rec.e_cpos));
				463	}
				464	if (ctxt->need_right) {
				465	if (!ctxt->right_ent)
				466	goto out_unlock;
				467	(ctxt->right_ent) = old_ent;
				468	ctxt->right_ent->e_rec.e_cpos =
				469	cpu_to_le32(le32_to_cpu(rec->e_cpos) +
				470	le32_to_cpu(rec->e_clusters));
				471	ctxt->right_ent->e_rec.e_clusters =
				472	cpu_to_le32((le32_to_cpu(old_ent->e_rec.e_cpos) +
				473	le32_to_cpu(old_ent->e_rec.e_clusters)) -
				474	le32_to_cpu(ctxt->right_ent->e_rec.e_cpos));
				475	}
				476
				477	rb_erase(&old_ent->e_node, &em->em_extents);
				478	/* Now that he's erased, set him up for deletion */
				479	ctxt->old_ent = old_ent;
				480
				481	if (ctxt->need_left) {
				482	ret = ocfs2_extent_map_insert_entry(em,
				483	ctxt->left_ent);
				484	if (ret)
				485	goto out_unlock;
				486	ctxt->left_ent = NULL;
				487	}
				488
				489	if (ctxt->need_right) {
				490	ret = ocfs2_extent_map_insert_entry(em,
				491	ctxt->right_ent);
				492	if (ret)
				493	goto out_unlock;
				494	ctxt->right_ent = NULL;
				495	}
				496
				497	ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
				498
				499	if (!ret)
				500	ctxt->new_ent = NULL;
				501
				502	out_unlock:
				503	spin_unlock(&OCFS2_I(inode)->ip_lock);
				504
				505	return ret;
				506	}
				507
				508
				509	static int ocfs2_extent_map_insert(struct inode *inode,
				510	struct ocfs2_extent_rec *rec,
				511	int tree_depth)
				512	{
				513	int ret;
				514	struct ocfs2_em_insert_context ctxt = {0, };
				515
				516	if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
				517	OCFS2_I(inode)->ip_map.em_clusters) {
				518	ret = -EBADR;
				519	mlog_errno(ret);
				520	return ret;
				521	}
				522
				523	/* Zero e_clusters means a truncated tail record. It better be EOF */
				524	if (!rec->e_clusters) {
				525	if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) !=
				526	OCFS2_I(inode)->ip_map.em_clusters) {
				527	ret = -EBADR;
				528	mlog_errno(ret);
				529	return ret;
				530	}
				531
				532	/* Ignore the truncated tail */
				533	return 0;
				534	}
				535
				536	ret = -ENOMEM;
				537	ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
				538	GFP_KERNEL);
				539	if (!ctxt.new_ent) {
				540	mlog_errno(ret);
				541	return ret;
				542	}
				543
				544	ctxt.new_ent->e_rec = *rec;
				545	ctxt.new_ent->e_tree_depth = tree_depth;
				546
				547	do {
				548	ret = -ENOMEM;
				549	if (ctxt.need_left && !ctxt.left_ent) {
				550	ctxt.left_ent =
				551	kmem_cache_alloc(ocfs2_em_ent_cachep,
				552	GFP_KERNEL);
				553	if (!ctxt.left_ent)
				554	break;
				555	}
				556	if (ctxt.need_right && !ctxt.right_ent) {
				557	ctxt.right_ent =
				558	kmem_cache_alloc(ocfs2_em_ent_cachep,
				559	GFP_KERNEL);
				560	if (!ctxt.right_ent)
				561	break;
				562	}
				563
				564	ret = ocfs2_extent_map_try_insert(inode, rec,
				565	tree_depth, &ctxt);
				566	} while (ret == -EAGAIN);
				567
				568	if (ret < 0)
				569	mlog_errno(ret);
				570
				571	if (ctxt.left_ent)
				572	kmem_cache_free(ocfs2_em_ent_cachep, ctxt.left_ent);
				573	if (ctxt.right_ent)
				574	kmem_cache_free(ocfs2_em_ent_cachep, ctxt.right_ent);
				575	if (ctxt.old_ent)
				576	kmem_cache_free(ocfs2_em_ent_cachep, ctxt.old_ent);
				577	if (ctxt.new_ent)
				578	kmem_cache_free(ocfs2_em_ent_cachep, ctxt.new_ent);
				579
				580	return ret;
				581	}
				582
				583	/*
				584	* Append this record to the tail of the extent map. It must be
				585	* tree_depth 0. The record might be an extension of an existing
				586	* record, and as such that needs to be handled. eg:
				587	*
				588	* Existing record in the extent map:
				589	*
				590	* cpos = 10, len = 10
				591	* \|---------\|
				592	*
				593	* New Record:
				594	*
				595	* cpos = 10, len = 20
				596	* \|------------------\|
				597	*
				598	* The passed record is the new on-disk record. The new_clusters value
				599	* is how many clusters were added to the file. If the append is a
				600	* contiguous append, the new_clusters has been added to
				601	* rec->e_clusters. If the append is an entirely new extent, then
				602	* rec->e_clusters is == new_clusters.
				603	*/
				604	int ocfs2_extent_map_append(struct inode *inode,
				605	struct ocfs2_extent_rec *rec,
				606	u32 new_clusters)
				607	{
				608	int ret;
				609	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				610	struct ocfs2_extent_map_entry *ent;
				611	struct ocfs2_extent_rec *old;
				612
				613	BUG_ON(!new_clusters);
				614	BUG_ON(le32_to_cpu(rec->e_clusters) < new_clusters);
				615
				616	if (em->em_clusters < OCFS2_I(inode)->ip_clusters) {
				617	/*
				618	* Size changed underneath us on disk. Drop any
				619	* straddling records and update our idea of
				620	* i_clusters
				621	*/
				622	ocfs2_extent_map_drop(inode, em->em_clusters - 1);
				623	em->em_clusters = OCFS2_I(inode)->ip_clusters;
				624	}
				625
				626	mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
				627	le32_to_cpu(rec->e_clusters)) !=
				628	(em->em_clusters + new_clusters),
				629	"Inode %"MLFu64":\n"
				630	"rec->e_cpos = %u + rec->e_clusters = %u = %u\n"
				631	"em->em_clusters = %u + new_clusters = %u = %u\n",
				632	OCFS2_I(inode)->ip_blkno,
				633	le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters),
				634	le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters),
				635	em->em_clusters, new_clusters,
				636	em->em_clusters + new_clusters);
				637
				638	em->em_clusters += new_clusters;
				639
				640	ret = -ENOENT;
				641	if (le32_to_cpu(rec->e_clusters) > new_clusters) {
				642	/* This is a contiguous append */
				643	ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 1,
				644	NULL, NULL);
				645	if (ent) {
				646	old = &ent->e_rec;
				647	BUG_ON((le32_to_cpu(rec->e_cpos) +
				648	le32_to_cpu(rec->e_clusters)) !=
				649	(le32_to_cpu(old->e_cpos) +
				650	le32_to_cpu(old->e_clusters) +
				651	new_clusters));
				652	if (ent->e_tree_depth == 0) {
				653	BUG_ON(le32_to_cpu(old->e_cpos) !=
				654	le32_to_cpu(rec->e_cpos));
				655	BUG_ON(le64_to_cpu(old->e_blkno) !=
				656	le64_to_cpu(rec->e_blkno));
				657	ret = 0;
				658	}
				659	/*
				660	* Let non-leafs fall through as -ENOENT to
				661	* force insertion of the new leaf.
				662	*/
				663	le32_add_cpu(&old->e_clusters, new_clusters);
				664	}
				665	}
				666
				667	if (ret == -ENOENT)
				668	ret = ocfs2_extent_map_insert(inode, rec, 0);
				669	if (ret < 0)
				670	mlog_errno(ret);
				671	return ret;
				672	}
				673
				674	#if 0
				675	/* Code here is included but defined out as it completes the extent
				676	* map api and may be used in the future. */
				677
				678	/*
				679	* Look up the record containing this cluster offset. This record is
				680	* part of the extent map. Do not free it. Any changes you make to
				681	* it will reflect in the extent map. So, if your last extent
				682	* is (cpos = 10, clusters = 10) and you truncate the file by 5
				683	* clusters, you can do:
				684	*
				685	* ret = ocfs2_extent_map_get_rec(em, orig_size - 5, &rec);
				686	* rec->e_clusters -= 5;
				687	*
				688	* The lookup does not read from disk. If the map isn't filled in for
				689	* an entry, you won't find it.
				690	*
				691	* Also note that the returned record is valid until alloc_sem is
				692	* dropped. After that, truncate and extend can happen. Caveat Emptor.
				693	*/
				694	int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos,
				695	struct ocfs2_extent_rec **rec,
				696	int *tree_depth)
				697	{
				698	int ret = -ENOENT;
				699	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				700	struct ocfs2_extent_map_entry *ent;
				701
				702	*rec = NULL;
				703
				704	if (cpos >= OCFS2_I(inode)->ip_clusters)
				705	return -EINVAL;
				706
				707	if (cpos >= em->em_clusters) {
				708	/*
				709	* Size changed underneath us on disk. Drop any
				710	* straddling records and update our idea of
				711	* i_clusters
				712	*/
				713	ocfs2_extent_map_drop(inode, em->em_clusters - 1);
				714	em->em_clusters = OCFS2_I(inode)->ip_clusters ;
				715	}
				716
				717	ent = ocfs2_extent_map_lookup(&OCFS2_I(inode)->ip_map, cpos, 1,
				718	NULL, NULL);
				719
				720	if (ent) {
				721	*rec = &ent->e_rec;
				722	if (tree_depth)
				723	*tree_depth = ent->e_tree_depth;
				724	ret = 0;
				725	}
				726
				727	return ret;
				728	}
				729
				730	int ocfs2_extent_map_get_clusters(struct inode *inode,
				731	u32 v_cpos, int count,
				732	u32 p_cpos, int ret_count)
				733	{
				734	int ret;
				735	u32 coff, ccount;
				736	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				737	struct ocfs2_extent_map_entry *ent = NULL;
				738
				739	*p_cpos = ccount = 0;
				740
				741	if ((v_cpos + count) > OCFS2_I(inode)->ip_clusters)
				742	return -EINVAL;
				743
				744	if ((v_cpos + count) > em->em_clusters) {
				745	/*
				746	* Size changed underneath us on disk. Drop any
				747	* straddling records and update our idea of
				748	* i_clusters
				749	*/
				750	ocfs2_extent_map_drop(inode, em->em_clusters - 1);
				751	em->em_clusters = OCFS2_I(inode)->ip_clusters;
				752	}
				753
				754
				755	ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent);
				756	if (ret)
				757	return ret;
				758
				759	if (ent) {
				760	/* We should never find ourselves straddling an interval */
				761	if (!ocfs2_extent_rec_contains_clusters(&ent->e_rec,
				762	v_cpos,
				763	count))
				764	return -ESRCH;
				765
				766	coff = v_cpos - le32_to_cpu(ent->e_rec.e_cpos);
				767	*p_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
				768	le64_to_cpu(ent->e_rec.e_blkno)) +
				769	coff;
				770
				771	if (ret_count)
				772	*ret_count = le32_to_cpu(ent->e_rec.e_clusters) - coff;
				773
				774	return 0;
				775	}
				776
				777
				778	return -ENOENT;
				779	}
				780
				781	#endif /* 0 */
				782
				783	int ocfs2_extent_map_get_blocks(struct inode *inode,
				784	u64 v_blkno, int count,
				785	u64 p_blkno, int ret_count)
				786	{
				787	int ret;
				788	u64 boff;
				789	u32 cpos, clusters;
				790	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
				791	struct ocfs2_extent_map_entry *ent = NULL;
				792	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				793	struct ocfs2_extent_rec *rec;
				794
				795	*p_blkno = 0;
				796
				797	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
				798	clusters = ocfs2_blocks_to_clusters(inode->i_sb,
				799	(u64)count + bpc - 1);
				800	if ((cpos + clusters) > OCFS2_I(inode)->ip_clusters) {
				801	ret = -EINVAL;
				802	mlog_errno(ret);
				803	return ret;
				804	}
				805
				806	if ((cpos + clusters) > em->em_clusters) {
				807	/*
				808	* Size changed underneath us on disk. Drop any
				809	* straddling records and update our idea of
				810	* i_clusters
				811	*/
				812	ocfs2_extent_map_drop(inode, em->em_clusters - 1);
				813	em->em_clusters = OCFS2_I(inode)->ip_clusters;
				814	}
				815
				816	ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
				817	if (ret) {
				818	mlog_errno(ret);
				819	return ret;
				820	}
				821
				822	if (ent)
				823	{
				824	rec = &ent->e_rec;
				825
				826	/* We should never find ourselves straddling an interval */
				827	if (!ocfs2_extent_rec_contains_clusters(rec, cpos, clusters)) {
				828	ret = -ESRCH;
				829	mlog_errno(ret);
				830	return ret;
				831	}
				832
				833	boff = ocfs2_clusters_to_blocks(inode->i_sb, cpos -
				834	le32_to_cpu(rec->e_cpos));
				835	boff += (v_blkno & (u64)(bpc - 1));
				836	*p_blkno = le64_to_cpu(rec->e_blkno) + boff;
				837
				838	if (ret_count) {
				839	*ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
				840	le32_to_cpu(rec->e_clusters)) - boff;
				841	}
				842
				843	return 0;
				844	}
				845
				846	return -ENOENT;
				847	}
				848
				849	int ocfs2_extent_map_init(struct inode *inode)
				850	{
				851	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				852
				853	em->em_extents = RB_ROOT;
				854	em->em_clusters = 0;
				855
				856	return 0;
				857	}
				858
				859	/* Needs the lock */
				860	static void __ocfs2_extent_map_drop(struct inode *inode,
				861	u32 new_clusters,
				862	struct rb_node **free_head,
				863	struct ocfs2_extent_map_entry **tail_ent)
				864	{
				865	struct rb_node node, next;
				866	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				867	struct ocfs2_extent_map_entry *ent;
				868
				869	*free_head = NULL;
				870
				871	ent = NULL;
				872	node = rb_last(&em->em_extents);
				873	while (node)
				874	{
				875	next = rb_prev(node);
				876
				877	ent = rb_entry(node, struct ocfs2_extent_map_entry,
				878	e_node);
				879	if (le32_to_cpu(ent->e_rec.e_cpos) < new_clusters)
				880	break;
				881
				882	rb_erase(&ent->e_node, &em->em_extents);
				883
				884	node->rb_right = *free_head;
				885	*free_head = node;
				886
				887	ent = NULL;
				888	node = next;
				889	}
				890
				891	/* Do we have an entry straddling new_clusters? */
				892	if (tail_ent) {
				893	if (ent &&
				894	((le32_to_cpu(ent->e_rec.e_cpos) +
				895	le32_to_cpu(ent->e_rec.e_clusters)) > new_clusters))
				896	*tail_ent = ent;
				897	else
				898	*tail_ent = NULL;
				899	}
				900	}
				901
				902	static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head)
				903	{
				904	struct rb_node *node;
				905	struct ocfs2_extent_map_entry *ent;
				906
				907	while (free_head) {
				908	node = free_head;
				909	free_head = node->rb_right;
				910
				911	ent = rb_entry(node, struct ocfs2_extent_map_entry,
				912	e_node);
				913	kmem_cache_free(ocfs2_em_ent_cachep, ent);
				914	}
				915	}
				916
				917	/*
				918	* Remove all entries past new_clusters, inclusive of an entry that
				919	* contains new_clusters. This is effectively a cache forget.
				920	*
				921	* If you want to also clip the last extent by some number of clusters,
				922	* you need to call ocfs2_extent_map_trunc().
				923	* This code does not check or modify ip_clusters.
				924	*/
				925	int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters)
				926	{
				927	struct rb_node *free_head = NULL;
				928	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
				929	struct ocfs2_extent_map_entry *ent;
				930
				931	spin_lock(&OCFS2_I(inode)->ip_lock);
				932
				933	__ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
				934
				935	if (ent) {
				936	rb_erase(&ent->e_node, &em->em_extents);
				937	ent->e_node.rb_right = free_head;
				938	free_head = &ent->e_node;
				939	}
				940
				941	spin_unlock(&OCFS2_I(inode)->ip_lock);
				942
				943	if (free_head)
				944	__ocfs2_extent_map_drop_cleanup(free_head);
				945
				946	return 0;
				947	}
				948
				949	/*
				950	* Remove all entries past new_clusters and also clip any extent
				951	* straddling new_clusters, if there is one. This does not check
				952	* or modify ip_clusters
				953	*/
				954	int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters)
				955	{
				956	struct rb_node *free_head = NULL;
				957	struct ocfs2_extent_map_entry *ent = NULL;
				958
				959	spin_lock(&OCFS2_I(inode)->ip_lock);
				960
				961	__ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
				962
				963	if (ent)
				964	ent->e_rec.e_clusters = cpu_to_le32(new_clusters -
				965	le32_to_cpu(ent->e_rec.e_cpos));
				966
				967	OCFS2_I(inode)->ip_map.em_clusters = new_clusters;
				968
				969	spin_unlock(&OCFS2_I(inode)->ip_lock);
				970
				971	if (free_head)
				972	__ocfs2_extent_map_drop_cleanup(free_head);
				973
				974	return 0;
				975	}
				976
				977	int __init init_ocfs2_extent_maps(void)
				978	{
				979	ocfs2_em_ent_cachep =
				980	kmem_cache_create("ocfs2_em_ent",
				981	sizeof(struct ocfs2_extent_map_entry),
				982	0, SLAB_HWCACHE_ALIGN, NULL, NULL);
				983	if (!ocfs2_em_ent_cachep)
				984	return -ENOMEM;
				985
				986	return 0;
				987	}
				988
Adrian Bunk	0c6c98f	2006-01-07 20:07:02 +0100	[diff] [blame]	989	void exit_ocfs2_extent_maps(void)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	990	{
				991	kmem_cache_destroy(ocfs2_em_ent_cachep);
				992	}