Merge branch 'master' into devel and apply fixup from Stephen Rothwell:
vfs/nfs: fixup for nfs_open_context change
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 535ab6e..e98f56d 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -96,12 +96,12 @@
return acl;
}
-int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int v9fs_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
struct v9fs_session_info *v9ses;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
v9ses = v9fs_inode2v9ses(inode);
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 7ef3ac9..59e18c2 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
#ifdef CONFIG_9P_FS_POSIX_ACL
extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
+extern int v9fs_check_acl(struct inode *inode, int mask);
extern int v9fs_acl_chmod(struct dentry *);
extern int v9fs_set_create_acl(struct dentry *,
struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 5b335c5..945aa5f 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -108,11 +108,10 @@
void *buffer, uint16_t bufmax)
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->fscache_key->path,
- sizeof(v9inode->fscache_key->path));
+ memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
- v9inode->fscache_key->path);
- return sizeof(v9inode->fscache_key->path);
+ v9inode->qid.path);
+ return sizeof(v9inode->qid.path);
}
static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
@@ -129,11 +128,10 @@
void *buffer, uint16_t buflen)
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->fscache_key->version,
- sizeof(v9inode->fscache_key->version));
+ memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
- v9inode->fscache_key->version);
- return sizeof(v9inode->fscache_key->version);
+ v9inode->qid.version);
+ return sizeof(v9inode->qid.version);
}
static enum
@@ -143,11 +141,11 @@
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- if (buflen != sizeof(v9inode->fscache_key->version))
+ if (buflen != sizeof(v9inode->qid.version))
return FSCACHE_CHECKAUX_OBSOLETE;
- if (memcmp(buffer, &v9inode->fscache_key->version,
- sizeof(v9inode->fscache_key->version)))
+ if (memcmp(buffer, &v9inode->qid.version,
+ sizeof(v9inode->qid.version)))
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 049507a..40cc54c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -93,15 +93,6 @@
BUG_ON(PageFsCache(page));
}
-static inline void v9fs_fscache_set_key(struct inode *inode,
- struct p9_qid *qid)
-{
- struct v9fs_inode *v9inode = V9FS_I(inode);
- spin_lock(&v9inode->fscache_lock);
- v9inode->fscache_key = qid;
- spin_unlock(&v9inode->fscache_lock);
-}
-
static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
struct page *page)
{
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c82b017..ef96618 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -78,6 +78,25 @@
{Opt_err, NULL}
};
+/* Interpret mount options for cache mode */
+static int get_cache_mode(char *s)
+{
+ int version = -EINVAL;
+
+ if (!strcmp(s, "loose")) {
+ version = CACHE_LOOSE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n");
+ } else if (!strcmp(s, "fscache")) {
+ version = CACHE_FSCACHE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n");
+ } else if (!strcmp(s, "none")) {
+ version = CACHE_NONE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n");
+ } else
+ printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s);
+ return version;
+}
+
/**
* v9fs_parse_options - parse mount options into session structure
* @v9ses: existing v9fs session information
@@ -97,7 +116,7 @@
/* setup defaults */
v9ses->afid = ~0;
v9ses->debug = 0;
- v9ses->cache = 0;
+ v9ses->cache = CACHE_NONE;
#ifdef CONFIG_9P_FSCACHE
v9ses->cachetag = NULL;
#endif
@@ -171,13 +190,13 @@
"problem allocating copy of cache arg\n");
goto free_and_return;
}
+ ret = get_cache_mode(s);
+ if (ret == -EINVAL) {
+ kfree(s);
+ goto free_and_return;
+ }
- if (strcmp(s, "loose") == 0)
- v9ses->cache = CACHE_LOOSE;
- else if (strcmp(s, "fscache") == 0)
- v9ses->cache = CACHE_FSCACHE;
- else
- v9ses->cache = CACHE_NONE;
+ v9ses->cache = ret;
kfree(s);
break;
@@ -200,9 +219,15 @@
} else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
- if (*e != '\0')
- v9ses->uid = ~0;
+ if (*e != '\0') {
+ ret = -EINVAL;
+ printk(KERN_INFO "9p: Unknown access "
+ "argument %s.\n", s);
+ kfree(s);
+ goto free_and_return;
+ }
}
+
kfree(s);
break;
@@ -487,8 +512,8 @@
struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- v9inode->fscache_key = NULL;
#endif
+ memset(&v9inode->qid, 0, sizeof(v9inode->qid));
inode_init_once(&v9inode->vfs_inode);
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e5ebedf..e78956c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -125,8 +125,8 @@
#ifdef CONFIG_9P_FSCACHE
spinlock_t fscache_lock;
struct fscache_cookie *fscache;
- struct p9_qid *fscache_key;
#endif
+ struct p9_qid qid;
unsigned int cache_validity;
struct p9_fid *writeback_fid;
struct mutex v_mutex;
@@ -153,13 +153,13 @@
void *p);
extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
- struct super_block *sb);
+ struct super_block *sb, int new);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
- struct super_block *sb);
+ struct super_block *sb, int new);
/* other default globals */
#define V9FS_PORT 564
@@ -201,8 +201,27 @@
struct super_block *sb)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0);
else
- return v9fs_inode_from_fid(v9ses, fid, sb);
+ return v9fs_inode_from_fid(v9ses, fid, sb, 0);
}
+
+/**
+ * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by
+ * issuing an attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1);
+ else
+ return v9fs_inode_from_fid(v9ses, fid, sb, 1);
+}
+
#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 4014160..46ce357 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -70,7 +70,8 @@
ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
-int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+ int datasync);
ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
const char __user *, size_t, loff_t *, int);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index ffed558..3c173fc 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -519,32 +519,50 @@
}
-static int v9fs_file_fsync(struct file *filp, int datasync)
+static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct p9_fid *fid;
+ struct inode *inode = filp->f_mapping->host;
struct p9_wstat wstat;
int retval;
+ retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (retval)
+ return retval;
+
+ mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
retval = p9_client_wstat(fid, &wstat);
+ mutex_unlock(&inode->i_mutex);
+
return retval;
}
-int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct p9_fid *fid;
+ struct inode *inode = filp->f_mapping->host;
int retval;
+ retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (retval)
+ return retval;
+
+ mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
filp, datasync);
fid = filp->private_data;
retval = p9_client_fsync(fid, datasync);
+ mutex_unlock(&inode->i_mutex);
+
return retval;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c677..8bb5507 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -216,7 +216,6 @@
return NULL;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- v9inode->fscache_key = NULL;
spin_lock_init(&v9inode->fscache_lock);
#endif
v9inode->writeback_fid = NULL;
@@ -433,17 +432,60 @@
}
}
+static int v9fs_test_inode(struct inode *inode, void *data)
+{
+ int umode;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_wstat *st = (struct p9_wstat *)data;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+ umode = p9mode2unixmode(v9ses, st->mode);
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ return 0;
+
+ /* compare qid details */
+ if (memcmp(&v9inode->qid.version,
+ &st->qid.version, sizeof(v9inode->qid.version)))
+ return 0;
+
+ if (v9inode->qid.type != st->qid.type)
+ return 0;
+ return 1;
+}
+
+static int v9fs_test_new_inode(struct inode *inode, void *data)
+{
+ return 0;
+}
+
+static int v9fs_set_inode(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_wstat *st = (struct p9_wstat *)data;
+
+ memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+ return 0;
+}
+
static struct inode *v9fs_qid_iget(struct super_block *sb,
struct p9_qid *qid,
- struct p9_wstat *st)
+ struct p9_wstat *st,
+ int new)
{
int retval, umode;
unsigned long i_ino;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ int (*test)(struct inode *, void *);
+
+ if (new)
+ test = v9fs_test_new_inode;
+ else
+ test = v9fs_test_inode;
i_ino = v9fs_qid2ino(qid);
- inode = iget_locked(sb, i_ino);
+ inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
@@ -453,6 +495,7 @@
* FIXME!! we may need support for stale inodes
* later.
*/
+ inode->i_ino = i_ino;
umode = p9mode2unixmode(v9ses, st->mode);
retval = v9fs_init_inode(v9ses, inode, umode);
if (retval)
@@ -460,7 +503,6 @@
v9fs_stat2inode(st, inode, sb);
#ifdef CONFIG_9P_FSCACHE
- v9fs_fscache_set_key(inode, &st->qid);
v9fs_cache_inode_get_cookie(inode);
#endif
unlock_new_inode(inode);
@@ -474,7 +516,7 @@
struct inode *
v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, int new)
{
struct p9_wstat *st;
struct inode *inode = NULL;
@@ -483,7 +525,7 @@
if (IS_ERR(st))
return ERR_CAST(st);
- inode = v9fs_qid_iget(sb, &st->qid, st);
+ inode = v9fs_qid_iget(sb, &st->qid, st, new);
p9stat_free(st);
kfree(st);
return inode;
@@ -492,38 +534,50 @@
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
- * @file: dentry that is being deleted
+ * @dentry: dentry that is being deleted
* @rmdir: removing a directory
*
*/
-static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
{
- int retval;
- struct p9_fid *v9fid;
- struct inode *file_inode;
+ struct inode *inode;
+ int retval = -EOPNOTSUPP;
+ struct p9_fid *v9fid, *dfid;
+ struct v9fs_session_info *v9ses;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
- rmdir);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
+ dir, dentry, flags);
- file_inode = file->d_inode;
- v9fid = v9fs_fid_clone(file);
- if (IS_ERR(v9fid))
- return PTR_ERR(v9fid);
-
- retval = p9_client_remove(v9fid);
+ v9ses = v9fs_inode2v9ses(dir);
+ inode = dentry->d_inode;
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ retval = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
+ return retval;
+ }
+ if (v9fs_proto_dotl(v9ses))
+ retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
+ if (retval == -EOPNOTSUPP) {
+ /* Try the one based on path */
+ v9fid = v9fs_fid_clone(dentry);
+ if (IS_ERR(v9fid))
+ return PTR_ERR(v9fid);
+ retval = p9_client_remove(v9fid);
+ }
if (!retval) {
/*
* directories on unlink should have zero
* link count
*/
- if (rmdir) {
- clear_nlink(file_inode);
+ if (flags & AT_REMOVEDIR) {
+ clear_nlink(inode);
drop_nlink(dir);
} else
- drop_nlink(file_inode);
+ drop_nlink(inode);
- v9fs_invalidate_inode_attr(file_inode);
+ v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
}
return retval;
@@ -585,7 +639,7 @@
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -633,8 +687,8 @@
fid = NULL;
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
+ if (nd)
+ flags = nd->intent.open.flags;
else
flags = O_RDWR;
@@ -649,7 +703,7 @@
v9fs_invalidate_inode_attr(dir);
/* if we are opening a file, assign the open fid to the file */
- if (nd && nd->flags & LOOKUP_OPEN) {
+ if (nd) {
v9inode = V9FS_I(dentry->d_inode);
mutex_lock(&v9inode->v_mutex);
if (v9ses->cache && !v9inode->writeback_fid &&
@@ -814,7 +868,7 @@
int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
- return v9fs_remove(i, d, 1);
+ return v9fs_remove(i, d, AT_REMOVEDIR);
}
/**
@@ -862,9 +916,12 @@
down_write(&v9ses->rename_sem);
if (v9fs_proto_dotl(v9ses)) {
- retval = p9_client_rename(oldfid, newdirfid,
- (char *) new_dentry->d_name.name);
- if (retval != -ENOSYS)
+ retval = p9_client_renameat(olddirfid, old_dentry->d_name.name,
+ newdirfid, new_dentry->d_name.name);
+ if (retval == -EOPNOTSUPP)
+ retval = p9_client_rename(oldfid, newdirfid,
+ new_dentry->d_name.name);
+ if (retval != -EOPNOTSUPP)
goto clunk_newdir;
}
if (old_dentry->d_parent != new_dentry->d_parent) {
@@ -889,11 +946,6 @@
clear_nlink(new_inode);
else
drop_nlink(new_inode);
- /*
- * Work around vfs rename rehash bug with
- * FS_RENAME_DOES_D_MOVE
- */
- v9fs_invalidate_inode_attr(new_inode);
}
if (S_ISDIR(old_inode->i_mode)) {
if (!new_inode)
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 691c78f..276f4a6 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,18 +86,63 @@
return dentry;
}
+static int v9fs_test_inode_dotl(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ return 0;
+
+ if (inode->i_generation != st->st_gen)
+ return 0;
+
+ /* compare qid details */
+ if (memcmp(&v9inode->qid.version,
+ &st->qid.version, sizeof(v9inode->qid.version)))
+ return 0;
+
+ if (v9inode->qid.type != st->qid.type)
+ return 0;
+ return 1;
+}
+
+/* Always get a new inode */
+static int v9fs_test_new_inode_dotl(struct inode *inode, void *data)
+{
+ return 0;
+}
+
+static int v9fs_set_inode_dotl(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+ memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+ inode->i_generation = st->st_gen;
+ return 0;
+}
+
static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
struct p9_qid *qid,
struct p9_fid *fid,
- struct p9_stat_dotl *st)
+ struct p9_stat_dotl *st,
+ int new)
{
int retval;
unsigned long i_ino;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ int (*test)(struct inode *, void *);
+
+ if (new)
+ test = v9fs_test_new_inode_dotl;
+ else
+ test = v9fs_test_inode_dotl;
i_ino = v9fs_qid2ino(qid);
- inode = iget_locked(sb, i_ino);
+ inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
@@ -107,13 +152,13 @@
* FIXME!! we may need support for stale inodes
* later.
*/
+ inode->i_ino = i_ino;
retval = v9fs_init_inode(v9ses, inode, st->st_mode);
if (retval)
goto error;
v9fs_stat2inode_dotl(st, inode);
#ifdef CONFIG_9P_FSCACHE
- v9fs_fscache_set_key(inode, &st->qid);
v9fs_cache_inode_get_cookie(inode);
#endif
retval = v9fs_get_acl(inode, fid);
@@ -131,16 +176,16 @@
struct inode *
v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, int new)
{
struct p9_stat_dotl *st;
struct inode *inode = NULL;
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
if (IS_ERR(st))
return ERR_CAST(st);
- inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
+ inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new);
kfree(st);
return inode;
}
@@ -173,8 +218,8 @@
struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
+ if (nd)
+ flags = nd->intent.open.flags;
else {
/*
* create call without LOOKUP_OPEN is due
@@ -230,7 +275,7 @@
fid = NULL;
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -350,7 +395,7 @@
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -547,7 +592,7 @@
inode->i_blocks = stat->st_blocks;
}
if (stat->st_result_mask & P9_STATS_GEN)
- inode->i_generation = stat->st_gen;
+ inode->i_generation = stat->st_gen;
/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
* because the inode structure does not have fields for them.
@@ -603,7 +648,7 @@
}
/* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -756,7 +801,7 @@
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e95f73..c2b9c79 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,7 +182,7 @@
void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
-int affs_file_fsync(struct file *, int);
+int affs_file_fsync(struct file *, loff_t, loff_t, int);
/* dir.c */
diff --git a/fs/affs/file.c b/fs/affs/file.c
index acf321b..2f4c935 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -923,14 +923,20 @@
affs_free_prealloc(inode);
}
-int affs_file_fsync(struct file *filp, int datasync)
+int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
int ret, err;
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 8bbefe0..800f607 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -49,7 +49,7 @@
AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
- AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server °ag */
+ AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
AFSVL_PERM = 363546, /* No permission access */
AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
};
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5a9b684..d2b0888 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -627,7 +627,7 @@
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, unsigned int);
+extern int afs_permission(struct inode *, int);
/*
* server.c
@@ -750,7 +750,7 @@
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, int);
+extern int afs_fsync(struct file *, loff_t, loff_t, int);
/*****************************************************************************/
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f44b9d3..8d01042 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,14 +285,14 @@
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct inode *inode, int mask, unsigned int flags)
+int afs_permission(struct inode *inode, int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_access_t uninitialized_var(access);
struct key *key;
int ret;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
_enter("{{%x:%u},%lx},%x,",
@@ -350,7 +350,7 @@
}
key_put(key);
- ret = generic_permission(inode, mask, flags, NULL);
+ ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index b806285..9aa52d9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -681,9 +681,10 @@
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
-int afs_fsync(struct file *file, int datasync)
+int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = file->f_mapping->host;
struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
int ret;
@@ -692,12 +693,19 @@
vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
* completed or rejected */
wb = kzalloc(sizeof(*wb), GFP_KERNEL);
- if (!wb)
- return -ENOMEM;
+ if (!wb) {
+ ret = -ENOMEM;
+ goto out;
+ }
wb->vnode = vnode;
wb->first = 0;
wb->last = -1;
@@ -720,7 +728,7 @@
if (ret < 0) {
afs_put_writeback(wb);
_leave(" = %d [wb]", ret);
- return ret;
+ goto out;
}
/* wait for the preceding writes to actually complete */
@@ -729,6 +737,8 @@
vnode->writebacks.next == &wb->link);
afs_put_writeback(wb);
_leave(" = %d", ret);
+out:
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/attr.c b/fs/attr.c
index caf2aa5..538e279 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -232,17 +232,11 @@
if (error)
return error;
- if (ia_valid & ATTR_SIZE)
- down_write(&dentry->d_inode->i_alloc_sem);
-
if (inode->i_op->setattr)
error = inode->i_op->setattr(dentry, attr);
else
error = simple_setattr(dentry, attr);
- if (ia_valid & ATTR_SIZE)
- up_write(&dentry->d_inode->i_alloc_sem);
-
if (!error)
fsnotify_change(dentry, ia_valid);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index bfcb18f..9205cf2 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -87,7 +87,8 @@
return -EIO;
}
-static int bad_file_fsync(struct file *file, int datasync)
+static int bad_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
return -EIO;
}
@@ -229,7 +230,7 @@
return -EIO;
}
-static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
+static int bad_inode_permission(struct inode *inode, int mask)
{
return -EIO;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 303983f..dd0fdfc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -668,8 +668,7 @@
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
- if (file_permission(interpreter, MAY_READ) < 0)
- bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+ would_dump(bprm, interpreter);
retval = kernel_read(interpreter, 0, bprm->buf,
BINPRM_BUF_SIZE);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 63039ed..30745f4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -245,8 +245,7 @@
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
- if (file_permission(interpreter, MAY_READ) < 0)
- bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+ would_dump(bprm, interpreter);
retval = kernel_read(interpreter, 0, bprm->buf,
BINPRM_BUF_SIZE);
@@ -1864,6 +1863,7 @@
kfree(psinfo);
kfree(notes);
kfree(fpu);
+ kfree(shdr4extnum);
#ifdef ELF_CORE_COPY_XFPREGS
kfree(xfpu);
#endif
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1befe2e..ba1a1ae 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -149,8 +149,7 @@
/* if the binary is not readable than enforce mm->dumpable=0
regardless of the interpreter's permissions */
- if (file_permission(bprm->file, MAY_READ))
- bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+ would_dump(bprm, bprm->file);
allow_write_access(bprm->file);
bprm->file = NULL;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 610e8e0..9fb0b15 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,25 +355,30 @@
mutex_lock(&bd_inode->i_mutex);
size = i_size_read(bd_inode);
+ retval = -EINVAL;
switch (origin) {
- case 2:
+ case SEEK_END:
offset += size;
break;
- case 1:
+ case SEEK_CUR:
offset += file->f_pos;
+ case SEEK_SET:
+ break;
+ default:
+ goto out;
}
- retval = -EINVAL;
if (offset >= 0 && offset <= size) {
if (offset != file->f_pos) {
file->f_pos = offset;
}
retval = offset;
}
+out:
mutex_unlock(&bd_inode->i_mutex);
return retval;
}
-int blkdev_fsync(struct file *filp, int datasync)
+int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *bd_inode = filp->f_mapping->host;
struct block_device *bdev = I_BDEV(bd_inode);
@@ -384,14 +389,10 @@
* i_mutex and doing so causes performance issues with concurrent
* O_SYNC writers to a block device.
*/
- mutex_unlock(&bd_inode->i_mutex);
-
error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
if (error == -EOPNOTSUPP)
error = 0;
- mutex_lock(&bd_inode->i_mutex);
-
return error;
}
EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc99..9f62ab2 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -195,14 +195,13 @@
return ret;
}
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int btrfs_check_acl(struct inode *inode, int mask)
{
int error = -EAGAIN;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
error = -ECHILD;
-
} else {
struct posix_acl *acl;
acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f30ac05..82be74e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1219,7 +1219,7 @@
* right now this just gets used so that a root has its own devid
* for stat. It may be used for more later
*/
- struct super_block anon_super;
+ dev_t anon_dev;
};
struct btrfs_ioctl_defrag_range_args {
@@ -1335,6 +1335,11 @@
*/
#define BTRFS_STRING_ITEM_KEY 253
+/*
+ * Flags for mount options.
+ *
+ * Note: don't forget to add new options to btrfs_show_options()
+ */
#define BTRFS_MOUNT_NODATASUM (1 << 0)
#define BTRFS_MOUNT_NODATACOW (1 << 1)
#define BTRFS_MOUNT_NOBARRIER (1 << 2)
@@ -2505,6 +2510,9 @@
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create);
/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@ -2597,7 +2605,7 @@
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct inode *inode);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
-int btrfs_sync_file(struct file *file, int datasync);
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
@@ -2637,7 +2645,7 @@
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
+int btrfs_check_acl(struct inode *inode, int mask);
#else
#define btrfs_check_acl NULL
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ac8db5d..b231ae1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1077,12 +1077,7 @@
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
- root->anon_super.s_root = NULL;
- root->anon_super.s_dev = 0;
- INIT_LIST_HEAD(&root->anon_super.s_list);
- INIT_LIST_HEAD(&root->anon_super.s_instances);
- init_rwsem(&root->anon_super.s_umount);
-
+ root->anon_dev = 0;
return 0;
}
@@ -1311,7 +1306,7 @@
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- ret = set_anon_super(&root->anon_super, NULL);
+ ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
@@ -2393,10 +2388,8 @@
{
iput(root->cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- if (root->anon_super.s_dev) {
- down_write(&root->anon_super.s_umount);
- kill_anon_super(&root->anon_super);
- }
+ if (root->anon_dev)
+ free_anon_bdev(root->anon_dev);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18..59cbdb1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1452,7 +1452,7 @@
* important optimization for directories because holding the mutex prevents
* new operations on the dir while we write to disk.
*/
-int btrfs_sync_file(struct file *file, int datasync)
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -1462,9 +1462,13 @@
trace_btrfs_sync_file(file, datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* we wait first, since the writeback may change the inode */
root->log_batch++;
- /* the VFS called filemap_fdatawrite for us */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
@@ -1472,8 +1476,10 @@
* check the transaction that last modified this inode
* and see if its already been committed
*/
- if (!BTRFS_I(inode)->last_trans)
+ if (!BTRFS_I(inode)->last_trans) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/*
* if the last transaction that changed this file was before
@@ -1484,6 +1490,7 @@
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
+ mutex_unlock(&inode->i_mutex);
goto out;
}
@@ -1496,12 +1503,15 @@
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ mutex_unlock(&inode->i_mutex);
goto out;
}
ret = btrfs_log_dentry_safe(trans, root, dentry);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/* we've logged all the items and now have a consistent
* version of the file in the log. It is possible that
@@ -1513,7 +1523,7 @@
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&dentry->d_inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
@@ -1528,7 +1538,6 @@
} else {
ret = btrfs_end_transaction(trans, root);
}
- mutex_lock(&dentry->d_inode->i_mutex);
out:
return ret > 0 ? -EIO : ret;
}
@@ -1664,8 +1673,154 @@
return ret;
}
+/*
+ * Find the first data extent (SEEK_DATA) or hole (SEEK_HOLE) at or after
+ * *offset and store where it starts back into *offset.  Returns 0 on
+ * success, -ENXIO if the search ends without one, else the lookup error.
+ */
+static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map *em;
+	struct extent_state *cached_state = NULL;
+	u64 lockstart = *offset;
+	u64 lockend = i_size_read(inode);
+	u64 start = *offset;
+	u64 orig_start = *offset;
+	u64 len;
+	u64 last_end = 0;
+	int ret = 0;
+
+	lockend = max_t(u64, root->sectorsize, lockend);
+	if (lockend <= lockstart)
+		lockend = lockstart + root->sectorsize;
+	len = max_t(u64, lockend - lockstart + 1, root->sectorsize);
+	if (inode->i_size == 0)
+		return -ENXIO;
+
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+			 &cached_state, GFP_NOFS);
+
+	/*
+	 * Delalloc is such a pain. If we have a hole and we have pending
+	 * delalloc for a portion of the hole we will get back a hole that
+	 * exists for the entire range since it hasn't been actually written
+	 * yet. So to take care of this case we need to look for an extent just
+	 * before the position we want in case there is outstanding delalloc
+	 * going on here.
+	 */
+	if (origin == SEEK_HOLE && start != 0) {
+		if (start <= root->sectorsize)
+			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
+						     root->sectorsize, 0);
+		else
+			em = btrfs_get_extent_fiemap(inode, NULL, 0,
+						     start - root->sectorsize,
+						     root->sectorsize, 0);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
+		}
+		last_end = em->start + em->len;
+		if (em->block_start == EXTENT_MAP_DELALLOC)
+			last_end = min_t(u64, last_end, inode->i_size);
+		free_extent_map(em);
+	}
+
+	while (1) {
+		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			break;
+		}
+
+		if (em->block_start == EXTENT_MAP_HOLE) {
+			if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+				if (last_end <= orig_start) {
+					free_extent_map(em);
+					ret = -ENXIO;
+					break;
+				}
+			}
+
+			if (origin == SEEK_HOLE) {
+				*offset = start;
+				free_extent_map(em);
+				break;
+			}
+		} else {
+			if (origin == SEEK_DATA) {
+				if (em->block_start == EXTENT_MAP_DELALLOC) {
+					if (start >= inode->i_size) {
+						free_extent_map(em);
+						ret = -ENXIO;
+						break;
+					}
+				}
+
+				*offset = start;
+				free_extent_map(em);
+				break;
+			}
+		}
+		/* no match in this extent: continue from where it ends */
+		start = em->start + em->len;
+		last_end = em->start + em->len;
+		if (em->block_start == EXTENT_MAP_DELALLOC)
+			last_end = min_t(u64, last_end, inode->i_size);
+		if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+			free_extent_map(em);
+			ret = -ENXIO;
+			break;
+		}
+		free_extent_map(em);
+		cond_resched();
+	}
+	if (!ret)
+		*offset = min(*offset, inode->i_size);
+out:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			     &cached_state, GFP_NOFS);
+	return ret;
+}
+
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	int ret;
+
+	/* i_mutex is held from here on: every exit must leave through "out" */
+	mutex_lock(&inode->i_mutex);
+	switch (origin) {
+	case SEEK_END:
+	case SEEK_CUR:
+		offset = generic_file_llseek_unlocked(file, offset, origin);
+		goto out;
+	case SEEK_DATA:
+	case SEEK_HOLE:
+		ret = find_desired_extent(inode, &offset, origin);
+		if (ret) {
+			offset = ret;
+			goto out;
+		}
+	}
+	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
+	    offset > inode->i_sb->s_maxbytes) {
+		offset = -EINVAL;
+		goto out;
+	}
+	/* Special lock needed here? */
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+out:
+	mutex_unlock(&inode->i_mutex);
+	return offset;
+}
+
const struct file_operations btrfs_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = btrfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d340f63..2548a04 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2678,12 +2678,14 @@
int ret;
/*
- * If root is tree root, it means this inode is used to
- * store free space information. And these inodes are updated
- * when committing the transaction, so they needn't delaye to
- * be updated, or deadlock will occured.
+ * If the inode is a free space inode, we can deadlock during commit
+ * if we put it into the delayed code.
+ *
+ * The data relocation inode should also be directly updated
+ * without delay
*/
- if (!is_free_space_inode(root, inode)) {
+ if (!is_free_space_inode(root, inode)
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
@@ -4077,13 +4079,7 @@
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode;
-
- inode = btrfs_lookup_dentry(dir, dentry);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- return d_splice_alias(inode, dentry);
+ return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
}
unsigned char btrfs_filetype_table[] = {
@@ -4770,11 +4766,10 @@
if (err) {
drop_inode = 1;
} else {
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
btrfs_log_new_name(trans, inode, NULL, parent);
- dput(parent);
}
nr = trans->blocks_used;
@@ -6898,7 +6893,7 @@
{
struct inode *inode = dentry->d_inode;
generic_fillattr(inode, stat);
- stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
+ stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->blksize = PAGE_CACHE_SIZE;
stat->blocks = (inode_get_bytes(inode) +
BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7066,9 +7061,8 @@
BUG_ON(ret);
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- struct dentry *parent = dget_parent(new_dentry);
+ struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
- dput(parent);
btrfs_end_log_trans(root);
}
out_fail:
@@ -7329,7 +7323,7 @@
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int btrfs_permission(struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -7337,7 +7331,7 @@
return -EROFS;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
- return generic_permission(inode, mask, flags, btrfs_check_acl);
+ return generic_permission(inode, mask);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7357,10 +7351,12 @@
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
+ .check_acl = btrfs_check_acl,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
+ .check_acl = btrfs_check_acl,
};
static const struct file_operations btrfs_dir_file_operations = {
@@ -7429,6 +7425,7 @@
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.fiemap = btrfs_fiemap,
+ .check_acl = btrfs_check_acl,
};
static const struct inode_operations btrfs_special_inode_operations = {
.getattr = btrfs_getattr,
@@ -7438,6 +7435,7 @@
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .check_acl = btrfs_check_acl,
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
@@ -7449,6 +7447,7 @@
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .check_acl = btrfs_check_acl,
};
const struct dentry_operations btrfs_dentry_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a3c4751..6225433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -323,7 +323,7 @@
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root;
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dentry->d_parent;
struct inode *dir;
int ret;
int err;
@@ -332,10 +332,8 @@
u64 index = 0;
ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
- if (ret) {
- dput(parent);
+ if (ret)
return ret;
- }
dir = parent->d_inode;
@@ -346,10 +344,8 @@
* 2 - dir items
*/
trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans)) {
- dput(parent);
+ if (IS_ERR(trans))
return PTR_ERR(trans);
- }
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
@@ -439,7 +435,6 @@
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- dput(parent);
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +451,6 @@
bool readonly)
{
struct inode *inode;
- struct dentry *parent;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
@@ -504,9 +498,7 @@
if (ret)
goto fail;
- parent = dget_parent(dentry);
- inode = btrfs_lookup_dentry(parent->d_inode, dentry);
- dput(parent);
+ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto fail;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0bb4ebb..15634d4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -723,6 +723,12 @@
seq_puts(seq, ",clear_cache");
if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
seq_puts(seq, ",user_subvol_rm_allowed");
+ if (btrfs_test_opt(root, ENOSPC_DEBUG))
+ seq_puts(seq, ",enospc_debug");
+ if (btrfs_test_opt(root, AUTO_DEFRAG))
+ seq_puts(seq, ",autodefrag");
+ if (btrfs_test_opt(root, INODE_MAP_CACHE))
+ seq_puts(seq, ",inode_cache");
return 0;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1efa56e..19450bc 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2098,7 +2098,8 @@
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
- BUG_ON(ret && ret != -ENOSPC);
+ if (ret && ret != -ENOSPC)
+ goto error;
key.offset = found_key.offset - 1;
}
ret = 0;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index a2603e7..622f469 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -129,8 +129,6 @@
!root->d_inode->i_op->mkdir ||
!root->d_inode->i_op->setxattr ||
!root->d_inode->i_op->getxattr ||
- !root->d_sb ||
- !root->d_sb->s_op ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs)
goto error_unsupported;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f605753..8d74ad7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1811,7 +1811,7 @@
spin_unlock(&ci->i_unsafe_lock);
}
-int ceph_fsync(struct file *file, int datasync)
+int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1822,9 +1822,10 @@
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
sync_write_wait(inode);
- ret = filemap_write_and_wait(inode->i_mapping);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
return ret;
+ mutex_lock(&inode->i_mutex);
dirty = try_flush_caps(inode, NULL, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -1841,6 +1842,7 @@
}
dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ef8f08c..1065ac7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -252,7 +252,7 @@
off = 1;
}
if (filp->f_pos == 1) {
- ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
+ ino_t ino = parent_ino(filp->f_dentry);
dout("readdir off 1 -> '..'\n");
if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
ceph_translate_ino(inode->i_sb, ino),
@@ -446,14 +446,19 @@
loff_t retval;
mutex_lock(&inode->i_mutex);
+ retval = -EINVAL;
switch (origin) {
case SEEK_END:
offset += inode->i_size + 2; /* FIXME */
break;
case SEEK_CUR:
offset += file->f_pos;
+ case SEEK_SET:
+ break;
+ default:
+ goto out;
}
- retval = -EINVAL;
+
if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
if (offset != file->f_pos) {
file->f_pos = offset;
@@ -477,6 +482,7 @@
if (offset > old_offset)
fi->dir_release_count--;
}
+out:
mutex_unlock(&inode->i_mutex);
return retval;
}
@@ -566,7 +572,6 @@
/* open (but not create!) intent? */
if (nd &&
(nd->flags & LOOKUP_OPEN) &&
- (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
!(nd->intent.open.flags & O_CREAT)) {
int mode = nd->intent.open.create_mode & ~current->fs->umask;
return ceph_lookup_open(dir, dentry, nd, mode, 1);
@@ -1113,7 +1118,8 @@
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(struct file *file, int datasync)
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1123,6 +1129,11 @@
int ret = 0;
dout("dir_fsync %p\n", inode);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
spin_lock(&ci->i_unsafe_lock);
if (list_empty(head))
goto out;
@@ -1156,6 +1167,8 @@
} while (req->r_tid < last_tid);
out:
spin_unlock(&ci->i_unsafe_lock);
+ mutex_unlock(&inode->i_mutex);
+
return ret;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9542f07..0d0eae0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -226,7 +226,7 @@
struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
struct ceph_mds_request *req;
int err;
- int flags = nd->intent.open.flags - 1; /* silly vfs! */
+ int flags = nd->intent.open.flags;
dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -290,7 +290,6 @@
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
int io_align, page_align;
- int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
int left, pages_left;
int read;
struct page **page_pos;
@@ -326,12 +325,11 @@
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
if (ret > 0) {
- int didpages =
- ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT;
+ int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
if (read < pos - off) {
dout(" zero gap %llu to %llu\n", off + read, pos);
- ceph_zero_page_vector_range(page_off + read,
+ ceph_zero_page_vector_range(page_align + read,
pos - off - read, pages);
}
pos += ret;
@@ -356,7 +354,7 @@
left = inode->i_size - pos;
dout("zero tail %d\n", left);
- ceph_zero_page_vector_range(page_off + read, left,
+ ceph_zero_page_vector_range(page_align + read, left,
pages);
read += left;
}
@@ -478,9 +476,6 @@
else
pos = *offset;
- io_align = pos & ~PAGE_MASK;
- buf_align = (unsigned long)data & ~PAGE_MASK;
-
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
return ret;
@@ -504,6 +499,8 @@
* boundary. this isn't atomic, unfortunately. :(
*/
more:
+ io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
len = left;
if (file->f_flags & O_DIRECT) {
/* write from beginning of first page, regardless of
@@ -593,6 +590,7 @@
pos += len;
written += len;
left -= len;
+ data += len; /* this chunk only; 'written' is the running total */
if (left)
goto more;
@@ -770,13 +768,16 @@
mutex_lock(&inode->i_mutex);
__ceph_do_pending_vmtruncate(inode);
- switch (origin) {
- case SEEK_END:
+ if (origin != SEEK_CUR && origin != SEEK_SET) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
if (ret < 0) {
offset = ret;
goto out;
}
+ }
+
+ switch (origin) {
+ case SEEK_END:
offset += inode->i_size;
break;
case SEEK_CUR:
@@ -792,6 +793,19 @@
}
offset += file->f_pos;
break;
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ /*
+ * Set offset as well as ret, as the getattr failure path above does
+ * before "goto out" (NOTE(review): confirm that "out" returns offset).
+ */
+ if (offset >= inode->i_size) {
+ offset = ret = -ENXIO;
+ goto out;
+ }
+ if (origin == SEEK_HOLE)
+ offset = inode->i_size;
+ break;
}
if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d8858e9..dfb2831 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1795,17 +1795,17 @@
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct inode *inode, int mask, unsigned int flags)
+int ceph_permission(struct inode *inode, int mask)
{
int err;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
if (!err)
- err = generic_permission(inode, mask, flags, NULL);
+ err = generic_permission(inode, mask);
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 79743d1..0c1d917 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1438,12 +1438,15 @@
struct dentry *temp;
char *path;
int len, pos;
+ unsigned seq;
if (dentry == NULL)
return ERR_PTR(-EINVAL);
retry:
len = 0;
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp);) {
struct inode *inode = temp->d_inode;
if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
@@ -1455,10 +1458,12 @@
len += 1 + temp->d_name.len;
temp = temp->d_parent;
if (temp == NULL) {
+ rcu_read_unlock();
pr_err("build_path corrupt dentry %p\n", dentry);
return ERR_PTR(-EINVAL);
}
}
+ rcu_read_unlock();
if (len)
len--; /* no leading '/' */
@@ -1467,9 +1472,12 @@
return ERR_PTR(-ENOMEM);
pos = len;
path[pos] = 0; /* trailing null */
+ rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
- struct inode *inode = temp->d_inode;
+ struct inode *inode;
+ spin_lock(&temp->d_lock);
+ inode = temp->d_inode;
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
pos, temp);
@@ -1478,21 +1486,26 @@
break;
} else {
pos -= temp->d_name.len;
- if (pos < 0)
+ if (pos < 0) {
+ spin_unlock(&temp->d_lock);
break;
+ }
strncpy(path + pos, temp->d_name.name,
temp->d_name.len);
}
+ spin_unlock(&temp->d_lock);
if (pos)
path[--pos] = '/';
temp = temp->d_parent;
if (temp == NULL) {
+ rcu_read_unlock();
pr_err("build_path corrupt dentry\n");
kfree(path);
return ERR_PTR(-EINVAL);
}
}
- if (pos != 0) {
+ rcu_read_unlock();
+ if (pos != 0 || read_seqretry(&rename_lock, seq)) {
pr_err("build_path did not end path lookup where "
"expected, namelen is %d, pos is %d\n", len, pos);
/* presumably this is only possible if racing with a
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f5cabef..30446b1 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -692,7 +692,7 @@
extern void ceph_queue_writeback(struct inode *inode);
extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
+extern int ceph_permission(struct inode *inode, int mask);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
@@ -728,7 +728,8 @@
extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, int datasync);
+extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 35f9154..8655174 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -35,6 +35,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/namei.h>
#include <net/ipv6.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -223,7 +224,7 @@
return 0;
}
-static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
+static int cifs_permission(struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -238,7 +239,7 @@
on the client (above and beyond ACL on servers) for
servers which do not support setting and viewing mode bits,
so allowing client to check permissions is useful */
- return generic_permission(inode, mask, flags, NULL);
+ return generic_permission(inode, mask);
}
static struct kmem_cache *cifs_inode_cachep;
@@ -542,14 +543,12 @@
static struct dentry *
cifs_get_root(struct smb_vol *vol, struct super_block *sb)
{
- int xid, rc;
- struct inode *inode;
- struct qstr name;
- struct dentry *dparent = NULL, *dchild = NULL, *alias;
+ struct dentry *dentry;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- unsigned int i, full_len, len;
- char *full_path = NULL, *pstart;
+ char *full_path = NULL;
+ char *s, *p;
char sep;
+ int xid;
full_path = cifs_build_path_to_root(vol, cifs_sb,
cifs_sb_master_tcon(cifs_sb));
@@ -560,73 +559,32 @@
xid = GetXid();
sep = CIFS_DIR_SEP(cifs_sb);
- dparent = dget(sb->s_root);
- full_len = strlen(full_path);
- full_path[full_len] = sep;
- pstart = full_path + 1;
+ dentry = dget(sb->s_root);
+ p = s = full_path;
- for (i = 1, len = 0; i <= full_len; i++) {
- if (full_path[i] != sep || !len) {
- len++;
- continue;
- }
+ do {
+ struct inode *dir = dentry->d_inode;
+ struct dentry *child;
- full_path[i] = 0;
- cFYI(1, "get dentry for %s", pstart);
+ /* skip separators */
+ while (*s == sep)
+ s++;
+ if (!*s)
+ break;
+ p = s++;
+ /* next separator */
+ while (*s && *s != sep)
+ s++;
- name.name = pstart;
- name.len = len;
- name.hash = full_name_hash(pstart, len);
- dchild = d_lookup(dparent, &name);
- if (dchild == NULL) {
- cFYI(1, "not exists");
- dchild = d_alloc(dparent, &name);
- if (dchild == NULL) {
- dput(dparent);
- dparent = ERR_PTR(-ENOMEM);
- goto out;
- }
- }
-
- cFYI(1, "get inode");
- if (dchild->d_inode == NULL) {
- cFYI(1, "not exists");
- inode = NULL;
- if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
- rc = cifs_get_inode_info_unix(&inode, full_path,
- sb, xid);
- else
- rc = cifs_get_inode_info(&inode, full_path,
- NULL, sb, xid, NULL);
- if (rc) {
- dput(dchild);
- dput(dparent);
- dparent = ERR_PTR(rc);
- goto out;
- }
- alias = d_materialise_unique(dchild, inode);
- if (alias != NULL) {
- dput(dchild);
- if (IS_ERR(alias)) {
- dput(dparent);
- dparent = ERR_PTR(-EINVAL); /* XXX */
- goto out;
- }
- dchild = alias;
- }
- }
- cFYI(1, "parent %p, child %p", dparent, dchild);
-
- dput(dparent);
- dparent = dchild;
- len = 0;
- pstart = full_path + i + 1;
- full_path[i] = sep;
- }
-out:
+ mutex_lock(&dir->i_mutex);
+ child = lookup_one_len(p, dentry, s - p);
+ mutex_unlock(&dir->i_mutex);
+ dput(dentry);
+ dentry = child;
+ } while (!IS_ERR(dentry));
_FreeXid(xid);
kfree(full_path);
- return dparent;
+ return dentry;
}
static int cifs_set_super(struct super_block *sb, void *data)
@@ -649,9 +607,9 @@
cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
- rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name);
- if (rc)
- return ERR_PTR(rc);
+ volume_info = cifs_get_volume_info((char *)data, dev_name);
+ if (IS_ERR(volume_info))
+ return ERR_CAST(volume_info);
cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
if (cifs_sb == NULL) {
@@ -713,7 +671,7 @@
out_super:
deactivate_locked_super(sb);
out:
- cifs_cleanup_volume_info(&volume_info);
+ cifs_cleanup_volume_info(volume_info);
return root;
out_mountdata:
@@ -746,8 +704,11 @@
static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
{
- /* origin == SEEK_END => we must revalidate the cached file length */
- if (origin == SEEK_END) {
+ /*
+ * Any origin other than SEEK_SET or SEEK_CUR (SEEK_END, SEEK_DATA,
+ * SEEK_HOLE) => we must revalidate the cached file length
+ */
+ if (origin != SEEK_SET && origin != SEEK_CUR) {
int rc;
struct inode *inode = file->f_path.dentry->d_inode;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 0900e16..fbd050c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -91,8 +91,8 @@
extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int cifs_lock(struct file *, int, struct file_lock *);
-extern int cifs_fsync(struct file *, int);
-extern int cifs_strict_fsync(struct file *, int);
+extern int cifs_fsync(struct file *, loff_t, loff_t, int);
+extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
extern int cifs_flush(struct file *, fl_owner_t id);
extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
@@ -129,5 +129,5 @@
extern const struct export_operations cifs_export_ops;
#endif /* CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "1.73"
+#define CIFS_VERSION "1.74"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 257f312..8df28e9 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -154,9 +154,9 @@
extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
struct cifs_sb_info *cifs_sb);
extern int cifs_match_super(struct super_block *, void *);
-extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info);
-extern int cifs_setup_volume_info(struct smb_vol **pvolume_info,
- char *mount_data, const char *devname);
+extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
+extern struct smb_vol *cifs_get_volume_info(char *mount_data,
+ const char *devname);
extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
extern void cifs_umount(struct cifs_sb_info *);
extern void cifs_dfs_release_automount_timer(void);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7f540df..e66297b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -65,6 +65,8 @@
static int generic_ip_connect(struct TCP_Server_Info *server);
static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
static void cifs_prune_tlinks(struct work_struct *work);
+static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
+ const char *devname);
/*
* cifs tcp session reconnection
@@ -318,9 +320,10 @@
}
static int
-cifs_demultiplex_thread(struct TCP_Server_Info *server)
+cifs_demultiplex_thread(void *p)
{
int length;
+ struct TCP_Server_Info *server = p;
unsigned int pdu_length, total_read;
struct smb_hdr *smb_buffer = NULL;
struct smb_hdr *bigbuf = NULL;
@@ -1789,7 +1792,7 @@
* this will succeed. No need for try_module_get().
*/
__module_get(THIS_MODULE);
- tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread,
+ tcp_ses->tsk = kthread_run(cifs_demultiplex_thread,
tcp_ses, "cifsd");
if (IS_ERR(tcp_ses->tsk)) {
rc = PTR_ERR(tcp_ses->tsk);
@@ -2240,8 +2243,8 @@
rc = compare_mount_options(sb, mnt_data);
out:
- cifs_put_tlink(tlink);
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_put_tlink(tlink);
return rc;
}
@@ -2474,14 +2477,6 @@
if (rc < 0)
return rc;
- rc = socket->ops->connect(socket, saddr, slen, 0);
- if (rc < 0) {
- cFYI(1, "Error %d connecting to server", rc);
- sock_release(socket);
- server->ssocket = NULL;
- return rc;
- }
-
/*
* Eventually check for other socket options to change from
* the default. sock_setsockopt not used because it expects
@@ -2510,6 +2505,14 @@
socket->sk->sk_sndbuf,
socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
+ rc = socket->ops->connect(socket, saddr, slen, 0);
+ if (rc < 0) {
+ cFYI(1, "Error %d connecting to server", rc);
+ sock_release(socket);
+ server->ssocket = NULL;
+ return rc;
+ }
+
if (sport == htons(RFC1001_PORT))
rc = ip_rfc1001_connect(server);
@@ -2830,15 +2833,9 @@
return rc;
}
-void
-cifs_cleanup_volume_info(struct smb_vol **pvolume_info)
+static void
+cleanup_volume_info_contents(struct smb_vol *volume_info)
{
- struct smb_vol *volume_info;
-
- if (!pvolume_info || !*pvolume_info)
- return;
-
- volume_info = *pvolume_info;
kfree(volume_info->username);
kzfree(volume_info->password);
kfree(volume_info->UNC);
@@ -2846,28 +2843,44 @@
kfree(volume_info->domainname);
kfree(volume_info->iocharset);
kfree(volume_info->prepath);
- kfree(volume_info);
- *pvolume_info = NULL;
- return;
}
+void
+cifs_cleanup_volume_info(struct smb_vol *volume_info)
+{
+	/* a NULL volume is ignored, as with kfree() */
+	if (volume_info) {
+		cleanup_volume_info_contents(volume_info);
+		kfree(volume_info);
+	}
+}
+
#ifdef CONFIG_CIFS_DFS_UPCALL
/* build_path_to_root returns full path to root when
* we do not have an exiting connection (tcon) */
static char *
-build_unc_path_to_root(const struct smb_vol *volume_info,
+build_unc_path_to_root(const struct smb_vol *vol,
const struct cifs_sb_info *cifs_sb)
{
- char *full_path;
+ char *full_path, *pos;
+ unsigned int pplen = vol->prepath ? strlen(vol->prepath) : 0;
+ unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1);
- int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1);
- full_path = kmalloc(unc_len + 1, GFP_KERNEL);
+ full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL);
if (full_path == NULL)
return ERR_PTR(-ENOMEM);
- strncpy(full_path, volume_info->UNC, unc_len);
- full_path[unc_len] = 0; /* add trailing null */
+ strncpy(full_path, vol->UNC, unc_len);
+ pos = full_path + unc_len;
+
+ if (pplen) {
+ strncpy(pos, vol->prepath, pplen);
+ pos += pplen;
+ }
+
+ *pos = '\0'; /* add trailing null */
convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
+ cFYI(1, "%s: full_path=%s", __func__, full_path);
return full_path;
}
@@ -2910,15 +2923,18 @@
&fake_devname);
free_dfs_info_array(referrals, num_referrals);
- kfree(fake_devname);
-
- if (cifs_sb->mountdata != NULL)
- kfree(cifs_sb->mountdata);
if (IS_ERR(mdata)) {
rc = PTR_ERR(mdata);
mdata = NULL;
+ } else {
+ cleanup_volume_info_contents(volume_info);
+ memset(volume_info, '\0', sizeof(*volume_info));
+ rc = cifs_setup_volume_info(volume_info, mdata,
+ fake_devname);
}
+ kfree(fake_devname);
+ kfree(cifs_sb->mountdata);
cifs_sb->mountdata = mdata;
}
kfree(full_path);
@@ -2926,33 +2942,20 @@
}
#endif
-int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data,
- const char *devname)
+static int
+cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
+ const char *devname)
{
- struct smb_vol *volume_info;
int rc = 0;
- *pvolume_info = NULL;
-
- volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL);
- if (!volume_info) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (cifs_parse_mount_options(mount_data, devname,
- volume_info)) {
- rc = -EINVAL;
- goto out;
- }
+ if (cifs_parse_mount_options(mount_data, devname, volume_info))
+ return -EINVAL;
if (volume_info->nullauth) {
cFYI(1, "null user");
volume_info->username = kzalloc(1, GFP_KERNEL);
- if (volume_info->username == NULL) {
- rc = -ENOMEM;
- goto out;
- }
+ if (volume_info->username == NULL)
+ return -ENOMEM;
} else if (volume_info->username) {
/* BB fixme parse for domain name here */
cFYI(1, "Username: %s", volume_info->username);
@@ -2960,8 +2963,7 @@
cifserror("No username specified");
/* In userspace mount helper we can get user name from alternate
locations such as env variables and files on disk */
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* this is needed for ASCII cp to Unicode converts */
@@ -2973,16 +2975,30 @@
if (volume_info->local_nls == NULL) {
cERROR(1, "CIFS mount error: iocharset %s not found",
volume_info->iocharset);
- rc = -ELIBACC;
- goto out;
+ return -ELIBACC;
}
}
- *pvolume_info = volume_info;
return rc;
-out:
- cifs_cleanup_volume_info(&volume_info);
- return rc;
+}
+
+struct smb_vol *
+cifs_get_volume_info(char *mount_data, const char *devname)
+{
+	struct smb_vol *vol;
+	int err;
+
+	/* zeroed allocation; the setup call below parses the mount options */
+	vol = kzalloc(sizeof(*vol), GFP_KERNEL);
+	if (vol == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	err = cifs_setup_volume_info(vol, mount_data, devname);
+	if (err == 0)
+		return vol;
+
+	cifs_cleanup_volume_info(vol);
+	return ERR_PTR(err);
}
int
@@ -2997,6 +3013,7 @@
struct tcon_link *tlink;
#ifdef CONFIG_CIFS_DFS_UPCALL
int referral_walks_count = 0;
+#endif
rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
if (rc)
@@ -3004,6 +3021,7 @@
cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
+#ifdef CONFIG_CIFS_DFS_UPCALL
try_mount_again:
/* cleanup activities if we're chasing a referral */
if (referral_walks_count) {
@@ -3012,7 +3030,6 @@
else if (pSesInfo)
cifs_put_smb_ses(pSesInfo);
- cifs_cleanup_volume_info(&volume_info);
FreeXid(xid);
}
#endif
@@ -3469,7 +3486,7 @@
goto out;
}
- snprintf(username, MAX_USERNAME_SIZE, "krb50x%x", fsuid);
+ snprintf(username, sizeof(username), "krb50x%x", fsuid);
vol_info->username = username;
vol_info->local_nls = cifs_sb->local_nls;
vol_info->linux_uid = fsuid;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 81914df..14d602f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -55,6 +55,7 @@
char dirsep;
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ unsigned seq;
if (direntry == NULL)
return NULL; /* not much we can do if dentry is freed and
@@ -68,22 +69,29 @@
dfsplen = 0;
cifs_bp_rename_retry:
namelen = dfsplen;
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
for (temp = direntry; !IS_ROOT(temp);) {
namelen += (1 + temp->d_name.len);
temp = temp->d_parent;
if (temp == NULL) {
cERROR(1, "corrupt dentry");
+ rcu_read_unlock();
return NULL;
}
}
+ rcu_read_unlock();
full_path = kmalloc(namelen+1, GFP_KERNEL);
if (full_path == NULL)
return full_path;
full_path[namelen] = 0; /* trailing null */
+ rcu_read_lock();
for (temp = direntry; !IS_ROOT(temp);) {
+ spin_lock(&temp->d_lock);
namelen -= 1 + temp->d_name.len;
if (namelen < 0) {
+ spin_unlock(&temp->d_lock);
break;
} else {
full_path[namelen] = dirsep;
@@ -91,14 +99,17 @@
temp->d_name.len);
cFYI(0, "name: %s", full_path + namelen);
}
+ spin_unlock(&temp->d_lock);
temp = temp->d_parent;
if (temp == NULL) {
cERROR(1, "corrupt dentry");
+ rcu_read_unlock();
kfree(full_path);
return NULL;
}
}
- if (namelen != dfsplen) {
+ rcu_read_unlock();
+ if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
cERROR(1, "did not end path lookup where expected namelen is %d",
namelen);
/* presumably this is only possible if racing with a rename
@@ -168,7 +179,7 @@
if (oplockEnabled)
oplock = REQ_OPLOCK;
- if (nd && (nd->flags & LOOKUP_OPEN))
+ if (nd)
oflags = nd->intent.open.file->f_flags;
else
oflags = O_RDONLY | O_CREAT;
@@ -203,7 +214,7 @@
which should be rare for path not covered on files) */
}
- if (nd && (nd->flags & LOOKUP_OPEN)) {
+ if (nd) {
/* if the file is going to stay open, then we
need to set the desired access properly */
desiredAccess = 0;
@@ -317,7 +328,7 @@
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
+ if (newinode && nd) {
struct cifsFileInfo *pfile_info;
struct file *filp;
@@ -557,7 +568,7 @@
* reduction in network traffic in the other paths.
*/
if (pTcon->unix_ext) {
- if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
+ if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.file->f_flags & O_CREAT)) {
rc = cifs_posix_open(full_path, &newInode,
@@ -652,10 +663,8 @@
* case sensitive name which is specified by user if this is
* for creation.
*/
- if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
- if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
- return 0;
- }
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bb71471..378acda 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1401,7 +1401,8 @@
return rc;
}
-int cifs_strict_fsync(struct file *file, int datasync)
+int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
int xid;
int rc = 0;
@@ -1410,6 +1411,11 @@
struct inode *inode = file->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (rc)
+ return rc;
+ mutex_lock(&inode->i_mutex);
+
xid = GetXid();
cFYI(1, "Sync file - name: %s datasync: 0x%x",
@@ -1428,16 +1434,23 @@
rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
FreeXid(xid);
+ mutex_unlock(&inode->i_mutex);
return rc;
}
-int cifs_fsync(struct file *file, int datasync)
+int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int xid;
int rc = 0;
struct cifs_tcon *tcon;
struct cifsFileInfo *smbfile = file->private_data;
struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ struct inode *inode = file->f_mapping->host;
+
+ rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (rc)
+ return rc;
+ mutex_lock(&inode->i_mutex);
xid = GetXid();
@@ -1449,6 +1462,7 @@
rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
FreeXid(xid);
+ mutex_unlock(&inode->i_mutex);
return rc;
}
@@ -1737,7 +1751,7 @@
io_parms.pid = pid;
io_parms.tcon = pTcon;
io_parms.offset = *poffset;
- io_parms.length = len;
+ io_parms.length = cur_len;
rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
&read_data, &buf_type);
pSMBr = (struct smb_com_read_rsp *)read_data;
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 8166966..42e5363 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -92,6 +92,7 @@
if (cifsi->fscache) {
cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
+ fscache_uncache_all_inode_pages(cifsi->fscache, inode);
fscache_relinquish_cookie(cifsi->fscache, 1);
cifsi->fscache = NULL;
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 6751e74..965a3af 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -796,7 +796,7 @@
file->f_pos++;
case 1:
if (filldir(direntry, "..", 2, file->f_pos,
- file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
+ parent_ino(file->f_path.dentry), DT_DIR) < 0) {
cERROR(1, "Filldir for parent dir failed");
rc = -ENOMEM;
break;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 3892ab8..d3e6196 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -428,8 +428,7 @@
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
if (!ses->server->session_estab)
- flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
- NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
}
sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -465,10 +464,11 @@
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
- flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+ if (!ses->server->session_estab)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+ }
tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 6b443ff..b7143cf 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,7 +11,7 @@
void coda_destroy_inodecache(void);
int coda_init_inodecache(void);
-int coda_fsync(struct file *coda_file, int datasync);
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync);
void coda_sysctl_init(void);
void coda_sysctl_clean(void);
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 9b0c532..44e17e9 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -39,7 +39,7 @@
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask, unsigned int flags);
+int coda_permission(struct inode *inode, int mask);
int coda_revalidate_inode(struct dentry *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2b8dae4..0239433 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -132,11 +132,11 @@
}
-int coda_permission(struct inode *inode, int mask, unsigned int flags)
+int coda_permission(struct inode *inode, int mask)
{
int error;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -449,8 +449,7 @@
struct file *host_file;
struct dentry *de;
struct venus_dirent *vdir;
- unsigned long vdir_size =
- (unsigned long)(&((struct venus_dirent *)0)->d_name);
+ unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
unsigned int type;
struct qstr name;
ino_t ino;
@@ -474,7 +473,7 @@
coda_file->f_pos++;
}
if (coda_file->f_pos == 1) {
- ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR);
+ ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR);
if (ret < 0)
goto out;
result++;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 0433057..8edd404 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -199,7 +199,7 @@
return 0;
}
-int coda_fsync(struct file *coda_file, int datasync)
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
{
struct file *host_file;
struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
@@ -210,6 +210,11 @@
S_ISLNK(coda_inode->i_mode)))
return -EINVAL;
+ err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&coda_inode->i_mutex);
+
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
@@ -217,6 +222,7 @@
err = vfs_fsync(host_file, datasync);
if (!err && !datasync)
err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
+ mutex_unlock(&coda_inode->i_mutex);
return err;
}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index cb140ef..ee0981f 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
#include "coda_linux.h"
/* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
+static int coda_ioctl_permission(struct inode *inode, int mask);
static long coda_pioctl(struct file *filp, unsigned int cmd,
unsigned long user_data);
@@ -41,7 +41,7 @@
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
+static int coda_ioctl_permission(struct inode *inode, int mask)
{
return (mask & MAY_EXEC) ? -EACCES : 0;
}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index e141939..739fb59 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -37,7 +37,7 @@
/* These macros may change in future, to provide better st_ino semantics. */
#define OFFSET(x) ((x)->i_ino)
-static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset)
+static unsigned long cramino(const struct cramfs_inode *cino, unsigned int offset)
{
if (!cino->offset)
return offset + 1;
@@ -61,7 +61,7 @@
}
static struct inode *get_cramfs_inode(struct super_block *sb,
- struct cramfs_inode *cramfs_inode, unsigned int offset)
+ const struct cramfs_inode *cramfs_inode, unsigned int offset)
{
struct inode *inode;
static struct timespec zerotime;
@@ -317,7 +317,7 @@
/* Set it all up.. */
sb->s_op = &cramfs_ops;
root = get_cramfs_inode(sb, &super.root, 0);
- if (!root)
+ if (IS_ERR(root))
goto out;
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
@@ -423,6 +423,7 @@
static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
unsigned int offset = 0;
+ struct inode *inode = NULL;
int sorted;
mutex_lock(&read_mutex);
@@ -449,8 +450,8 @@
for (;;) {
if (!namelen) {
- mutex_unlock(&read_mutex);
- return ERR_PTR(-EIO);
+ inode = ERR_PTR(-EIO);
+ goto out;
}
if (name[namelen-1])
break;
@@ -462,17 +463,18 @@
if (retval > 0)
continue;
if (!retval) {
- struct cramfs_inode entry = *de;
- mutex_unlock(&read_mutex);
- d_add(dentry, get_cramfs_inode(dir->i_sb, &entry, dir_off));
- return NULL;
+ inode = get_cramfs_inode(dir->i_sb, de, dir_off);
+ break;
}
/* else (retval < 0) */
if (sorted)
break;
}
+out:
mutex_unlock(&read_mutex);
- d_add(dentry, NULL);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ d_add(dentry, inode);
return NULL;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 37f72ee..be18598 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -344,6 +344,24 @@
EXPORT_SYMBOL(d_drop);
/*
+ * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
+ * @dentry: dentry to drop
+ *
+ * This is called when we do a lookup on a placeholder dentry that needed to be
+ * looked up. The dentry should have been hashed in order for it to be found by
+ * the lookup code, but now needs to be unhashed while we do the actual lookup
+ * and clear the DCACHE_NEED_LOOKUP flag.
+ */
+void d_clear_need_lookup(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_clear_need_lookup);
+
+/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.
* If ref is non-zero, then decrement the refcount too.
@@ -432,8 +450,13 @@
if (d_unhashed(dentry))
goto kill_it;
- /* Otherwise leave it cached and ensure it's on the LRU */
- dentry->d_flags |= DCACHE_REFERENCED;
+ /*
+ * If this dentry needs lookup, don't set the referenced flag so that it
+ * is more likely to be cleaned up by the dcache shrinker in case of
+ * memory pressure.
+ */
+ if (!d_need_lookup(dentry))
+ dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
dentry->d_count--;
@@ -526,10 +549,6 @@
*/
rcu_read_lock();
ret = dentry->d_parent;
- if (!ret) {
- rcu_read_unlock();
- goto out;
- }
spin_lock(&ret->d_lock);
if (unlikely(ret != dentry->d_parent)) {
spin_unlock(&ret->d_lock);
@@ -540,7 +559,6 @@
BUG_ON(!ret->d_count);
ret->d_count++;
spin_unlock(&ret->d_lock);
-out:
return ret;
}
EXPORT_SYMBOL(dget_parent);
@@ -720,13 +738,11 @@
*
* If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
*/
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
{
- /* called from prune_dcache() and shrink_dcache_parent() */
struct dentry *dentry;
LIST_HEAD(referenced);
LIST_HEAD(tmp);
- int cnt = *count;
relock:
spin_lock(&dcache_lru_lock);
@@ -754,7 +770,7 @@
} else {
list_move_tail(&dentry->d_lru, &tmp);
spin_unlock(&dentry->d_lock);
- if (!--cnt)
+ if (!--count)
break;
}
cond_resched_lock(&dcache_lru_lock);
@@ -764,83 +780,22 @@
spin_unlock(&dcache_lru_lock);
shrink_dentry_list(&tmp);
-
- *count = cnt;
}
/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try to free
+ * prune_dcache_sb - shrink the dcache
+ * @nr_to_scan: number of entries to try to free
*
- * Shrink the dcache. This is done when we need more memory, or simply when we
- * need to unmount something (at which point we need to unuse all dentries).
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
+ * done when we need more memory and called from the superblock shrinker
+ * function.
*
- * This function may fail to free any resources if all the dentries are in use.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
*/
-static void prune_dcache(int count)
+void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
{
- struct super_block *sb, *p = NULL;
- int w_count;
- int unused = dentry_stat.nr_unused;
- int prune_ratio;
- int pruned;
-
- if (unused == 0 || count == 0)
- return;
- if (count >= unused)
- prune_ratio = 1;
- else
- prune_ratio = unused / count;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (list_empty(&sb->s_instances))
- continue;
- if (sb->s_nr_dentry_unused == 0)
- continue;
- sb->s_count++;
- /* Now, we reclaim unused dentrins with fairness.
- * We reclaim them same percentage from each superblock.
- * We calculate number of dentries to scan on this sb
- * as follows, but the implementation is arranged to avoid
- * overflows:
- * number of dentries to scan on this sb =
- * count * (number of dentries on this sb /
- * number of dentries in the machine)
- */
- spin_unlock(&sb_lock);
- if (prune_ratio != 1)
- w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
- else
- w_count = sb->s_nr_dentry_unused;
- pruned = w_count;
- /*
- * We need to be sure this filesystem isn't being unmounted,
- * otherwise we could race with generic_shutdown_super(), and
- * end up holding a reference to an inode while the filesystem
- * is unmounted. So we try to get s_umount, and make sure
- * s_root isn't NULL.
- */
- if (down_read_trylock(&sb->s_umount)) {
- if ((sb->s_root != NULL) &&
- (!list_empty(&sb->s_dentry_lru))) {
- __shrink_dcache_sb(sb, &w_count,
- DCACHE_REFERENCED);
- pruned -= w_count;
- }
- up_read(&sb->s_umount);
- }
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- count -= pruned;
- p = sb;
- /* more work left to do? */
- if (count <= 0)
- break;
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
+ __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
}
/**
@@ -1215,45 +1170,13 @@
int found;
while ((found = select_parent(parent)) != 0)
- __shrink_dcache_sb(sb, &found, 0);
+ __shrink_dcache_sb(sb, found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);
-/*
- * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
- *
- * We need to avoid reentering the filesystem if the caller is performing a
- * GFP_NOFS allocation attempt. One example deadlock is:
- *
- * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
- * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
- * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
- *
- * In this case we return -1 to tell the caller that we baled.
- */
-static int shrink_dcache_memory(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int nr = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (nr) {
- if (!(gfp_mask & __GFP_FS))
- return -1;
- prune_dcache(nr);
- }
-
- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker dcache_shrinker = {
- .shrink = shrink_dcache_memory,
- .seeks = DEFAULT_SEEKS,
-};
-
/**
- * d_alloc - allocate a dcache entry
- * @parent: parent of entry to allocate
+ * __d_alloc - allocate a dcache entry
+ * @sb: filesystem it will belong to
* @name: qstr of the name
*
* Allocates a dentry. It returns %NULL if there is insufficient memory
@@ -1261,7 +1184,7 @@
* copied and the copy passed in may be reused after this call.
*/
-struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
struct dentry *dentry;
char *dname;
@@ -1291,8 +1214,8 @@
spin_lock_init(&dentry->d_lock);
seqcount_init(&dentry->d_seq);
dentry->d_inode = NULL;
- dentry->d_parent = NULL;
- dentry->d_sb = NULL;
+ dentry->d_parent = dentry;
+ dentry->d_sb = sb;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
INIT_HLIST_BL_NODE(&dentry->d_hash);
@@ -1300,36 +1223,47 @@
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
INIT_LIST_HEAD(&dentry->d_u.d_child);
-
- if (parent) {
- spin_lock(&parent->d_lock);
- /*
- * don't need child lock because it is not subject
- * to concurrency here
- */
- __dget_dlock(parent);
- dentry->d_parent = parent;
- dentry->d_sb = parent->d_sb;
- d_set_d_op(dentry, dentry->d_sb->s_d_op);
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- spin_unlock(&parent->d_lock);
- }
+ d_set_d_op(dentry, dentry->d_sb->s_d_op);
this_cpu_inc(nr_dentry);
return dentry;
}
+
+/**
+ * d_alloc - allocate a dcache entry
+ * @parent: parent of entry to allocate
+ * @name: qstr of the name
+ *
+ * Allocates a dentry. It returns %NULL if there is insufficient memory
+ * available. On a success the dentry is returned. The name passed in is
+ * copied and the copy passed in may be reused after this call.
+ */
+struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+{
+ struct dentry *dentry = __d_alloc(parent->d_sb, name);
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&parent->d_lock);
+ /*
+ * don't need child lock because it is not subject
+ * to concurrency here
+ */
+ __dget_dlock(parent);
+ dentry->d_parent = parent;
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ spin_unlock(&parent->d_lock);
+
+ return dentry;
+}
EXPORT_SYMBOL(d_alloc);
struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
- struct dentry *dentry = d_alloc(NULL, name);
- if (dentry) {
- dentry->d_sb = sb;
- d_set_d_op(dentry, dentry->d_sb->s_d_op);
- dentry->d_parent = dentry;
+ struct dentry *dentry = __d_alloc(sb, name);
+ if (dentry)
dentry->d_flags |= DCACHE_DISCONNECTED;
- }
return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1499,13 +1433,9 @@
if (root_inode) {
static const struct qstr name = { .name = "/", .len = 1 };
- res = d_alloc(NULL, &name);
- if (res) {
- res->d_sb = root_inode->i_sb;
- d_set_d_op(res, res->d_sb->s_d_op);
- res->d_parent = res;
+ res = __d_alloc(root_inode->i_sb, &name);
+ if (res)
d_instantiate(res, root_inode);
- }
}
return res;
}
@@ -1566,13 +1496,11 @@
if (res)
goto out_iput;
- tmp = d_alloc(NULL, &anonstring);
+ tmp = __d_alloc(inode->i_sb, &anonstring);
if (!tmp) {
res = ERR_PTR(-ENOMEM);
goto out_iput;
}
- tmp->d_parent = tmp; /* make sure dput doesn't croak */
-
spin_lock(&inode->i_lock);
res = __d_find_any_alias(inode);
@@ -1584,8 +1512,6 @@
/* attach a disconnected dentry */
spin_lock(&tmp->d_lock);
- tmp->d_sb = inode->i_sb;
- d_set_d_op(tmp, tmp->d_sb->s_d_op);
tmp->d_inode = inode;
tmp->d_flags |= DCACHE_DISCONNECTED;
list_add(&tmp->d_alias, &inode->i_dentry);
@@ -1626,6 +1552,9 @@
{
struct dentry *new = NULL;
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
if (inode && S_ISDIR(inode->i_mode)) {
spin_lock(&inode->i_lock);
new = __d_find_alias(inode, 1);
@@ -1708,29 +1637,22 @@
}
/*
+ * We are going to instantiate this dentry, unhash it and clear the
+ * lookup flag so we can do that.
+ */
+ if (unlikely(d_need_lookup(found)))
+ d_clear_need_lookup(found);
+
+ /*
* Negative dentry: instantiate it unless the inode is a directory and
* already has a dentry.
*/
- spin_lock(&inode->i_lock);
- if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
- __d_instantiate(found, inode);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(found, inode);
- return found;
+ new = d_splice_alias(inode, found);
+ if (new) {
+ dput(found);
+ found = new;
}
-
- /*
- * In case a directory already has a (disconnected) entry grab a
- * reference to it, move it in place and use it.
- */
- new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- __dget(new);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(found, inode);
- d_move(new, found);
- iput(inode);
- dput(found);
- return new;
+ return found;
err_out:
iput(inode);
@@ -1813,8 +1735,6 @@
tname = dentry->d_name.name;
i = dentry->d_inode;
prefetch(tname);
- if (i)
- prefetch(i);
/*
* This seqcount check is required to ensure name and
* len are loaded atomically, so as not to walk off the
@@ -2213,14 +2133,15 @@
* The hash value has to match the hash queue that the dentry is on..
*/
/*
- * d_move - move a dentry
+ * __d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. The caller must
+ * hold rename_lock.
*/
-void d_move(struct dentry * dentry, struct dentry * target)
+static void __d_move(struct dentry * dentry, struct dentry * target)
{
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2228,8 +2149,6 @@
BUG_ON(d_ancestor(dentry, target));
BUG_ON(d_ancestor(target, dentry));
- write_seqlock(&rename_lock);
-
dentry_lock_for_move(dentry, target);
write_seqcount_begin(&dentry->d_seq);
@@ -2275,6 +2194,20 @@
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
+}
+
+/*
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+void d_move(struct dentry *dentry, struct dentry *target)
+{
+ write_seqlock(&rename_lock);
+ __d_move(dentry, target);
write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);
@@ -2302,7 +2235,7 @@
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
+ * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
@@ -2317,11 +2250,6 @@
if (alias->d_parent == dentry->d_parent)
goto out_unalias;
- /* Check for loops */
- ret = ERR_PTR(-ELOOP);
- if (d_ancestor(alias, dentry))
- goto out_err;
-
/* See lock_rename() */
ret = ERR_PTR(-EBUSY);
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
@@ -2331,7 +2259,7 @@
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
- d_move(alias, dentry);
+ __d_move(alias, dentry);
ret = alias;
out_err:
spin_unlock(&inode->i_lock);
@@ -2416,15 +2344,24 @@
alias = __d_find_alias(inode, 0);
if (alias) {
actual = alias;
- /* Is this an anonymous mountpoint that we could splice
- * into our tree? */
- if (IS_ROOT(alias)) {
+ write_seqlock(&rename_lock);
+
+ if (d_ancestor(alias, dentry)) {
+ /* Check for loops */
+ actual = ERR_PTR(-ELOOP);
+ } else if (IS_ROOT(alias)) {
+ /* Is this an anonymous mountpoint that we
+ * could splice into our tree? */
__d_materialise_dentry(dentry, alias);
+ write_sequnlock(&rename_lock);
__d_drop(alias);
goto found;
+ } else {
+ /* Nope, but we must(!) avoid directory
+ * aliasing */
+ actual = __d_unalias(inode, dentry, alias);
}
- /* Nope, but we must(!) avoid directory aliasing */
- actual = __d_unalias(inode, dentry, alias);
+ write_sequnlock(&rename_lock);
if (IS_ERR(actual))
dput(alias);
goto out_nolock;
@@ -3030,8 +2967,6 @@
*/
dentry_cache = KMEM_CACHE(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-
- register_shrinker(&dcache_shrinker);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ac5f164..01d2d9e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -135,6 +135,50 @@
struct page *pages[DIO_PAGES]; /* page buffer */
};
+static void __inode_dio_wait(struct inode *inode)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+ DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+ do {
+ prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&inode->i_dio_count))
+ schedule();
+ } while (atomic_read(&inode->i_dio_count));
+ finish_wait(wq, &q.wait);
+}
+
+/**
+ * inode_dio_wait - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually by inode->i_mutex.
+ */
+void inode_dio_wait(struct inode *inode)
+{
+ if (atomic_read(&inode->i_dio_count))
+ __inode_dio_wait(inode);
+}
+EXPORT_SYMBOL_GPL(inode_dio_wait);
+
+/*
+ * inode_dio_done - signal finish of a direct I/O request
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+void inode_dio_done(struct inode *inode)
+{
+ if (atomic_dec_and_test(&inode->i_dio_count))
+ wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+}
+EXPORT_SYMBOL_GPL(inode_dio_done);
+
/*
* How many pages are in the queue?
*/
@@ -249,14 +293,12 @@
if (dio->end_io && dio->result) {
dio->end_io(dio->iocb, offset, transferred,
dio->map_bh.b_private, ret, is_async);
- } else if (is_async) {
- aio_complete(dio->iocb, ret, 0);
+ } else {
+ if (is_async)
+ aio_complete(dio->iocb, ret, 0);
+ inode_dio_done(dio->inode);
}
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
return ret;
}
@@ -980,9 +1022,6 @@
return ret;
}
-/*
- * Releases both i_mutex and i_alloc_sem
- */
static ssize_t
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
@@ -1146,15 +1185,16 @@
* For writes this function is called under i_mutex and returns with
* i_mutex held, for reads, i_mutex is not held on entry, but it is
* taken and dropped again before returning.
- * For reads and writes i_alloc_sem is taken in shared mode and released
- * on I/O completion (which may happen asynchronously after returning to
- * the caller).
- *
* - if the flags value does NOT contain DIO_LOCKING we don't use any
* internal locking but rather rely on the filesystem to synchronize
* direct I/O reads/writes versus each other and truncate.
- * For reads and writes both i_mutex and i_alloc_sem are not held on
- * entry and are never taken.
+ *
+ * To help with locking against truncate we increment the i_dio_count
+ * counter before starting direct I/O, and decrement it once we are done.
+ * Truncate can wait for it to reach zero to provide exclusion. It is
+ * expected that filesystems provide exclusion between new direct I/O
+ * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
+ * but other filesystems need to take care of this on their own.
*/
ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1200,6 +1240,10 @@
}
}
+ /* watch out for a 0 len io from a tricksy fs */
+ if (rw == READ && end == offset)
+ return 0;
+
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
retval = -ENOMEM;
if (!dio)
@@ -1213,8 +1257,7 @@
dio->flags = flags;
if (dio->flags & DIO_LOCKING) {
- /* watch out for a 0 len io from a tricksy fs */
- if (rw == READ && end > offset) {
+ if (rw == READ) {
struct address_space *mapping =
iocb->ki_filp->f_mapping;
@@ -1229,15 +1272,14 @@
goto out;
}
}
-
- /*
- * Will be released at I/O completion, possibly in a
- * different thread.
- */
- down_read_non_owner(&inode->i_alloc_sem);
}
/*
+ * Will be decremented at I/O completion time.
+ */
+ atomic_inc(&inode->i_dio_count);
+
+ /*
* For file extending writes updating i_size before data
* writeouts complete can expose uninitialized blocks. So
* even for AIO, we need to wait for i/o to complete before
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index abc49f2..90e5997 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -14,17 +14,9 @@
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
-#include "ast.h"
-#define WAKE_ASTS 0
-
-static uint64_t ast_seq_count;
-static struct list_head ast_queue;
-static spinlock_t ast_queue_lock;
-static struct task_struct * astd_task;
-static unsigned long astd_wakeflags;
-static struct mutex astd_running;
-
+static uint64_t dlm_cb_seq;
+static spinlock_t dlm_cb_seq_spin;
static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
{
@@ -57,21 +49,13 @@
}
}
-void dlm_del_ast(struct dlm_lkb *lkb)
-{
- spin_lock(&ast_queue_lock);
- if (!list_empty(&lkb->lkb_astqueue))
- list_del_init(&lkb->lkb_astqueue);
- spin_unlock(&ast_queue_lock);
-}
-
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t prev_seq;
int prev_mode;
- int i;
+ int i, rv;
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (lkb->lkb_callbacks[i].seq)
@@ -100,7 +84,8 @@
mode,
(unsigned long long)prev_seq,
prev_mode);
- return 0;
+ rv = 0;
+ goto out;
}
}
@@ -109,6 +94,7 @@
lkb->lkb_callbacks[i].mode = mode;
lkb->lkb_callbacks[i].sb_status = status;
lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
+ rv = 0;
break;
}
@@ -117,21 +103,24 @@
lkb->lkb_id, (unsigned long long)seq,
flags, mode, status, sbflags);
dlm_dump_lkb_callbacks(lkb);
- return -1;
+ rv = -1;
+ goto out;
}
-
- return 0;
+ out:
+ return rv;
}
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid)
{
- int i;
+ int i, rv;
*resid = 0;
- if (!lkb->lkb_callbacks[0].seq)
- return -ENOENT;
+ if (!lkb->lkb_callbacks[0].seq) {
+ rv = -ENOENT;
+ goto out;
+ }
/* oldest undelivered cb is callbacks[0] */
@@ -163,7 +152,8 @@
cb->mode,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.mode);
- return 0;
+ rv = 0;
+ goto out;
}
}
@@ -176,171 +166,150 @@
memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_bast_time = ktime_get();
}
-
- return 0;
+ rv = 0;
+ out:
+ return rv;
}
-void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
- uint32_t sbflags)
+void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+ uint32_t sbflags)
{
- uint64_t seq;
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+ uint64_t new_seq, prev_seq;
int rv;
- spin_lock(&ast_queue_lock);
-
- seq = ++ast_seq_count;
+ spin_lock(&dlm_cb_seq_spin);
+ new_seq = ++dlm_cb_seq;
+ spin_unlock(&dlm_cb_seq_spin);
if (lkb->lkb_flags & DLM_IFL_USER) {
- spin_unlock(&ast_queue_lock);
- dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
+ dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
return;
}
- rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
- if (rv < 0) {
- spin_unlock(&ast_queue_lock);
- return;
- }
+ mutex_lock(&lkb->lkb_cb_mutex);
+ prev_seq = lkb->lkb_callbacks[0].seq;
- if (list_empty(&lkb->lkb_astqueue)) {
+ rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
+ if (rv < 0)
+ goto out;
+
+ if (!prev_seq) {
kref_get(&lkb->lkb_ref);
- list_add_tail(&lkb->lkb_astqueue, &ast_queue);
- }
- spin_unlock(&ast_queue_lock);
- set_bit(WAKE_ASTS, &astd_wakeflags);
- wake_up_process(astd_task);
+ if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
+ mutex_lock(&ls->ls_cb_mutex);
+ list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
+ mutex_unlock(&ls->ls_cb_mutex);
+ } else {
+ queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
+ }
+ }
+ out:
+ mutex_unlock(&lkb->lkb_cb_mutex);
}
-static void process_asts(void)
+void dlm_callback_work(struct work_struct *work)
{
- struct dlm_ls *ls = NULL;
- struct dlm_rsb *r = NULL;
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
void (*castfn) (void *astparam);
void (*bastfn) (void *astparam, int mode);
struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
int i, rv, resid;
-repeat:
- spin_lock(&ast_queue_lock);
- list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
- r = lkb->lkb_resource;
- ls = r->res_ls;
+ memset(&callbacks, 0, sizeof(callbacks));
- if (dlm_locking_stopped(ls))
- continue;
-
- /* we remove from astqueue list and remove everything in
- lkb_callbacks before releasing the spinlock so empty
- lkb_astqueue is always consistent with empty lkb_callbacks */
-
- list_del_init(&lkb->lkb_astqueue);
-
- castfn = lkb->lkb_astfn;
- bastfn = lkb->lkb_bastfn;
-
- memset(&callbacks, 0, sizeof(callbacks));
-
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
- if (rv < 0)
- break;
- }
- spin_unlock(&ast_queue_lock);
-
- if (resid) {
- /* shouldn't happen, for loop should have removed all */
- log_error(ls, "callback resid %d lkb %x",
- resid, lkb->lkb_id);
- }
-
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- if (!callbacks[i].seq)
- break;
- if (callbacks[i].flags & DLM_CB_SKIP) {
- continue;
- } else if (callbacks[i].flags & DLM_CB_BAST) {
- bastfn(lkb->lkb_astparam, callbacks[i].mode);
- } else if (callbacks[i].flags & DLM_CB_CAST) {
- lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
- lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
- castfn(lkb->lkb_astparam);
- }
- }
-
- /* removes ref for ast_queue, may cause lkb to be freed */
- dlm_put_lkb(lkb);
-
- cond_resched();
- goto repeat;
+ mutex_lock(&lkb->lkb_cb_mutex);
+ if (!lkb->lkb_callbacks[0].seq) {
+ /* no callback work exists, shouldn't happen */
+ log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
+ dlm_print_lkb(lkb);
+ dlm_dump_lkb_callbacks(lkb);
}
- spin_unlock(&ast_queue_lock);
+
+ for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+ rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
+ if (rv < 0)
+ break;
+ }
+
+ if (resid) {
+ /* cbs remain, loop should have removed all, shouldn't happen */
+ log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
+ resid);
+ dlm_print_lkb(lkb);
+ dlm_dump_lkb_callbacks(lkb);
+ }
+ mutex_unlock(&lkb->lkb_cb_mutex);
+
+ castfn = lkb->lkb_astfn;
+ bastfn = lkb->lkb_bastfn;
+
+ for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+ if (!callbacks[i].seq)
+ break;
+ if (callbacks[i].flags & DLM_CB_SKIP) {
+ continue;
+ } else if (callbacks[i].flags & DLM_CB_BAST) {
+ bastfn(lkb->lkb_astparam, callbacks[i].mode);
+ } else if (callbacks[i].flags & DLM_CB_CAST) {
+ lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
+ lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+ castfn(lkb->lkb_astparam);
+ }
+ }
+
+ /* undo kref_get from dlm_add_cb, may cause lkb to be freed */
+ dlm_put_lkb(lkb);
}
-static inline int no_asts(void)
+int dlm_callback_start(struct dlm_ls *ls)
{
- int ret;
-
- spin_lock(&ast_queue_lock);
- ret = list_empty(&ast_queue);
- spin_unlock(&ast_queue_lock);
- return ret;
-}
-
-static int dlm_astd(void *data)
-{
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!test_bit(WAKE_ASTS, &astd_wakeflags))
- schedule();
- set_current_state(TASK_RUNNING);
-
- mutex_lock(&astd_running);
- if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags))
- process_asts();
- mutex_unlock(&astd_running);
+ ls->ls_callback_wq = alloc_workqueue("dlm_callback",
+ WQ_UNBOUND |
+ WQ_MEM_RECLAIM |
+ WQ_NON_REENTRANT,
+ 0);
+ if (!ls->ls_callback_wq) {
+ log_print("can't start dlm_callback workqueue");
+ return -ENOMEM;
}
return 0;
}
-void dlm_astd_wake(void)
+void dlm_callback_stop(struct dlm_ls *ls)
{
- if (!no_asts()) {
- set_bit(WAKE_ASTS, &astd_wakeflags);
- wake_up_process(astd_task);
+ if (ls->ls_callback_wq)
+ destroy_workqueue(ls->ls_callback_wq);
+}
+
+void dlm_callback_suspend(struct dlm_ls *ls)
+{
+ set_bit(LSFL_CB_DELAY, &ls->ls_flags); /* dlm_add_cb() now parks lkbs on ls_cb_delay */
+ /* ls_callback_wq is only created for lockspaces made with DLM_LSFL_FS */
+ if (ls->ls_callback_wq)
+ flush_workqueue(ls->ls_callback_wq); /* wait for already-queued callback work */
+}
+
+void dlm_callback_resume(struct dlm_ls *ls)
+{
+ struct dlm_lkb *lkb, *safe;
+ int count = 0;
+
+ clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+
+ if (!ls->ls_callback_wq)
+ return;
+
+ mutex_lock(&ls->ls_cb_mutex);
+ list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
+ list_del_init(&lkb->lkb_cb_list);
+ queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
+ count++;
}
-}
+ mutex_unlock(&ls->ls_cb_mutex);
-int dlm_astd_start(void)
-{
- struct task_struct *p;
- int error = 0;
-
- INIT_LIST_HEAD(&ast_queue);
- spin_lock_init(&ast_queue_lock);
- mutex_init(&astd_running);
-
- p = kthread_run(dlm_astd, NULL, "dlm_astd");
- if (IS_ERR(p))
- error = PTR_ERR(p);
- else
- astd_task = p;
- return error;
-}
-
-void dlm_astd_stop(void)
-{
- kthread_stop(astd_task);
-}
-
-void dlm_astd_suspend(void)
-{
- mutex_lock(&astd_running);
-}
-
-void dlm_astd_resume(void)
-{
- mutex_unlock(&astd_running);
+ log_debug(ls, "dlm_callback_resume %d", count);
}
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 8aa89c9..757b551 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -18,14 +18,15 @@
int status, uint32_t sbflags, uint64_t seq);
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid);
-void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
- uint32_t sbflags);
+void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+ uint32_t sbflags);
-void dlm_astd_wake(void);
-int dlm_astd_start(void);
-void dlm_astd_stop(void);
-void dlm_astd_suspend(void);
-void dlm_astd_resume(void);
+void dlm_callback_work(struct work_struct *work);
+int dlm_callback_start(struct dlm_ls *ls);
+void dlm_callback_stop(struct dlm_ls *ls);
+void dlm_callback_suspend(struct dlm_ls *ls);
+void dlm_callback_resume(struct dlm_ls *ls);
#endif
+
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9b026ea..6cf72fc 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -28,7 +28,8 @@
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
* /config/dlm/<cluster>/comms/<comm>/nodeid
* /config/dlm/<cluster>/comms/<comm>/local
- * /config/dlm/<cluster>/comms/<comm>/addr
+ * /config/dlm/<cluster>/comms/<comm>/addr (write only)
+ * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
* The <cluster> level is useless, but I haven't figured out how to avoid it.
*/
@@ -80,6 +81,7 @@
size_t len);
static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
size_t len);
+static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf);
static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
size_t len);
@@ -92,7 +94,6 @@
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
unsigned int cl_rsbtbl_size;
- unsigned int cl_lkbtbl_size;
unsigned int cl_dirtbl_size;
unsigned int cl_recover_timer;
unsigned int cl_toss_secs;
@@ -101,13 +102,13 @@
unsigned int cl_protocol;
unsigned int cl_timewarn_cs;
unsigned int cl_waitwarn_us;
+ unsigned int cl_new_rsb_count;
};
enum {
CLUSTER_ATTR_TCP_PORT = 0,
CLUSTER_ATTR_BUFFER_SIZE,
CLUSTER_ATTR_RSBTBL_SIZE,
- CLUSTER_ATTR_LKBTBL_SIZE,
CLUSTER_ATTR_DIRTBL_SIZE,
CLUSTER_ATTR_RECOVER_TIMER,
CLUSTER_ATTR_TOSS_SECS,
@@ -116,6 +117,7 @@
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_TIMEWARN_CS,
CLUSTER_ATTR_WAITWARN_US,
+ CLUSTER_ATTR_NEW_RSB_COUNT,
};
struct cluster_attribute {
@@ -160,7 +162,6 @@
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
-CLUSTER_ATTR(lkbtbl_size, 1);
CLUSTER_ATTR(dirtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
@@ -169,12 +170,12 @@
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(timewarn_cs, 1);
CLUSTER_ATTR(waitwarn_us, 0);
+CLUSTER_ATTR(new_rsb_count, 0);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
- [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
@@ -183,6 +184,7 @@
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
+ [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
NULL,
};
@@ -190,6 +192,7 @@
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
COMM_ATTR_ADDR,
+ COMM_ATTR_ADDR_LIST,
};
struct comm_attribute {
@@ -217,14 +220,22 @@
static struct comm_attribute comm_attr_addr = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "addr",
- .ca_mode = S_IRUGO | S_IWUSR },
+ .ca_mode = S_IWUSR },
.store = comm_addr_write,
};
+static struct comm_attribute comm_attr_addr_list = {
+ .attr = { .ca_owner = THIS_MODULE,
+ .ca_name = "addr_list",
+ .ca_mode = S_IRUGO },
+ .show = comm_addr_list_read,
+};
+
static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
[COMM_ATTR_LOCAL] = &comm_attr_local.attr,
[COMM_ATTR_ADDR] = &comm_attr_addr.attr,
+ [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr,
NULL,
};
@@ -435,7 +446,6 @@
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
- cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
cl->cl_recover_timer = dlm_config.ci_recover_timer;
cl->cl_toss_secs = dlm_config.ci_toss_secs;
@@ -444,6 +454,7 @@
cl->cl_protocol = dlm_config.ci_protocol;
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+ cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
@@ -720,6 +731,50 @@
return len;
}
+static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf)
+{
+ ssize_t s;
+ ssize_t allowance;
+ int i;
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr_in;
+ struct sockaddr_in6 *addr_in6;
+ /* configfs show handler: writes one text line per entry of cm->addr[] */
+ /* buf0 holds one line; IPv6 sizing taken from ip6_addr_string() in lib/vsprintf.c */
+ char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")];
+
+
+ /* Output byte budget, derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */
+ allowance = 4096;
+ buf[0] = '\0';
+
+ for (i = 0; i < cm->addr_count; i++) {
+ addr = cm->addr[i];
+
+ switch(addr->ss_family) {
+ case AF_INET:
+ addr_in = (struct sockaddr_in *)addr;
+ s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ addr_in6 = (struct sockaddr_in6 *)addr;
+ s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr);
+ break;
+ default:
+ s = sprintf(buf0, "%s\n", "<UNKNOWN>");
+ break;
+ }
+ allowance -= s;
+ if (allowance >= 0)
+ strcat(buf, buf0); /* NOTE(review): at allowance == 0 the NUL lands at buf[4096] — confirm buf has room */
+ else {
+ allowance += s; /* line does not fit: undo the charge and stop */
+ break;
+ }
+ }
+ return 4096 - allowance; /* number of bytes appended to buf */
+}
+
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
@@ -983,7 +1038,6 @@
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 1024
-#define DEFAULT_LKBTBL_SIZE 1024
#define DEFAULT_DIRTBL_SIZE 1024
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
@@ -992,12 +1046,12 @@
#define DEFAULT_PROTOCOL 0
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
#define DEFAULT_WAITWARN_US 0
+#define DEFAULT_NEW_RSB_COUNT 128
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
.ci_buffer_size = DEFAULT_BUFFER_SIZE,
.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
.ci_recover_timer = DEFAULT_RECOVER_TIMER,
.ci_toss_secs = DEFAULT_TOSS_SECS,
@@ -1005,6 +1059,7 @@
.ci_log_debug = DEFAULT_LOG_DEBUG,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
- .ci_waitwarn_us = DEFAULT_WAITWARN_US
+ .ci_waitwarn_us = DEFAULT_WAITWARN_US,
+ .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dd0ce24..3099d0d 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -20,7 +20,6 @@
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
- int ci_lkbtbl_size;
int ci_dirtbl_size;
int ci_recover_timer;
int ci_toss_secs;
@@ -29,6 +28,7 @@
int ci_protocol;
int ci_timewarn_cs;
int ci_waitwarn_us;
+ int ci_new_rsb_count;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 0262451..fe2860c 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,6 +37,7 @@
#include <linux/jhash.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
#include <asm/uaccess.h>
#include <linux/dlm.h>
@@ -52,7 +53,6 @@
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
-struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
@@ -108,11 +108,6 @@
spinlock_t lock;
};
-struct dlm_lkbtable {
- struct list_head list;
- rwlock_t lock;
- uint16_t counter;
-};
/*
* Lockspace member (per node in a ls)
@@ -248,17 +243,18 @@
int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */
- struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
struct list_head lkb_statequeue; /* rsb g/c/w list */
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
- struct list_head lkb_astqueue; /* need ast to be sent */
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
ktime_t lkb_wait_time;
unsigned long lkb_timeout_cs;
+ struct mutex lkb_cb_mutex;
+ struct work_struct lkb_cb_work;
+ struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
struct dlm_callback lkb_last_cast;
struct dlm_callback lkb_last_bast;
@@ -299,7 +295,7 @@
int res_recover_locks_count;
char *res_lvbptr;
- char res_name[1];
+ char res_name[DLM_RESNAME_MAXLEN+1];
};
/* find_rsb() flags */
@@ -465,12 +461,12 @@
unsigned long ls_scan_time;
struct kobject ls_kobj;
+ struct idr ls_lkbidr;
+ spinlock_t ls_lkbidr_spin;
+
struct dlm_rsbtable *ls_rsbtbl;
uint32_t ls_rsbtbl_size;
- struct dlm_lkbtable *ls_lkbtbl;
- uint32_t ls_lkbtbl_size;
-
struct dlm_dirtable *ls_dirtbl;
uint32_t ls_dirtbl_size;
@@ -483,6 +479,10 @@
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
+ spinlock_t ls_new_rsb_spin;
+ int ls_new_rsb_count;
+ struct list_head ls_new_rsb; /* new rsb structs */
+
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
@@ -506,8 +506,12 @@
struct miscdevice ls_device;
+ struct workqueue_struct *ls_callback_wq;
+
/* recovery related */
+ struct mutex ls_cb_mutex;
+ struct list_head ls_cb_delay; /* save for queue_work later */
struct timer_list ls_timer;
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
@@ -544,6 +548,7 @@
#define LSFL_RCOM_WAIT 4
#define LSFL_UEVENT_WAIT 5
#define LSFL_TIMEWARN 6
+#define LSFL_CB_DELAY 7
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f71d0b5..83b5e32 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -305,7 +305,7 @@
rv = -EDEADLK;
}
- dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
+ dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
}
static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,7 +319,7 @@
if (is_master_copy(lkb)) {
send_bast(r, lkb, rqmode);
} else {
- dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0);
+ dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
}
}
@@ -327,19 +327,68 @@
* Basic operations on rsb's and lkb's
*/
-static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
+static int pre_rsb_struct(struct dlm_ls *ls)
+{
+ struct dlm_rsb *r1, *r2;
+ int count = 0;
+ /* Top up the ls_new_rsb pool by up to two rsbs; -ENOMEM only if it ends up empty. */
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
+ spin_unlock(&ls->ls_new_rsb_spin);
+ return 0; /* pool holds more than half of ci_new_rsb_count: no refill */
+ }
+ spin_unlock(&ls->ls_new_rsb_spin);
+
+ r1 = dlm_allocate_rsb(ls); /* allocate with the spinlock dropped */
+ r2 = dlm_allocate_rsb(ls);
+
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (r1) {
+ list_add(&r1->res_hashchain, &ls->ls_new_rsb);
+ ls->ls_new_rsb_count++;
+ }
+ if (r2) {
+ list_add(&r2->res_hashchain, &ls->ls_new_rsb);
+ ls->ls_new_rsb_count++;
+ }
+ count = ls->ls_new_rsb_count;
+ spin_unlock(&ls->ls_new_rsb_spin);
+
+ if (!count) /* nothing pooled, so both allocations failed */
+ return -ENOMEM;
+ return 0;
+}
+
+/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
+ unlock any spinlocks, go back and call pre_rsb_struct again.
+ Otherwise, take an rsb off the list and return it. */
+
+static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
+ struct dlm_rsb **r_ret)
{
struct dlm_rsb *r;
+ int count;
- r = dlm_allocate_rsb(ls, len);
- if (!r)
- return NULL;
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (list_empty(&ls->ls_new_rsb)) {
+ count = ls->ls_new_rsb_count;
+ spin_unlock(&ls->ls_new_rsb_spin);
+ log_debug(ls, "find_rsb retry %d %d %s",
+ count, dlm_config.ci_new_rsb_count, name);
+ return -EAGAIN;
+ }
+
+ r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
+ list_del(&r->res_hashchain);
+ ls->ls_new_rsb_count--;
+ spin_unlock(&ls->ls_new_rsb_spin);
r->res_ls = ls;
r->res_length = len;
memcpy(r->res_name, name, len);
mutex_init(&r->res_mutex);
+ INIT_LIST_HEAD(&r->res_hashchain);
INIT_LIST_HEAD(&r->res_lookup);
INIT_LIST_HEAD(&r->res_grantqueue);
INIT_LIST_HEAD(&r->res_convertqueue);
@@ -347,7 +396,8 @@
INIT_LIST_HEAD(&r->res_root_list);
INIT_LIST_HEAD(&r->res_recover_list);
- return r;
+ *r_ret = r;
+ return 0;
}
static int search_rsb_list(struct list_head *head, char *name, int len,
@@ -405,16 +455,6 @@
return error;
}
-static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
- unsigned int flags, struct dlm_rsb **r_ret)
-{
- int error;
- spin_lock(&ls->ls_rsbtbl[b].lock);
- error = _search_rsb(ls, name, len, b, flags, r_ret);
- spin_unlock(&ls->ls_rsbtbl[b].lock);
- return error;
-}
-
/*
* Find rsb in rsbtbl and potentially create/add one
*
@@ -432,35 +472,48 @@
static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret)
{
- struct dlm_rsb *r = NULL, *tmp;
+ struct dlm_rsb *r = NULL;
uint32_t hash, bucket;
- int error = -EINVAL;
+ int error;
- if (namelen > DLM_RESNAME_MAXLEN)
+ if (namelen > DLM_RESNAME_MAXLEN) {
+ error = -EINVAL;
goto out;
+ }
if (dlm_no_directory(ls))
flags |= R_CREATE;
- error = 0;
hash = jhash(name, namelen, 0);
bucket = hash & (ls->ls_rsbtbl_size - 1);
- error = search_rsb(ls, name, namelen, bucket, flags, &r);
+ retry:
+ if (flags & R_CREATE) {
+ error = pre_rsb_struct(ls);
+ if (error < 0)
+ goto out;
+ }
+
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
+
+ error = _search_rsb(ls, name, namelen, bucket, flags, &r);
if (!error)
- goto out;
+ goto out_unlock;
if (error == -EBADR && !(flags & R_CREATE))
- goto out;
+ goto out_unlock;
/* the rsb was found but wasn't a master copy */
if (error == -ENOTBLK)
- goto out;
+ goto out_unlock;
- error = -ENOMEM;
- r = create_rsb(ls, name, namelen);
- if (!r)
- goto out;
+ error = get_rsb_struct(ls, name, namelen, &r);
+ if (error == -EAGAIN) {
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+ goto retry;
+ }
+ if (error)
+ goto out_unlock;
r->res_hash = hash;
r->res_bucket = bucket;
@@ -474,18 +527,10 @@
nodeid = 0;
r->res_nodeid = nodeid;
}
-
- spin_lock(&ls->ls_rsbtbl[bucket].lock);
- error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
- if (!error) {
- spin_unlock(&ls->ls_rsbtbl[bucket].lock);
- dlm_free_rsb(r);
- r = tmp;
- goto out;
- }
list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
- spin_unlock(&ls->ls_rsbtbl[bucket].lock);
error = 0;
+ out_unlock:
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
out:
*r_ret = r;
return error;
@@ -580,9 +625,8 @@
static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
{
- struct dlm_lkb *lkb, *tmp;
- uint32_t lkid = 0;
- uint16_t bucket;
+ struct dlm_lkb *lkb;
+ int rv, id;
lkb = dlm_allocate_lkb(ls);
if (!lkb)
@@ -594,60 +638,52 @@
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
INIT_LIST_HEAD(&lkb->lkb_time_list);
- INIT_LIST_HEAD(&lkb->lkb_astqueue);
+ INIT_LIST_HEAD(&lkb->lkb_cb_list);
+ mutex_init(&lkb->lkb_cb_mutex);
+ INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
- get_random_bytes(&bucket, sizeof(bucket));
- bucket &= (ls->ls_lkbtbl_size - 1);
+ /*
+ * Take the lock id from the lockspace idr, lowest id 1. The idr
+ * is preloaded before ls_lkbidr_spin is taken; on -EAGAIN from
+ * the id allocation, preload again and retry.
+ */
+ retry:
+ rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
+ if (!rv) {
+ /* lkb is not in the idr yet, so nothing else will free it */
+ dlm_free_lkb(lkb);
+ return -ENOMEM;
+ }
- write_lock(&ls->ls_lkbtbl[bucket].lock);
+ spin_lock(&ls->ls_lkbidr_spin);
+ rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
+ if (!rv)
+ lkb->lkb_id = id;
+ spin_unlock(&ls->ls_lkbidr_spin);
- /* counter can roll over so we must verify lkid is not in use */
+ if (rv == -EAGAIN)
+ goto retry;
- while (lkid == 0) {
- lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++;
-
- list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
- lkb_idtbl_list) {
- if (tmp->lkb_id != lkid)
- continue;
- lkid = 0;
- break;
- }
+ if (rv < 0) {
+ log_error(ls, "create_lkb idr error %d", rv);
+ /* id allocation failed: the lkb was never added to the idr */
+ dlm_free_lkb(lkb);
+ return rv;
}
- lkb->lkb_id = lkid;
- list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
-
*lkb_ret = lkb;
return 0;
}
-static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
-{
- struct dlm_lkb *lkb;
- uint16_t bucket = (lkid >> 16);
-
- list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
- if (lkb->lkb_id == lkid)
- return lkb;
- }
- return NULL;
-}
-
static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
{
struct dlm_lkb *lkb;
- uint16_t bucket = (lkid >> 16);
- if (bucket >= ls->ls_lkbtbl_size)
- return -EBADSLT;
-
- read_lock(&ls->ls_lkbtbl[bucket].lock);
- lkb = __find_lkb(ls, lkid);
+ spin_lock(&ls->ls_lkbidr_spin);
+ lkb = idr_find(&ls->ls_lkbidr, lkid);
if (lkb)
kref_get(&lkb->lkb_ref);
- read_unlock(&ls->ls_lkbtbl[bucket].lock);
+ spin_unlock(&ls->ls_lkbidr_spin);
*lkb_ret = lkb;
return lkb ? 0 : -ENOENT;
@@ -668,12 +694,12 @@
static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
- uint16_t bucket = (lkb->lkb_id >> 16);
+ uint32_t lkid = lkb->lkb_id;
- write_lock(&ls->ls_lkbtbl[bucket].lock);
+ spin_lock(&ls->ls_lkbidr_spin);
if (kref_put(&lkb->lkb_ref, kill_lkb)) {
- list_del(&lkb->lkb_idtbl_list);
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
+ idr_remove(&ls->ls_lkbidr, lkid);
+ spin_unlock(&ls->ls_lkbidr_spin);
detach_lkb(lkb);
@@ -683,7 +709,7 @@
dlm_free_lkb(lkb);
return 1;
} else {
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
+ spin_unlock(&ls->ls_lkbidr_spin);
return 0;
}
}
@@ -849,9 +875,7 @@
if (!num_nodes) {
num_nodes = ls->ls_num_nodes;
- warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
- if (warned)
- memset(warned, 0, num_nodes * sizeof(int));
+ warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
}
if (!warned)
continue;
@@ -863,9 +887,7 @@
dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
}
mutex_unlock(&ls->ls_waiters_mutex);
-
- if (warned)
- kfree(warned);
+ kfree(warned);
if (debug_expired)
log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
@@ -2401,9 +2423,6 @@
if (deadlk) {
/* it's left on the granted queue */
- log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
- lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
- lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
revert_lock(r, lkb);
queue_cast(r, lkb, -EDEADLK);
error = -EDEADLK;
@@ -3993,8 +4012,6 @@
default:
log_error(ls, "unknown message type %d", ms->m_type);
}
-
- dlm_astd_wake();
}
/* If the lockspace is in recovery mode (locking stopped), then normal
@@ -4133,7 +4150,7 @@
struct dlm_message *ms_stub;
int wait_type, stub_unlock_result, stub_cancel_result;
- ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
+ ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
if (!ms_stub) {
log_error(ls, "dlm_recover_waiters_pre no mem");
return;
@@ -4809,7 +4826,7 @@
goto out_put;
spin_lock(&ua->proc->locks_spin);
- /* dlm_user_add_ast() may have already taken lkb off the proc list */
+ /* dlm_user_add_cb() may have already taken lkb off the proc list */
if (!list_empty(&lkb->lkb_ownqueue))
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
@@ -4946,7 +4963,7 @@
/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
(which does lock_rsb) due to deadlock with receiving a message that does
- lock_rsb followed by dlm_user_add_ast() */
+ lock_rsb followed by dlm_user_add_cb() */
static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
struct dlm_user_proc *proc)
@@ -4969,7 +4986,7 @@
return lkb;
}
-/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which
+/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
which we clear here. */
@@ -5011,10 +5028,10 @@
dlm_put_lkb(lkb);
}
- list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
memset(&lkb->lkb_callbacks, 0,
sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
dlm_put_lkb(lkb);
}
@@ -5053,10 +5070,10 @@
spin_unlock(&proc->locks_spin);
spin_lock(&proc->asts_spin);
- list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
memset(&lkb->lkb_callbacks, 0,
sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
dlm_put_lkb(lkb);
}
spin_unlock(&proc->asts_spin);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 14cbf40..a1d8f1a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -15,7 +15,6 @@
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
-#include "ast.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
@@ -24,6 +23,7 @@
#include "recover.h"
#include "requestqueue.h"
#include "user.h"
+#include "ast.h"
static int ls_count;
static struct mutex ls_lock;
@@ -359,17 +359,10 @@
{
int error;
- /* Thread which process lock requests for all lockspace's */
- error = dlm_astd_start();
- if (error) {
- log_print("cannot start dlm_astd thread %d", error);
- goto fail;
- }
-
error = dlm_scand_start();
if (error) {
log_print("cannot start dlm_scand thread %d", error);
- goto astd_fail;
+ goto fail;
}
/* Thread for sending/receiving messages for all lockspace's */
@@ -383,8 +376,6 @@
scand_fail:
dlm_scand_stop();
- astd_fail:
- dlm_astd_stop();
fail:
return error;
}
@@ -393,7 +384,6 @@
{
dlm_scand_stop();
dlm_lowcomms_stop();
- dlm_astd_stop();
}
static int new_lockspace(const char *name, int namelen, void **lockspace,
@@ -463,7 +453,7 @@
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
- ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
+ ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
if (!ls->ls_rsbtbl)
goto out_lsfree;
for (i = 0; i < size; i++) {
@@ -472,22 +462,13 @@
spin_lock_init(&ls->ls_rsbtbl[i].lock);
}
- size = dlm_config.ci_lkbtbl_size;
- ls->ls_lkbtbl_size = size;
-
- ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
- if (!ls->ls_lkbtbl)
- goto out_rsbfree;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
- rwlock_init(&ls->ls_lkbtbl[i].lock);
- ls->ls_lkbtbl[i].counter = 1;
- }
+ idr_init(&ls->ls_lkbidr);
+ spin_lock_init(&ls->ls_lkbidr_spin);
size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
- ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
+ ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
if (!ls->ls_dirtbl)
goto out_lkbfree;
for (i = 0; i < size; i++) {
@@ -502,6 +483,9 @@
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
+ INIT_LIST_HEAD(&ls->ls_new_rsb);
+ spin_lock_init(&ls->ls_new_rsb_spin);
+
INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone);
ls->ls_num_nodes = 0;
@@ -520,6 +504,9 @@
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
+ mutex_init(&ls->ls_cb_mutex);
+ INIT_LIST_HEAD(&ls->ls_cb_delay);
+
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
spin_lock_init(&ls->ls_recover_lock);
@@ -553,18 +540,26 @@
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
+ if (flags & DLM_LSFL_FS) {
+ error = dlm_callback_start(ls);
+ if (error) {
+ log_error(ls, "can't start dlm_callback %d", error);
+ goto out_delist;
+ }
+ }
+
/* needs to find ls in lslist */
error = dlm_recoverd_start(ls);
if (error) {
log_error(ls, "can't start dlm_recoverd %d", error);
- goto out_delist;
+ goto out_callback;
}
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
if (error)
- goto out_stop;
+ goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
/* let kobject handle freeing of ls if there's an error */
@@ -578,7 +573,7 @@
error = do_uevent(ls, 1);
if (error)
- goto out_stop;
+ goto out_recoverd;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
@@ -595,19 +590,20 @@
do_uevent(ls, 0);
dlm_clear_members(ls);
kfree(ls->ls_node_array);
- out_stop:
+ out_recoverd:
dlm_recoverd_stop(ls);
+ out_callback:
+ dlm_callback_stop(ls);
out_delist:
spin_lock(&lslist_lock);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
kfree(ls->ls_recover_buf);
out_dirfree:
- kfree(ls->ls_dirtbl);
+ vfree(ls->ls_dirtbl);
out_lkbfree:
- kfree(ls->ls_lkbtbl);
- out_rsbfree:
- kfree(ls->ls_rsbtbl);
+ idr_destroy(&ls->ls_lkbidr);
+ vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
kobject_put(&ls->ls_kobj);
@@ -641,50 +637,64 @@
return error;
}
-/* Return 1 if the lockspace still has active remote locks,
- * 2 if the lockspace still has active local locks.
- */
-static int lockspace_busy(struct dlm_ls *ls)
+static int lkb_idr_is_local(int id, void *p, void *data)
{
- int i, lkb_found = 0;
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = p;
- /* NOTE: We check the lockidtbl here rather than the resource table.
- This is because there may be LKBs queued as ASTs that have been
- unlinked from their RSBs and are pending deletion once the AST has
- been delivered */
+ if (!lkb->lkb_nodeid)
+ return 1;
+ return 0;
+}
- for (i = 0; i < ls->ls_lkbtbl_size; i++) {
- read_lock(&ls->ls_lkbtbl[i].lock);
- if (!list_empty(&ls->ls_lkbtbl[i].list)) {
- lkb_found = 1;
- list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
- lkb_idtbl_list) {
- if (!lkb->lkb_nodeid) {
- read_unlock(&ls->ls_lkbtbl[i].lock);
- return 2;
- }
- }
- }
- read_unlock(&ls->ls_lkbtbl[i].lock);
+static int lkb_idr_is_any(int id, void *p, void *data)
+{
+ return 1;
+}
+
+static int lkb_idr_free(int id, void *p, void *data)
+{
+ struct dlm_lkb *lkb = p;
+
+ if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
+ dlm_free_lvb(lkb->lkb_lvbptr);
+
+ dlm_free_lkb(lkb);
+ return 0;
+}
+
+/* NOTE: We check the lkbidr here rather than the resource table.
+ This is because there may be LKBs queued as ASTs that have been unlinked
+ from their RSBs and are pending deletion once the AST has been delivered */
+
+static int lockspace_busy(struct dlm_ls *ls, int force)
+{
+ int rv;
+
+ spin_lock(&ls->ls_lkbidr_spin);
+ if (force == 0) {
+ rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
+ } else if (force == 1) {
+ rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
+ } else {
+ rv = 0;
}
- return lkb_found;
+ spin_unlock(&ls->ls_lkbidr_spin);
+ return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
- struct dlm_lkb *lkb;
struct dlm_rsb *rsb;
struct list_head *head;
int i, busy, rv;
- busy = lockspace_busy(ls);
+ busy = lockspace_busy(ls, force);
spin_lock(&lslist_lock);
if (ls->ls_create_count == 1) {
- if (busy > force)
+ if (busy) {
rv = -EBUSY;
- else {
+ } else {
/* remove_lockspace takes ls off lslist */
ls->ls_create_count = 0;
rv = 0;
@@ -708,12 +718,12 @@
dlm_recoverd_stop(ls);
+ dlm_callback_stop(ls);
+
remove_lockspace(ls);
dlm_delete_debug_file(ls);
- dlm_astd_suspend();
-
kfree(ls->ls_recover_buf);
/*
@@ -721,31 +731,15 @@
*/
dlm_dir_clear(ls);
- kfree(ls->ls_dirtbl);
+ vfree(ls->ls_dirtbl);
/*
- * Free all lkb's on lkbtbl[] lists.
+ * Free all lkb's in idr
*/
- for (i = 0; i < ls->ls_lkbtbl_size; i++) {
- head = &ls->ls_lkbtbl[i].list;
- while (!list_empty(head)) {
- lkb = list_entry(head->next, struct dlm_lkb,
- lkb_idtbl_list);
-
- list_del(&lkb->lkb_idtbl_list);
-
- dlm_del_ast(lkb);
-
- if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
- dlm_free_lvb(lkb->lkb_lvbptr);
-
- dlm_free_lkb(lkb);
- }
- }
- dlm_astd_resume();
-
- kfree(ls->ls_lkbtbl);
+ idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
+ idr_remove_all(&ls->ls_lkbidr);
+ idr_destroy(&ls->ls_lkbidr);
/*
* Free all rsb's on rsbtbl[] lists
@@ -770,7 +764,14 @@
}
}
- kfree(ls->ls_rsbtbl);
+ vfree(ls->ls_rsbtbl);
+
+ while (!list_empty(&ls->ls_new_rsb)) {
+ rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
+ res_hashchain);
+ list_del(&rsb->res_hashchain);
+ dlm_free_rsb(rsb);
+ }
/*
* Free structures on any other lists
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5e2c71f..990626e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -512,12 +512,10 @@
}
make_sockaddr(&prim.ssp_addr, 0, &addr_len);
if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
- int i;
unsigned char *b=(unsigned char *)&prim.ssp_addr;
log_print("reject connect from unknown addr");
- for (i=0; i<sizeof(struct sockaddr_storage);i++)
- printk("%02x ", b[i]);
- printk("\n");
+ print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
+ b, sizeof(struct sockaddr_storage));
sctp_send_shutdown(prim.ssp_assoc_id);
return;
}
@@ -748,7 +746,10 @@
/* Get the new node's NODEID */
make_sockaddr(&peeraddr, 0, &len);
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+ unsigned char *b=(unsigned char *)&peeraddr;
log_print("connect from non cluster node");
+ print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
+ b, sizeof(struct sockaddr_storage));
sock_release(newsock);
mutex_unlock(&con->sock_mutex);
return -1;
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 8e0d00d..da64df7 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -16,6 +16,7 @@
#include "memory.h"
static struct kmem_cache *lkb_cache;
+static struct kmem_cache *rsb_cache;
int __init dlm_memory_init(void)
@@ -26,6 +27,14 @@
__alignof__(struct dlm_lkb), 0, NULL);
if (!lkb_cache)
ret = -ENOMEM;
+
+ rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
+ __alignof__(struct dlm_rsb), 0, NULL);
+ if (!rsb_cache) {
+ kmem_cache_destroy(lkb_cache);
+ ret = -ENOMEM;
+ }
+
return ret;
}
@@ -33,6 +42,8 @@
{
if (lkb_cache)
kmem_cache_destroy(lkb_cache);
+ if (rsb_cache)
+ kmem_cache_destroy(rsb_cache);
}
char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -48,16 +59,11 @@
kfree(p);
}
-/* FIXME: have some minimal space built-in to rsb for the name and
- kmalloc a separate name if needed, like dentries are done */
-
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
{
struct dlm_rsb *r;
- DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
-
- r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
+ r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
return r;
}
@@ -65,7 +71,7 @@
{
if (r->res_lvbptr)
dlm_free_lvb(r->res_lvbptr);
- kfree(r);
+ kmem_cache_free(rsb_cache, r);
}
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 485fb29..177c11c 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -16,7 +16,7 @@
int dlm_memory_init(void);
void dlm_memory_exit(void);
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen);
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
void dlm_free_rsb(struct dlm_rsb *r);
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
void dlm_free_lkb(struct dlm_lkb *l);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index fd677c8..774da3c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -58,13 +58,7 @@
mutex_lock(&ls->ls_recoverd_active);
- /*
- * Suspending and resuming dlm_astd ensures that no lkb's from this ls
- * will be processed by dlm_astd during recovery.
- */
-
- dlm_astd_suspend();
- dlm_astd_resume();
+ dlm_callback_suspend(ls);
/*
* Free non-master tossed rsb's. Master rsb's are kept on toss
@@ -202,6 +196,8 @@
dlm_adjust_timeouts(ls);
+ dlm_callback_resume(ls);
+
error = enable_locking(ls, rv->seq);
if (error) {
log_debug(ls, "enable_locking failed %d", error);
@@ -222,8 +218,6 @@
dlm_grant_after_purge(ls);
- dlm_astd_wake();
-
log_debug(ls, "recover %llx done: %u ms",
(unsigned long long)rv->seq,
jiffies_to_msecs(jiffies - start));
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e96bf3e..d8ea607 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -213,9 +213,9 @@
goto out;
}
- if (list_empty(&lkb->lkb_astqueue)) {
+ if (list_empty(&lkb->lkb_cb_list)) {
kref_get(&lkb->lkb_ref);
- list_add_tail(&lkb->lkb_astqueue, &proc->asts);
+ list_add_tail(&lkb->lkb_cb_list, &proc->asts);
wake_up_interruptible(&proc->wait);
}
spin_unlock(&proc->asts_spin);
@@ -832,24 +832,24 @@
}
/* if we empty lkb_callbacks, we don't want to unlock the spinlock
- without removing lkb_astqueue; so empty lkb_astqueue is always
+ without removing lkb_cb_list; so empty lkb_cb_list is always
consistent with empty lkb_callbacks */
- lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
+ lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
if (rv < 0) {
/* this shouldn't happen; lkb should have been removed from
list when resid was zero */
log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
spin_unlock(&proc->asts_spin);
/* removes ref for proc->asts, may cause lkb to be freed */
dlm_put_lkb(lkb);
goto try_another;
}
if (!resid)
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
spin_unlock(&proc->asts_spin);
if (cb.flags & DLM_CB_SKIP) {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 4ec9eb0..c6ac98c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -270,14 +270,15 @@
}
static int
-ecryptfs_fsync(struct file *file, int datasync)
+ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int rc = 0;
- rc = generic_file_fsync(file, datasync);
+ rc = generic_file_fsync(file, start, end, datasync);
if (rc)
goto out;
- rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
+ rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
+ datasync);
out:
return rc;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7349ade..340c657 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -147,7 +147,6 @@
* @lower_dir_inode: inode of the parent in the lower fs of the new file
* @dentry: New file's dentry
* @mode: The mode of the new file
- * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
*
* Creates the file in the lower file system.
*
@@ -155,31 +154,10 @@
*/
static int
ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
- struct dentry *dentry, int mode,
- struct nameidata *nd)
+ struct dentry *dentry, int mode)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
- struct dentry *dentry_save;
- struct vfsmount *vfsmount_save;
- unsigned int flags_save;
- int rc;
-
- if (nd) {
- dentry_save = nd->path.dentry;
- vfsmount_save = nd->path.mnt;
- flags_save = nd->flags;
- nd->path.dentry = lower_dentry;
- nd->path.mnt = lower_mnt;
- nd->flags &= ~LOOKUP_OPEN;
- }
- rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
- if (nd) {
- nd->path.dentry = dentry_save;
- nd->path.mnt = vfsmount_save;
- nd->flags = flags_save;
- }
- return rc;
+ return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
}
/**
@@ -197,8 +175,7 @@
*/
static int
ecryptfs_do_create(struct inode *directory_inode,
- struct dentry *ecryptfs_dentry, int mode,
- struct nameidata *nd)
+ struct dentry *ecryptfs_dentry, int mode)
{
int rc;
struct dentry *lower_dentry;
@@ -213,7 +190,7 @@
goto out;
}
rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
- ecryptfs_dentry, mode, nd);
+ ecryptfs_dentry, mode);
if (rc) {
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
"rc = [%d]\n", __func__, rc);
@@ -294,7 +271,7 @@
int rc;
/* ecryptfs_do_create() calls ecryptfs_interpose() */
- rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
+ rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
if (unlikely(rc)) {
ecryptfs_printk(KERN_WARNING, "Failed to create file in"
"lower filesystem\n");
@@ -942,10 +919,8 @@
}
static int
-ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
+ecryptfs_permission(struct inode *inode, int mask)
{
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
return inode_permission(ecryptfs_inode_to_lower(inode), mask);
}
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 1511bf9..832b10d 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -60,14 +60,11 @@
struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
efs_ino_t inodenum;
- struct inode * inode = NULL;
+ struct inode *inode = NULL;
inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
- if (inodenum) {
+ if (inodenum)
inode = efs_iget(dir->i_sb, inodenum);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
return d_splice_alias(inode, dentry);
}
diff --git a/fs/exec.c b/fs/exec.c
index 6075a1e..842d570 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -963,9 +963,18 @@
leader->group_leader = tsk;
tsk->exit_signal = SIGCHLD;
+ leader->exit_signal = -1;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
+
+ /*
+ * We are going to release_task()->ptrace_unlink() silently,
+ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
+ * the tracer won't block again waiting for this thread.
+ */
+ if (unlikely(leader->ptrace))
+ __wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
release_task(leader);
@@ -1105,6 +1114,13 @@
}
EXPORT_SYMBOL(flush_old_exec);
+void would_dump(struct linux_binprm *bprm, struct file *file)
+{
+ if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+}
+EXPORT_SYMBOL(would_dump);
+
void setup_new_exec(struct linux_binprm * bprm)
{
int i, ch;
@@ -1144,9 +1160,10 @@
if (bprm->cred->uid != current_euid() ||
bprm->cred->gid != current_egid()) {
current->pdeath_signal = 0;
- } else if (file_permission(bprm->file, MAY_READ) ||
- bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
- set_dumpable(current->mm, suid_dumpable);
+ } else {
+ would_dump(bprm, bprm->file);
+ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+ set_dumpable(current->mm, suid_dumpable);
}
/*
@@ -1225,7 +1242,12 @@
unsigned n_fs;
int res = 0;
- bprm->unsafe = tracehook_unsafe_exec(p);
+ if (p->ptrace) {
+ if (p->ptrace & PT_PTRACE_CAP)
+ bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
+ else
+ bprm->unsafe |= LSM_UNSAFE_PTRACE;
+ }
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1353,6 +1375,7 @@
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
+ pid_t old_pid;
retval = security_bprm_check(bprm);
if (retval)
@@ -1362,6 +1385,11 @@
if (retval)
return retval;
+ /* Need to fetch pid before load_binary changes it */
+ rcu_read_lock();
+ old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
+ rcu_read_unlock();
+
retval = -ENOENT;
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
@@ -1381,7 +1409,8 @@
bprm->recursion_depth = depth;
if (retval >= 0) {
if (depth == 0)
- tracehook_report_exec(fmt, bprm, regs);
+ ptrace_event(PTRACE_EVENT_EXEC,
+ old_pid);
put_binfmt(fmt);
allow_write_access(bprm->file);
if (bprm->file)
@@ -1769,7 +1798,7 @@
t = start;
do {
- task_clear_group_stop_pending(t);
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 45ca323..491c6c0 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -42,11 +42,19 @@
* Note, in exofs all metadata is written as part of inode, regardless.
* The writeout is synchronous
*/
-static int exofs_file_fsync(struct file *filp, int datasync)
+static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
+ struct inode *inode = filp->f_mapping->host;
int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4d70db1..b54c437 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -55,12 +55,7 @@
return ERR_PTR(-ENAMETOOLONG);
ino = exofs_inode_by_name(dir, dentry);
- inode = NULL;
- if (ino) {
- inode = exofs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
+ inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
return d_splice_alias(inode, dentry);
}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 06065bd..c57bedd 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -913,7 +913,7 @@
unsigned long ino = exofs_parent_ino(child);
if (!ino)
- return NULL;
+ return ERR_PTR(-ESTALE);
return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index abea5a1..bfe651f 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,11 +232,11 @@
}
int
-ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
return -EAGAIN;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index c939b7b..3ff6cbb 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@
#ifdef CONFIG_EXT2_FS_POSIX_ACL
/* acl.c */
-extern int ext2_check_acl (struct inode *, int, unsigned int);
+extern int ext2_check_acl (struct inode *, int);
extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 645be9e..af9fc89 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -150,7 +150,8 @@
extern const struct file_operations ext2_dir_operations;
/* file.c */
-extern int ext2_fsync(struct file *file, int datasync);
+extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
extern const struct inode_operations ext2_file_inode_operations;
extern const struct file_operations ext2_file_operations;
extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 49eec94..82e0632 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -40,13 +40,13 @@
return 0;
}
-int ext2_fsync(struct file *file, int datasync)
+int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
- ret = generic_file_fsync(file, datasync);
+ ret = generic_file_fsync(file, start, end, datasync);
if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 788e09a..a8a58f6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -843,8 +843,8 @@
struct inode *inode = mapping->host;
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
- iov, offset, nr_segs, ext2_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ ext2_get_block);
if (ret < 0 && (rw & WRITE))
ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
return ret;
@@ -1184,6 +1184,8 @@
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
+ inode_dio_wait(inode);
+
if (mapping_is_xip(inode->i_mapping))
error = xip_truncate_page(inode->i_mapping, newsize);
else if (test_opt(inode->i_sb, NOBH))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index ed5c5d4..d60b709 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,15 +67,11 @@
inode = NULL;
if (ino) {
inode = ext2_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ESTALE) {
- ext2_error(dir->i_sb, __func__,
- "deleted inode referenced: %lu",
- (unsigned long) ino);
- return ERR_PTR(-EIO);
- } else {
- return ERR_CAST(inode);
- }
+ if (inode == ERR_PTR(-ESTALE)) {
+ ext2_error(dir->i_sb, __func__,
+ "deleted inode referenced: %lu",
+ (unsigned long) ino);
+ return ERR_PTR(-EIO);
}
}
return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0..edfeb29 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,11 +240,11 @@
}
int
-ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext3_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
return -EAGAIN;
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf804..5973346 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_check_acl (struct inode *, int, unsigned int);
+extern int ext3_check_acl (struct inode *, int);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb..0bcf63a 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -43,7 +43,7 @@
* inode to disk.
*/
-int ext3_sync_file(struct file *file, int datasync)
+int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
@@ -54,6 +54,17 @@
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ /*
+ * Taking the mutex here just to keep consistent with how fsync was
+ * called previously, however it looks like we don't need to take
+ * i_mutex at all.
+ */
+ mutex_lock(&inode->i_mutex);
+
J_ASSERT(ext3_journal_current_handle() == NULL);
/*
@@ -70,8 +81,10 @@
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext3_should_journal_data(inode))
+ if (ext3_should_journal_data(inode)) {
+ mutex_unlock(&inode->i_mutex);
return ext3_force_commit(inode->i_sb);
+ }
if (datasync)
commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +104,6 @@
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23..2978a2a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1816,9 +1816,8 @@
}
retry:
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext3_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ ext3_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
@@ -3216,6 +3215,9 @@
ext3_journal_stop(handle);
}
+ if (attr->ia_valid & ATTR_SIZE)
+ inode_dio_wait(inode);
+
if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
handle_t *handle;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9b..c095cf5 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1038,15 +1038,11 @@
return ERR_PTR(-EIO);
}
inode = ext3_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ESTALE) {
- ext3_error(dir->i_sb, __func__,
- "deleted inode referenced: %lu",
- ino);
- return ERR_PTR(-EIO);
- } else {
- return ERR_CAST(inode);
- }
+ if (inode == ERR_PTR(-ESTALE)) {
+ ext3_error(dir->i_sb, __func__,
+ "deleted inode referenced: %lu",
+ ino);
+ return ERR_PTR(-EIO);
}
}
return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index aad153e..b57ea2f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1718,6 +1718,8 @@
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+ /* enable barriers by default */
+ set_opt(sbi->s_mount_opt, BARRIER);
set_opt(sbi->s_mount_opt, RESERVATION);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 21eacd7..60d900f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,11 +238,11 @@
}
int
-ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext4_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
return -EAGAIN;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index dec8211..9d843d5 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@
#ifdef CONFIG_EXT4_FS_POSIX_ACL
/* acl.c */
-extern int ext4_check_acl(struct inode *, int, unsigned int);
+extern int ext4_check_acl(struct inode *, int);
extern int ext4_acl_chmod(struct inode *);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1921392..fa44df8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1758,7 +1758,7 @@
extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
-extern int ext4_sync_file(struct file *, int);
+extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
extern int ext4_flush_completed_IO(struct inode *);
/* hash.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c09723..ce766f9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -236,6 +236,27 @@
}
offset += file->f_pos;
break;
+ case SEEK_DATA:
+ /*
+ * In the generic case the entire file is data, so as long as
+ * offset isn't at the end of the file then the offset is data.
+ */
+ if (offset >= inode->i_size) {
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+ }
+ break;
+ case SEEK_HOLE:
+ /*
+ * There is a virtual hole at the end of the file, so as long as
+ * offset isn't i_size or larger, return i_size.
+ */
+ if (offset >= inode->i_size) {
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+ }
+ offset = inode->i_size;
+ break;
}
if (offset < 0 || offset > maxbytes) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index ce66d2f..da3bed3 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -151,6 +151,32 @@
return ret;
}
+/**
+ * __sync_inode - generic_file_fsync without the locking and filemap_write
+ * @inode: inode to sync
+ * @datasync: only sync essential metadata if true
+ *
+ * This is just generic_file_fsync without the locking. This is needed for
+ * nojournal mode to make sure this inode's data/metadata makes it to disk
+ * properly. The i_mutex should be held already.
+ */
+static int __sync_inode(struct inode *inode, int datasync)
+{
+ int err;
+ int ret;
+
+ ret = sync_mapping_buffers(inode->i_mapping);
+ if (!(inode->i_state & I_DIRTY))
+ return ret;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return ret;
+
+ err = sync_inode_metadata(inode, 1);
+ if (ret == 0)
+ ret = err;
+ return ret;
+}
+
/*
* akpm: A new design for ext4_sync_file().
*
@@ -165,7 +191,7 @@
* i_mutex lock is held when entering and exiting this function
*/
-int ext4_sync_file(struct file *file, int datasync)
+int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -178,15 +204,20 @@
trace_ext4_sync_file_enter(file, datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
if (inode->i_sb->s_flags & MS_RDONLY)
- return 0;
+ goto out;
ret = ext4_flush_completed_IO(inode);
if (ret < 0)
goto out;
if (!journal) {
- ret = generic_file_fsync(file, datasync);
+ ret = __sync_inode(inode, datasync);
if (!ret && !list_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
@@ -220,6 +251,7 @@
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
out:
+ mutex_unlock(&inode->i_mutex);
trace_ext4_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c0..678cde8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3501,10 +3501,8 @@
offset, nr_segs,
ext4_get_block, NULL, NULL, 0);
else {
- ret = blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext4_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov,
+ offset, nr_segs, ext4_get_block);
if (unlikely((rw & WRITE) && ret < 0)) {
loff_t isize = i_size_read(inode);
@@ -3575,6 +3573,7 @@
ssize_t size, void *private, int ret,
bool is_async)
{
+ struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
unsigned long flags;
@@ -3596,6 +3595,7 @@
out:
if (is_async)
aio_complete(iocb, ret, 0);
+ inode_dio_done(inode);
return;
}
@@ -3616,6 +3616,9 @@
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
iocb->private = NULL;
+
+ /* XXX: probably should move into the real I/O completion handler */
+ inode_dio_done(inode);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3748,11 +3751,13 @@
EXT4_I(inode)->cur_aio_dio = iocb->private;
}
- ret = blockdev_direct_IO(rw, iocb, inode,
+ ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block_write,
- ext4_end_io_dio);
+ ext4_end_io_dio,
+ NULL,
+ DIO_LOCKING | DIO_SKIP_HOLES);
if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL;
/*
@@ -5351,6 +5356,8 @@
}
if (attr->ia_valid & ATTR_SIZE) {
+ inode_dio_wait(inode);
+
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5843,80 +5850,84 @@
struct page *page = vmf->page;
loff_t size;
unsigned long len;
- int ret = -EINVAL;
- void *fsdata;
+ int ret;
struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
+ handle_t *handle;
+ get_block_t *get_block;
+ int retries = 0;
/*
- * Get i_alloc_sem to stop truncates messing with the inode. We cannot
- * get i_mutex because we are already holding mmap_sem.
+ * This check is racy but catches the common case. We rely on
+ * __block_page_mkwrite() to do a reliable check.
*/
- down_read(&inode->i_alloc_sem);
- size = i_size_read(inode);
- if (page->mapping != mapping || size <= page_offset(page)
- || !PageUptodate(page)) {
- /* page got truncated from under us? */
- goto out_unlock;
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ /* Delalloc case is easy... */
+ if (test_opt(inode->i_sb, DELALLOC) &&
+ !ext4_should_journal_data(inode) &&
+ !ext4_nonda_switch(inode->i_sb)) {
+ do {
+ ret = __block_page_mkwrite(vma, vmf,
+ ext4_da_get_block_prep);
+ } while (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries));
+ goto out_ret;
}
- ret = 0;
lock_page(page);
- wait_on_page_writeback(page);
- if (PageMappedToDisk(page)) {
- up_read(&inode->i_alloc_sem);
- return VM_FAULT_LOCKED;
+ size = i_size_read(inode);
+ /* Page got truncated from under us? */
+ if (page->mapping != mapping || page_offset(page) > size) {
+ unlock_page(page);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
}
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
else
len = PAGE_CACHE_SIZE;
-
/*
- * return if we have all the buffers mapped. This avoid
- * the need to call write_begin/write_end which does a
- * journal_start/journal_stop which can block and take
- * long time
+ * Return if we have all the buffers mapped. This avoids the need to do
+ * journal_start/journal_stop which can block and take a long time
*/
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
ext4_bh_unmapped)) {
- up_read(&inode->i_alloc_sem);
- return VM_FAULT_LOCKED;
+ /* Wait so that we don't change page under IO */
+ wait_on_page_writeback(page);
+ ret = VM_FAULT_LOCKED;
+ goto out;
}
}
unlock_page(page);
- /*
- * OK, we need to fill the hole... Do write_begin write_end
- * to do block allocation/reservation.We are not holding
- * inode.i__mutex here. That allow * parallel write_begin,
- * write_end call. lock_page prevent this from happening
- * on the same page though
- */
- ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
- len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
- if (ret < 0)
- goto out_unlock;
- ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
- len, len, page, fsdata);
- if (ret < 0)
- goto out_unlock;
- ret = 0;
-
- /*
- * write_begin/end might have created a dirty page and someone
- * could wander in and start the IO. Make sure that hasn't
- * happened.
- */
- lock_page(page);
- wait_on_page_writeback(page);
- up_read(&inode->i_alloc_sem);
- return VM_FAULT_LOCKED;
-out_unlock:
- if (ret)
+ /* OK, we need to fill the hole... */
+ if (ext4_should_dioread_nolock(inode))
+ get_block = ext4_get_block_write;
+ else
+ get_block = ext4_get_block;
+retry_alloc:
+ handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+ if (IS_ERR(handle)) {
ret = VM_FAULT_SIGBUS;
- up_read(&inode->i_alloc_sem);
+ goto out;
+ }
+ ret = __block_page_mkwrite(vma, vmf, get_block);
+ if (!ret && ext4_should_journal_data(inode)) {
+ if (walk_page_buffers(handle, page_buffers(page), 0,
+ PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
+ unlock_page(page);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+ }
+ ext4_journal_stop(handle);
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry_alloc;
+out_ret:
+ ret = block_page_mkwrite_return(ret);
+out:
return ret;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b77..707d605 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1037,15 +1037,11 @@
return ERR_PTR(-EIO);
}
inode = ext4_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ESTALE) {
- EXT4_ERROR_INODE(dir,
- "deleted inode referenced: %u",
- ino);
- return ERR_PTR(-EIO);
- } else {
- return ERR_CAST(inode);
- }
+ if (inode == ERR_PTR(-ESTALE)) {
+ EXT4_ERROR_INODE(dir,
+ "deleted inode referenced: %u",
+ ino);
+ return ERR_PTR(-EIO);
}
}
return d_splice_alias(inode, dentry);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8276cc2..a5d3853 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -109,6 +109,7 @@
int i_attrs; /* unused attribute bits */
loff_t i_pos; /* on-disk position of directory entry or 0 */
struct hlist_node i_fat_hash; /* hash by i_location */
+ struct rw_semaphore truncate_lock; /* protect bmap against truncate */
struct inode vfs_inode;
};
@@ -309,7 +310,8 @@
extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-extern int fat_file_fsync(struct file *file, int datasync);
+extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
/* fat/inode.c */
extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7018e1d..c118acf 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -149,12 +149,12 @@
return 0;
}
-int fat_file_fsync(struct file *filp, int datasync)
+int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
int res, err;
- res = generic_file_fsync(filp, datasync);
+ res = generic_file_fsync(filp, start, end, datasync);
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
return res ? res : err;
@@ -397,6 +397,8 @@
* sequence.
*/
if (attr->ia_valid & ATTR_SIZE) {
+ inode_dio_wait(inode);
+
if (attr->ia_size > inode->i_size) {
error = fat_cont_expand(inode, attr->ia_size);
if (error || attr->ia_valid == ATTR_SIZE)
@@ -429,8 +431,10 @@
}
if (attr->ia_valid & ATTR_SIZE) {
+ down_write(&MSDOS_I(inode)->truncate_lock);
truncate_setsize(inode, attr->ia_size);
fat_truncate_blocks(inode, attr->ia_size);
+ up_write(&MSDOS_I(inode)->truncate_lock);
}
setattr_copy(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cb8d839..5942fec 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -211,8 +211,8 @@
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
- iov, offset, nr_segs, fat_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ fat_get_block);
if (ret < 0 && (rw & WRITE))
fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
@@ -224,9 +224,9 @@
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
- down_read(&mapping->host->i_alloc_sem);
+ down_read(&MSDOS_I(mapping->host)->truncate_lock);
blocknr = generic_block_bmap(mapping, block, fat_get_block);
- up_read(&mapping->host->i_alloc_sem);
+ up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
}
@@ -510,6 +510,8 @@
ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
+
+ init_rwsem(&ei->truncate_lock);
return &ei->vfs_inode;
}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3b222da..66e83b8 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -209,29 +209,20 @@
int err;
lock_super(sb);
-
err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
- if (err) {
- if (err == -ENOENT) {
- inode = NULL;
- goto out;
- }
- goto error;
+ switch (err) {
+ case -ENOENT:
+ inode = NULL;
+ break;
+ case 0:
+ inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
+ brelse(sinfo.bh);
+ break;
+ default:
+ inode = ERR_PTR(err);
}
-
- inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
- brelse(sinfo.bh);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
-out:
unlock_super(sb);
return d_splice_alias(inode, dentry);
-
-error:
- unlock_super(sb);
- return ERR_PTR(err);
}
/***** Creates a directory entry (name is already formatted). */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 20b4ea5..bb3f29c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -82,10 +82,8 @@
* case sensitive name which is specified by user if this is
* for creation.
*/
- if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
- if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
- return 0;
- }
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
return vfat_revalidate_shortname(dentry);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0f015a0..b8c507c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -461,32 +461,6 @@
}
/*
- * For background writeback the caller does not have the sb pinned
- * before calling writeback. So make sure that we do pin it, so it doesn't
- * go away while we are writing inodes from it.
- */
-static bool pin_sb_for_writeback(struct super_block *sb)
-{
- spin_lock(&sb_lock);
- if (list_empty(&sb->s_instances)) {
- spin_unlock(&sb_lock);
- return false;
- }
-
- sb->s_count++;
- spin_unlock(&sb_lock);
-
- if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root)
- return true;
- up_read(&sb->s_umount);
- }
-
- put_super(sb);
- return false;
-}
-
-/*
* Write a portion of b_io inodes which belong to @sb.
*
* If @only_this_sb is true, then find and write all such
@@ -585,7 +559,7 @@
struct inode *inode = wb_inode(wb->b_io.prev);
struct super_block *sb = inode->i_sb;
- if (!pin_sb_for_writeback(sb)) {
+ if (!grab_super_passive(sb)) {
requeue_io(inode);
continue;
}
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index a2a5d19..3f7a59b 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -954,3 +954,43 @@
pagevec_reinit(pagevec);
}
EXPORT_SYMBOL(fscache_mark_pages_cached);
+
+/*
+ * Walk every page in the inode's mapping and, for each one marked
+ * PG_fscache, wait on its cache write and then uncache it.  All such pages
+ * are assumed to be associated with the given cookie.
+ */
+void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+				       struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	pgoff_t index = 0;
+	int slot;
+
+	_enter("%p,%p", cookie, inode);
+
+	if (!mapping || mapping->nrpages == 0) {
+		_leave(" [no pages]");
+		return;
+	}
+
+	pagevec_init(&pvec, 0);
+	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
+		for (slot = 0; slot < pagevec_count(&pvec); slot++) {
+			struct page *page = pvec.pages[slot];
+
+			/* Track the last index seen so the next lookup resumes after it */
+			index = page->index;
+			if (!PageFsCache(page))
+				continue;
+			__fscache_wait_on_page_write(cookie, page);
+			__fscache_uncache_page(cookie, page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+
+		/* Step past the last page handled; stop if the index wraps to 0 */
+		if (++index == 0)
+			break;
+	}
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_uncache_all_inode_pages);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d5016071..9f63e49 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -382,7 +382,7 @@
struct fuse_entry_out outentry;
struct fuse_file *ff;
struct file *file;
- int flags = nd->intent.open.flags - 1;
+ int flags = nd->intent.open.flags;
if (fc->no_create)
return -ENOSYS;
@@ -576,7 +576,7 @@
static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
struct nameidata *nd)
{
- if (nd && (nd->flags & LOOKUP_OPEN)) {
+ if (nd) {
int err = fuse_create_open(dir, entry, mode, nd);
if (err != -ENOSYS)
return err;
@@ -971,9 +971,9 @@
return err;
}
-static int fuse_perm_getattr(struct inode *inode, int flags)
+static int fuse_perm_getattr(struct inode *inode, int mask)
{
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
return fuse_do_getattr(inode, NULL, NULL);
@@ -992,7 +992,7 @@
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
+static int fuse_permission(struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
bool refreshed = false;
@@ -1011,23 +1011,22 @@
if (fi->i_time < get_jiffies_64()) {
refreshed = true;
- err = fuse_perm_getattr(inode, flags);
+ err = fuse_perm_getattr(inode, mask);
if (err)
return err;
}
}
if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
- err = generic_permission(inode, mask, flags, NULL);
+ err = generic_permission(inode, mask);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
node will at first have no permissions */
if (err == -EACCES && !refreshed) {
- err = fuse_perm_getattr(inode, flags);
+ err = fuse_perm_getattr(inode, mask);
if (!err)
- err = generic_permission(inode, mask,
- flags, NULL);
+ err = generic_permission(inode, mask);
}
/* Note: the opposite of the above test does not
@@ -1035,7 +1034,7 @@
noticed immediately, only after the attribute
timeout has expired */
} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
err = fuse_access(inode, mask);
@@ -1044,7 +1043,7 @@
if (refreshed)
return -EACCES;
- err = fuse_perm_getattr(inode, flags);
+ err = fuse_perm_getattr(inode, mask);
if (!err && !(inode->i_mode & S_IXUGO))
return -EACCES;
}
@@ -1177,9 +1176,10 @@
return 0;
}
-static int fuse_dir_fsync(struct file *file, int datasync)
+static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
- return fuse_fsync_common(file, datasync, 1);
+ return fuse_fsync_common(file, start, end, datasync, 1); /* 1 = isdir */
}
static bool update_mtime(unsigned ivalid)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82a6646..7bb685c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -400,7 +400,8 @@
fuse_release_nowrite(inode);
}
-int fuse_fsync_common(struct file *file, int datasync, int isdir)
+int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ int datasync, int isdir)
{
struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -412,9 +413,15 @@
if (is_bad_inode(inode))
return -EIO;
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
return 0;
+ mutex_lock(&inode->i_mutex);
+
/*
* Start writeback against all dirty pages of the inode, then
* wait for all outstanding writes, before sending the FSYNC
@@ -422,13 +429,15 @@
*/
err = write_inode_now(inode, 0);
if (err)
- return err;
+ goto out;
fuse_sync_writes(inode);
req = fuse_get_req(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -448,12 +457,15 @@
fc->no_fsync = 1;
err = 0;
}
+out:
+ mutex_unlock(&inode->i_mutex);
return err;
}
-static int fuse_fsync(struct file *file, int datasync)
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
- return fuse_fsync_common(file, datasync, 0);
+ return fuse_fsync_common(file, start, end, datasync, 0); /* 0 = !isdir */
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -1600,15 +1612,32 @@
struct inode *inode = file->f_path.dentry->d_inode;
mutex_lock(&inode->i_mutex);
- switch (origin) {
- case SEEK_END:
+ /*
+ * Only SEEK_END, SEEK_DATA and SEEK_HOLE read i_size below, so only
+ * they need fresh attributes.  The two tests must be joined with &&:
+ * with || the condition is true for every origin.
+ */
+ if (origin != SEEK_CUR && origin != SEEK_SET) {
retval = fuse_update_attributes(inode, NULL, file, NULL);
if (retval)
goto exit;
+ }
+
+ switch (origin) {
+ case SEEK_END:
offset += i_size_read(inode);
break;
case SEEK_CUR:
offset += file->f_pos;
+ break;
+ case SEEK_DATA:
+ if (offset >= i_size_read(inode)) {
+ retval = -ENXIO;
+ goto exit;
+ }
+ break;
+ case SEEK_HOLE:
+ if (offset >= i_size_read(inode)) {
+ retval = -ENXIO;
+ goto exit;
+ }
+ offset = i_size_read(inode);
+ break;
}
retval = -EINVAL;
if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b788bec..c6aa2d4 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -589,7 +589,8 @@
/**
* Send FSYNC or FSYNCDIR request
*/
-int fuse_fsync_common(struct file *file, int datasync, int isdir);
+int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ int datasync, int isdir);
/**
* Notify poll wakeup
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 8f26d1a..70e90b4 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,9 +190,9 @@
}
int
-generic_check_acl(struct inode *inode, int mask, unsigned int flags)
+generic_check_acl(struct inode *inode, int mask)
{
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
} else {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index cbc0715..8ef1079 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,12 +75,12 @@
* Returns: errno
*/
-int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
+int gfs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
int error;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
return -EAGAIN;
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index a93907c..b522b0c 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES 25
-extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
+extern int gfs2_check_acl(struct inode *inode, int mask);
extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 802ac5e..f9fbbe9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1069,6 +1069,7 @@
return 0;
gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
do {
if (atomic_read(&bh->b_count))
@@ -1080,6 +1081,7 @@
goto not_possible;
bh = bh->b_this_page;
} while(bh != head);
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
head = bh = page_buffers(page);
@@ -1112,6 +1114,7 @@
WARN_ON(buffer_dirty(bh));
WARN_ON(buffer_pinned(bh));
cannot_release:
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
return 0;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e65493a..7878c47 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -854,11 +854,7 @@
blen++;
else {
if (bstart) {
- if (metadata)
- __gfs2_free_meta(ip, bstart, blen);
- else
- __gfs2_free_data(ip, bstart, blen);
-
+ __gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
@@ -870,11 +866,7 @@
gfs2_add_inode_blocks(&ip->i_inode, -1);
}
if (bstart) {
- if (metadata)
- __gfs2_free_meta(ip, bstart, blen);
- else
- __gfs2_free_data(ip, bstart, blen);
-
+ __gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
@@ -1224,6 +1216,8 @@
if (ret)
return ret;
+ inode_dio_wait(inode);
+
oldsize = inode->i_size;
if (newsize >= oldsize)
return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 091ee47..1cc2f8e 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -339,6 +339,67 @@
return (copied) ? copied : error;
}
+/**
+ * gfs2_dir_get_hash_table - Get pointer to the dir hash table
+ * @ip: The inode in question
+ *
+ * If @ip already has a cached table (i_hash_cache) it is returned as is.
+ * Otherwise a buffer of (1 << i_depth) __be64 entries is allocated, filled
+ * from the directory with gfs2_dir_read_data() and installed as the cache
+ * under the inode's i_lock.  The inode size must equal the table size.
+ *
+ * Returns: The hash table or an error
+ */
+
+static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
+{
+	struct inode *inode = &ip->i_inode;
+	int ret;
+	u32 hsize;
+	__be64 *hc;
+
+	BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
+
+	hc = ip->i_hash_cache;
+	if (hc)
+		return hc;
+
+	hsize = 1 << ip->i_depth;
+	hsize *= sizeof(__be64);
+	if (hsize != i_size_read(inode)) {
+		gfs2_consist_inode(ip);
+		return ERR_PTR(-EIO);
+	}
+
+	hc = kmalloc(hsize, GFP_NOFS);
+	if (hc == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
+	if (ret != hsize) {
+		/*
+		 * The buffer is not zeroed, so a short read would leave
+		 * garbage entries in the cache: treat it as an I/O error.
+		 */
+		kfree(hc);
+		return ERR_PTR(ret < 0 ? ret : -EIO);
+	}
+
+	/* Another task may have installed a table meanwhile; keep theirs */
+	spin_lock(&inode->i_lock);
+	if (ip->i_hash_cache)
+		kfree(hc);
+	else
+		ip->i_hash_cache = hc;
+	spin_unlock(&inode->i_lock);
+
+	return ip->i_hash_cache;
+}
+
+/**
+ * gfs2_dir_hash_inval - Drop the cached copy of a directory's hash table
+ * @ip: The directory inode
+ *
+ * The caller must hold an exclusive glock, or be running as part of glock
+ * invalidation.
+ */
+void gfs2_dir_hash_inval(struct gfs2_inode *ip)
+{
+	__be64 *stale = ip->i_hash_cache;
+
+	/* Detach the table from the inode before freeing it */
+	ip->i_hash_cache = NULL;
+	kfree(stale);
+}
+
static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
{
return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
@@ -686,17 +747,12 @@
static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
u64 *leaf_out)
{
- __be64 leaf_no;
- int error;
+ __be64 *table;
- error = gfs2_dir_read_data(dip, (char *)&leaf_no,
- index * sizeof(__be64),
- sizeof(__be64), 0);
- if (error != sizeof(u64))
- return (error < 0) ? error : -EIO;
-
- *leaf_out = be64_to_cpu(leaf_no);
-
+ /* Entry @index of the cached hash table holds the leaf block number */
+ table = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+ *leaf_out = be64_to_cpu(table[index]);
return 0;
}
@@ -966,6 +1022,8 @@
for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
+ gfs2_dir_hash_inval(dip);
+
error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
half_len * sizeof(u64));
if (error != half_len * sizeof(u64)) {
@@ -1052,70 +1110,54 @@
static int dir_double_exhash(struct gfs2_inode *dip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct buffer_head *dibh;
u32 hsize;
- u64 *buf;
- u64 *from, *to;
- u64 block;
- u64 disksize = i_size_read(&dip->i_inode);
+ u32 hsize_bytes;
+ __be64 *hc;
+ __be64 *hc2, *h;
int x;
int error = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != disksize) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
+ hsize_bytes = hsize * sizeof(__be64);
- /* Allocate both the "from" and "to" buffers in one big chunk */
+ hc = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(hc))
+ return PTR_ERR(hc);
- buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
- if (!buf)
+ /* hc2 receives the doubled table: every old entry appears twice */
+ h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
+ if (!hc2)
return -ENOMEM;
- for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) {
- error = gfs2_dir_read_data(dip, (char *)buf,
- block * sdp->sd_hash_bsize,
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto fail;
- }
-
- from = buf;
- to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
-
- for (x = sdp->sd_hash_ptrs; x--; from++) {
- *to++ = *from; /* No endianess worries */
- *to++ = *from;
- }
-
- error = gfs2_dir_write_data(dip,
- (char *)buf + sdp->sd_hash_bsize,
- block * sdp->sd_sb.sb_bsize,
- sdp->sd_sb.sb_bsize);
- if (error != sdp->sd_sb.sb_bsize) {
- if (error >= 0)
- error = -EIO;
- goto fail;
- }
- }
-
- kfree(buf);
-
error = gfs2_meta_inode_buffer(dip, &dibh);
- if (!gfs2_assert_withdraw(sdp, !error)) {
- dip->i_depth++;
- gfs2_dinode_out(dip, dibh->b_data);
- brelse(dibh);
+ if (error)
+ goto out_kfree;
+
+ /*
+ * Index the old table instead of advancing hc: the fail path below
+ * needs hc to still point at the start of the original table.
+ */
+ for (x = 0; x < hsize; x++) {
+ *h++ = hc[x];
+ *h++ = hc[x];
}
- return error;
+ error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
+ if (error != (hsize_bytes * 2)) {
+ /* A short write is a byte count, not an errno: report -EIO */
+ if (error >= 0)
+ error = -EIO;
+ goto fail;
+ }
+
+ /* Success: swap the doubled table in as the cache and bump the depth */
+ gfs2_dir_hash_inval(dip);
+ dip->i_hash_cache = hc2;
+ dip->i_depth++;
+ gfs2_dinode_out(dip, dibh->b_data);
+ brelse(dibh);
+ return 0;
fail:
- kfree(buf);
+ /* Replace original hash table & size */
+ gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
+ i_size_write(&dip->i_inode, hsize_bytes);
+ gfs2_dinode_out(dip, dibh->b_data);
+ brelse(dibh);
+out_kfree:
+ kfree(hc2);
return error;
}
@@ -1348,6 +1390,7 @@
return error;
}
+
/**
* dir_e_read - Reads the entries from a directory into a filldir buffer
* @dip: dinode pointer
@@ -1362,9 +1405,7 @@
filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
u32 hsize, len = 0;
- u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 hash, index;
__be64 *lp;
int copied = 0;
@@ -1372,37 +1413,17 @@
unsigned depth = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != i_size_read(inode)) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
-
hash = gfs2_dir_offset2hash(*offset);
index = hash >> (32 - dip->i_depth);
- lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
- if (!lp)
- return -ENOMEM;
+ lp = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
while (index < hsize) {
- lp_offset = index & (sdp->sd_hash_ptrs - 1);
- ht_offset = index - lp_offset;
-
- if (ht_offset_cur != ht_offset) {
- error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(__be64),
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto out;
- }
- ht_offset_cur = ht_offset;
- }
-
error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
&copied, &depth,
- be64_to_cpu(lp[lp_offset]));
+ be64_to_cpu(lp[index]));
if (error)
break;
@@ -1410,8 +1431,6 @@
index = (index & ~(len - 1)) + len;
}
-out:
- kfree(lp);
if (error > 0)
error = 0;
return error;
@@ -1914,43 +1933,22 @@
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct buffer_head *bh;
struct gfs2_leaf *leaf;
u32 hsize, len;
- u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 index = 0, next_index;
__be64 *lp;
u64 leaf_no;
int error = 0, last;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
- lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
- if (!lp)
- return -ENOMEM;
+ lp = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
while (index < hsize) {
- lp_offset = index & (sdp->sd_hash_ptrs - 1);
- ht_offset = index - lp_offset;
-
- if (ht_offset_cur != ht_offset) {
- error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(__be64),
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto out;
- }
- ht_offset_cur = ht_offset;
- }
-
- leaf_no = be64_to_cpu(lp[lp_offset]);
+ leaf_no = be64_to_cpu(lp[index]);
if (leaf_no) {
error = get_leaf(dip, leaf_no, &bh);
if (error)
@@ -1976,7 +1974,6 @@
}
out:
- kfree(lp);
return error;
}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index e686af1..ff5772f 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -35,6 +35,7 @@
const struct qstr *filename);
extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head **bhp);
+extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
static inline u32 gfs2_disk_hash(const char *data, int len)
{
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a9f5cbe..edeb9e8 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -174,7 +174,9 @@
struct gfs2_inode *ip = GFS2_I(inode);
unsigned int flags = inode->i_flags;
- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
+ if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
+ inode->i_flags |= S_NOSEC;
if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
flags |= S_IMMUTABLE;
if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
@@ -243,7 +245,7 @@
!capable(CAP_LINUX_IMMUTABLE))
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(inode, MAY_WRITE, 0);
+ error = gfs2_permission(inode, MAY_WRITE);
if (error)
goto out;
}
@@ -544,7 +546,9 @@
/**
* gfs2_fsync - sync the dirty data for a file (across the cluster)
- * @file: the file that points to the dentry (we ignore this)
+ * @file: the file that points to the dentry
+ * @start: the start position in the file to sync
+ * @end: the end position in the file to sync
* @datasync: set if we can ignore timestamp changes
*
* The VFS will flush data for us. We only need to worry
@@ -553,23 +557,32 @@
* Returns: errno
*/
-static int gfs2_fsync(struct file *file, int datasync)
+static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
struct gfs2_inode *ip = GFS2_I(inode);
int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret) /* range writeback failed; i_mutex is not held yet */
+ return ret;
+ mutex_lock(&inode->i_mutex); /* dropped on every return path below */
+
if (datasync)
sync_state &= ~I_DIRTY_SYNC;
if (sync_state) {
ret = sync_inode_metadata(inode, 1);
- if (ret)
+ if (ret) {
+ mutex_unlock(&inode->i_mutex); /* error exit: release before returning */
return ret;
+ }
gfs2_ail_flush(ip->i_gl);
}
+ mutex_unlock(&inode->i_mutex);
return 0;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c1336e..88e8a23 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -409,6 +409,10 @@
if (held1 && held2 && list_empty(&gl->gl_holders))
clear_bit(GLF_QUEUED, &gl->gl_flags);
+ if (new_state != gl->gl_target)
+ /* shorten our minimum hold time */
+ gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
+ GL_GLOCK_MIN_HOLD);
gl->gl_state = new_state;
gl->gl_tchange = jiffies;
}
@@ -668,7 +672,7 @@
gl->gl_demote_state != LM_ST_EXCLUSIVE) {
unsigned long holdtime, now = jiffies;
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
@@ -679,9 +683,14 @@
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
- if (!delay ||
- queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ if (!delay)
gfs2_glock_put(gl);
+ else {
+ if (gl->gl_name.ln_type != LM_TYPE_INODE)
+ delay = 0;
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ gfs2_glock_put(gl);
+ }
if (drop_ref)
gfs2_glock_put(gl);
}
@@ -743,6 +752,7 @@
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
+ gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -855,8 +865,15 @@
static void wait_on_holder(struct gfs2_holder *gh)
{
+ unsigned long wait_start = jiffies;
+
might_sleep();
wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+ /* A wait of more than a second lengthens the glock's minimum hold time */
+ if (time_after(jiffies, wait_start + HZ)) {
+ struct gfs2_glock *gl = gh->gh_gl;
+
+ /* Grow by one increment, capped at the maximum hold time */
+ gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
+ GL_GLOCK_MAX_HOLD);
+ }
}
static void wait_on_demote(struct gfs2_glock *gl)
@@ -1093,8 +1110,9 @@
gfs2_glock_hold(gl);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
+ !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
+ gl->gl_name.ln_type == LM_TYPE_INODE)
+ delay = gl->gl_hold_time;
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
@@ -1273,12 +1291,13 @@
unsigned long now = jiffies;
gfs2_glock_hold(gl);
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
- if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
+ if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
+ gl->gl_name.ln_type == LM_TYPE_INODE) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
+ delay = gl->gl_hold_time;
}
spin_lock(&gl->gl_spin);
@@ -1667,7 +1686,7 @@
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
@@ -1676,7 +1695,7 @@
state2str(gl->gl_demote_state), dtime,
atomic_read(&gl->gl_ail_count),
atomic_read(&gl->gl_revokes),
- atomic_read(&gl->gl_ref));
+ atomic_read(&gl->gl_ref), gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
error = dump_holder(seq, gh);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 6b2f757..6670711 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -113,6 +113,12 @@
#define GLR_TRYFAILED 13
+#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
+#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
+#define GL_GLOCK_MIN_HOLD (long)(10)
+#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
+#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
+
struct lm_lockops {
const char *lm_proto_name;
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8ef70f4..da21eca 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -26,6 +26,7 @@
#include "rgrp.h"
#include "util.h"
#include "trans.h"
+#include "dir.h"
/**
* __gfs2_ail_flush - remove all buffers for a given lock from the AIL
@@ -47,10 +48,10 @@
bd_ail_gl_list);
bh = bd->bd_bh;
gfs2_remove_from_ail(bd);
- spin_unlock(&sdp->sd_ail_lock);
-
bd->bd_bh = NULL;
bh->b_private = NULL;
+ spin_unlock(&sdp->sd_ail_lock);
+
bd->bd_blkno = bh->b_blocknr;
gfs2_log_lock(sdp);
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
@@ -218,11 +219,14 @@
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
forget_all_cached_acls(&ip->i_inode);
+ gfs2_dir_hash_inval(ip);
}
}
- if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
+ if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
+ gfs2_log_flush(gl->gl_sbd, NULL);
gl->gl_sbd->sd_rindex_uptodate = 0;
+ }
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
}
@@ -314,6 +318,8 @@
ip->i_generation = be64_to_cpu(str->di_generation);
ip->i_diskflags = be32_to_cpu(str->di_flags);
+ ip->i_eattr = be64_to_cpu(str->di_eattr);
+ /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(&ip->i_inode);
height = be16_to_cpu(str->di_height);
if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -326,7 +332,6 @@
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
- ip->i_eattr = be64_to_cpu(str->di_eattr);
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
@@ -547,7 +552,6 @@
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
@@ -558,7 +562,6 @@
.go_unlock = rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0a064e9..892ac37 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -17,6 +17,7 @@
#include <linux/buffer_head.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
+#include <linux/completion.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -162,7 +163,6 @@
int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
void (*go_callback) (struct gfs2_glock *gl);
const int go_type;
- const unsigned long go_min_hold_time;
const unsigned long go_flags;
#define GLOF_ASPACE 1
};
@@ -220,6 +220,7 @@
unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */
+ long gl_hold_time;
struct list_head gl_holders;
const struct gfs2_glock_operations *gl_ops;
@@ -284,6 +285,7 @@
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
struct list_head i_trunc_list;
+ __be64 *i_hash_cache;
u32 i_entries;
u32 i_diskflags;
u8 i_height;
@@ -546,6 +548,7 @@
struct gfs2_glock *sd_trans_gl;
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
+ struct completion sd_locking_init;
/* Inode Stuff */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 03e0c52..0fb51a9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -307,7 +307,7 @@
}
if (!is_root) {
- error = gfs2_permission(dir, MAY_EXEC, 0);
+ error = gfs2_permission(dir, MAY_EXEC);
if (error)
goto out;
}
@@ -337,7 +337,7 @@
{
int error;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -792,13 +792,8 @@
static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode = NULL;
-
- inode = gfs2_lookupi(dir, &dentry->d_name, 0);
- if (inode && IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (inode) {
+ struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+ if (inode && !IS_ERR(inode)) {
struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
struct gfs2_holder gh;
int error;
@@ -808,11 +803,8 @@
return ERR_PTR(error);
}
gfs2_glock_dq_uninit(&gh);
- return d_splice_alias(inode, dentry);
}
- d_add(dentry, inode);
-
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/**
@@ -857,7 +849,7 @@
if (inode->i_nlink == 0)
goto out_gunlock;
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -990,7 +982,7 @@
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -1336,7 +1328,7 @@
}
}
} else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1371,7 +1363,7 @@
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
+ error = gfs2_permission(odentry->d_inode, MAY_WRITE);
if (error)
goto out_gunlock;
}
@@ -1543,7 +1535,7 @@
* Returns: errno
*/
-int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
+int gfs2_permission(struct inode *inode, int mask)
{
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
@@ -1553,7 +1545,7 @@
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
@@ -1564,7 +1556,7 @@
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EACCES;
else
- error = generic_permission(inode, mask, flags, gfs2_check_acl);
+ error = generic_permission(inode, mask);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
@@ -1854,6 +1846,7 @@
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .check_acl = gfs2_check_acl,
};
const struct inode_operations gfs2_dir_iops = {
@@ -1874,6 +1867,7 @@
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .check_acl = gfs2_check_acl,
};
const struct inode_operations gfs2_symlink_iops = {
@@ -1888,5 +1882,6 @@
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .check_acl = gfs2_check_acl,
};
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3160607..8d90e0c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -108,7 +108,7 @@
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
-extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
+extern int gfs2_permission(struct inode *inode, int mask);
extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 903115f..85c6292 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -903,6 +903,7 @@
if (gfs2_ail1_empty(sdp))
break;
}
+ gfs2_log_flush(sdp, NULL);
}
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c2b34cd..29e1ace 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -41,6 +41,7 @@
init_rwsem(&ip->i_rw_mutex);
INIT_LIST_HEAD(&ip->i_trunc_list);
ip->i_alloc = NULL;
+ ip->i_hash_cache = NULL;
}
static void gfs2_init_glock_once(void *foo)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8ac9ae1..516516e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -72,6 +72,7 @@
init_waitqueue_head(&sdp->sd_glock_wait);
atomic_set(&sdp->sd_glock_disposal, 0);
+ init_completion(&sdp->sd_locking_init);
spin_lock_init(&sdp->sd_statfs_spin);
spin_lock_init(&sdp->sd_rindex_spin);
@@ -1017,11 +1018,13 @@
fsname++;
if (lm->lm_mount == NULL) {
fs_info(sdp, "Now mounting FS...\n");
+ complete(&sdp->sd_locking_init);
return 0;
}
ret = lm->lm_mount(sdp, fsname);
if (ret == 0)
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+ complete(&sdp->sd_locking_init);
return ret;
}
@@ -1091,6 +1094,7 @@
if (sdp->sd_args.ar_nobarrier)
set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ sb->s_flags |= MS_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_d_op = &gfs2_dops;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b780df..7f8af1e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1607,14 +1607,15 @@
}
/**
- * gfs2_free_data - free a contiguous run of data block(s)
+ * __gfs2_free_blocks - free a contiguous run of block(s)
* @ip: the inode these blocks are being freed from
* @bstart: first block of a run of contiguous blocks
* @blen: the length of the block run
+ * @meta: 1 if the blocks represent metadata
*
*/
-void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1631,54 +1632,11 @@
gfs2_trans_add_rg(rgd);
/* Directories keep their data in the metadata address space */
- if (ip->i_depth)
+ if (meta || ip->i_depth)
gfs2_meta_wipe(ip, bstart, blen);
}
/**
- * gfs2_free_data - free a contiguous run of data block(s)
- * @ip: the inode these blocks are being freed from
- * @bstart: first block of a run of contiguous blocks
- * @blen: the length of the block run
- *
- */
-
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-
- __gfs2_free_data(ip, bstart, blen);
- gfs2_statfs_change(sdp, 0, +blen, 0);
- gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
-}
-
-/**
- * gfs2_free_meta - free a contiguous run of data block(s)
- * @ip: the inode these blocks are being freed from
- * @bstart: first block of a run of contiguous blocks
- * @blen: the length of the block run
- *
- */
-
-void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
-
- rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
- if (!rgd)
- return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
- rgd->rd_free += blen;
-
- gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
- gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-
- gfs2_trans_add_rg(rgd);
- gfs2_meta_wipe(ip, bstart, blen);
-}
-
-/**
* gfs2_free_meta - free a contiguous run of data block(s)
* @ip: the inode these blocks are being freed from
* @bstart: first block of a run of contiguous blocks
@@ -1690,7 +1648,7 @@
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- __gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_blocks(ip, bstart, blen, 1);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a80e303..d253f9a 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,9 +52,7 @@
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
-extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ed540e7..b7beadd 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -757,13 +757,17 @@
struct timespec atime;
struct gfs2_dinode *di;
int ret = -EAGAIN;
+ int unlock_required = 0;
/* Skip timestamp update, if this is from a memalloc */
if (current->flags & PF_MEMALLOC)
goto do_flush;
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- if (ret)
- goto do_flush;
+ if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ if (ret)
+ goto do_flush;
+ unlock_required = 1;
+ }
ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (ret)
goto do_unlock;
@@ -780,7 +784,8 @@
}
gfs2_trans_end(sdp);
do_unlock:
- gfs2_glock_dq_uninit(&gh);
+ if (unlock_required)
+ gfs2_glock_dq_uninit(&gh);
do_flush:
if (wbc->sync_mode == WB_SYNC_ALL)
gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
@@ -1427,7 +1432,20 @@
return error;
}
-/*
+/**
+ * gfs2_evict_inode - Remove an inode from cache
+ * @inode: The inode to evict
+ *
+ * There are three cases to consider:
+ * 1. i_nlink == 0, we are final opener (and must deallocate)
+ * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
+ * 3. i_nlink > 0
+ *
+ * If the fs is read only, then we have to treat all cases as per #3
+ * since we are unable to do any deallocation. The inode will be
+ * deallocated by the next read/write node to attempt an allocation
+ * in the same resource group
+ *
* We have to (at the moment) hold the inodes main lock to cover
* the gap between unlocking the shared lock on the iopen lock and
* taking the exclusive lock. I'd rather do a shared -> exclusive
@@ -1470,6 +1488,8 @@
if (error)
goto out_truncate;
+ /* Case 1 starts here */
+
if (S_ISDIR(inode->i_mode) &&
(ip->i_diskflags & GFS2_DIF_EXHASH)) {
error = gfs2_dir_exhash_dealloc(ip);
@@ -1493,13 +1513,16 @@
goto out_unlock;
out_truncate:
+ /* Case 2 starts here */
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
goto out_unlock;
- gfs2_final_release_pages(ip);
+ /* Needs to be done before glock release & also in a transaction */
+ truncate_inode_pages(&inode->i_data, 0);
gfs2_trans_end(sdp);
out_unlock:
+ /* Error path for case 1 */
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
gfs2_glock_dq(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1507,9 +1530,10 @@
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
out:
+ /* Case 3 starts here */
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
-
+ gfs2_dir_hash_inval(ip);
ip->i_gl->gl_object = NULL;
gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index e20eab3..443cabc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -338,6 +338,9 @@
rv = sscanf(buf, "%u", &first);
if (rv != 1 || first > 1)
return -EINVAL;
+ rv = wait_for_completion_killable(&sdp->sd_locking_init);
+ if (rv)
+ return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EBUSY;
if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
@@ -414,7 +417,9 @@
rv = sscanf(buf, "%d", &jid);
if (rv != 1)
return -EINVAL;
-
+ rv = wait_for_completion_killable(&sdp->sd_locking_init);
+ if (rv)
+ return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EINVAL;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index fff16c9..96a1b62 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -123,8 +123,8 @@
struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, hfs_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ hfs_get_block);
/*
* In case of error extending write may have instantiated a few
@@ -615,6 +615,8 @@
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
+ inode_dio_wait(inode);
+
error = vmtruncate(inode, attr->ia_size);
if (error)
return error;
@@ -625,12 +627,18 @@
return 0;
}
-static int hfs_file_fsync(struct file *filp, int datasync)
+static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = filp->f_mapping->host;
struct super_block * sb;
int ret, err;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* sync the inode to buffers */
ret = write_inode_now(inode, 0);
@@ -647,6 +655,7 @@
err = sync_blockdev(sb->s_bdev);
if (!ret)
ret = err;
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2312de3..2a734cf 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -43,6 +43,10 @@
node->tree->node_size - (rec + 1) * 2);
if (!recoff)
return 0;
+ if (recoff > node->tree->node_size - 2) {
+ printk(KERN_ERR "hfs: recoff %d too large\n", recoff);
+ return 0;
+ }
retval = hfs_bnode_read_u16(node, recoff) + 2;
if (retval > node->tree->max_key_len + 2) {
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index b4ba1b3..4dfbfec 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -212,7 +212,9 @@
dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
str->name, cnid, inode->i_nlink);
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ if (err)
+ return err;
hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
@@ -269,7 +271,9 @@
dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
str ? str->name : NULL, cnid);
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ if (err)
+ return err;
if (!str) {
int len;
@@ -347,12 +351,14 @@
struct hfs_find_data src_fd, dst_fd;
hfsplus_cat_entry entry;
int entry_size, type;
- int err = 0;
+ int err;
dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
cnid, src_dir->i_ino, src_name->name,
dst_dir->i_ino, dst_name->name);
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
+ if (err)
+ return err;
dst_fd = src_fd;
/* find the old dir entry and read the data */
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059..25b2443 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -38,7 +38,9 @@
sb = dir->i_sb;
dentry->d_fsdata = NULL;
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ if (err)
+ return ERR_PTR(err);
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
@@ -132,7 +134,9 @@
if (filp->f_pos >= inode->i_size)
return 0;
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ if (err)
+ return err;
hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
err = hfs_brec_find(&fd);
if (err)
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index b1991a2..5849e3e 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -119,22 +119,31 @@
set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
}
-static void hfsplus_ext_write_extent_locked(struct inode *inode)
+static int hfsplus_ext_write_extent_locked(struct inode *inode)
{
+ int res;
+
if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
struct hfs_find_data fd;
- hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+ res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+ if (res)
+ return res;
__hfsplus_ext_write_extent(inode, &fd);
hfs_find_exit(&fd);
}
+ return 0;
}
-void hfsplus_ext_write_extent(struct inode *inode)
+int hfsplus_ext_write_extent(struct inode *inode)
{
+ int res;
+
mutex_lock(&HFSPLUS_I(inode)->extents_lock);
- hfsplus_ext_write_extent_locked(inode);
+ res = hfsplus_ext_write_extent_locked(inode);
mutex_unlock(&HFSPLUS_I(inode)->extents_lock);
+
+ return res;
}
static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
@@ -194,9 +203,11 @@
block < hip->cached_start + hip->cached_blocks)
return 0;
- hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
- res = __hfsplus_ext_cache_extent(&fd, inode, block);
- hfs_find_exit(&fd);
+ res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+ if (!res) {
+ res = __hfsplus_ext_cache_extent(&fd, inode, block);
+ hfs_find_exit(&fd);
+ }
return res;
}
@@ -209,6 +220,7 @@
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
int res = -EIO;
u32 ablock, dblock, mask;
+ sector_t sector;
int was_dirty = 0;
int shift;
@@ -255,10 +267,12 @@
done:
dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
inode->i_ino, (long long)iblock, dblock);
+
mask = (1 << sbi->fs_shift) - 1;
- map_bh(bh_result, sb,
- (dblock << sbi->fs_shift) + sbi->blockoffset +
- (iblock & mask));
+ sector = ((sector_t)dblock << sbi->fs_shift) +
+ sbi->blockoffset + (iblock & mask);
+ map_bh(bh_result, sb, sector);
+
if (create) {
set_buffer_new(bh_result);
hip->phys_size += sb->s_blocksize;
@@ -371,7 +385,9 @@
if (total_blocks == blocks)
return 0;
- hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+ res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+ if (res)
+ return res;
do {
res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid,
total_blocks, type);
@@ -469,7 +485,9 @@
insert_extent:
dprint(DBG_EXTENT, "insert new extent\n");
- hfsplus_ext_write_extent_locked(inode);
+ res = hfsplus_ext_write_extent_locked(inode);
+ if (res)
+ goto out;
memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
hip->cached_extents[0].start_block = cpu_to_be32(start);
@@ -500,7 +518,6 @@
struct page *page;
void *fsdata;
u32 size = inode->i_size;
- int res;
res = pagecache_write_begin(NULL, mapping, size, 0,
AOP_FLAG_UNINTERRUPTIBLE,
@@ -523,7 +540,12 @@
goto out;
mutex_lock(&hip->extents_lock);
- hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+ res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+ if (res) {
+ mutex_unlock(&hip->extents_lock);
+ /* XXX: We lack error handling of hfsplus_file_truncate() */
+ return;
+ }
while (1) {
if (alloc_cnt == hip->first_blocks) {
hfsplus_free_extents(sb, hip->first_extents,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d685752..d7674d0 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
#include "hfsplus_raw.h"
#define DBG_BNODE_REFS 0x00000001
@@ -110,7 +111,9 @@
struct hfs_btree;
struct hfsplus_sb_info {
+ void *s_vhdr_buf;
struct hfsplus_vh *s_vhdr;
+ void *s_backup_vhdr_buf;
struct hfsplus_vh *s_backup_vhdr;
struct hfs_btree *ext_tree;
struct hfs_btree *cat_tree;
@@ -258,6 +261,15 @@
struct hfsplus_cat_key key;
};
+/*
+ * Find minimum acceptable I/O size for an hfsplus sb.
+ */
+static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
+{
+ return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
+ HFSPLUS_SECTOR_SIZE);
+}
+
#define hfs_btree_open hfsplus_btree_open
#define hfs_btree_close hfsplus_btree_close
#define hfs_btree_write hfsplus_btree_write
@@ -374,7 +386,7 @@
/* extents.c */
int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
-void hfsplus_ext_write_extent(struct inode *);
+int hfsplus_ext_write_extent(struct inode *);
int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
int hfsplus_free_fork(struct super_block *, u32,
struct hfsplus_fork_raw *, int);
@@ -392,7 +404,8 @@
int hfsplus_cat_write_inode(struct inode *);
struct inode *hfsplus_new_inode(struct super_block *, int);
void hfsplus_delete_inode(struct inode *);
-int hfsplus_file_fsync(struct file *file, int datasync);
+int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
/* ioctl.c */
long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -436,8 +449,8 @@
/* wrapper.c */
int hfsplus_read_wrapper(struct super_block *);
int hfs_part_find(struct super_block *, sector_t *, sector_t *);
-int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
- void *data, int rw);
+int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
+ void *buf, void **data, int rw);
/* time macros */
#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b248a6cf..4cc1e3a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -119,8 +119,8 @@
struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, hfsplus_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ hfsplus_get_block);
/*
* In case of error extending write may have instantiated a few
@@ -195,11 +195,13 @@
hip->flags = 0;
set_bit(HFSPLUS_I_RSRC, &hip->flags);
- hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
- err = hfsplus_find_cat(sb, dir->i_ino, &fd);
- if (!err)
- err = hfsplus_cat_read_inode(inode, &fd);
- hfs_find_exit(&fd);
+ err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+ if (!err) {
+ err = hfsplus_find_cat(sb, dir->i_ino, &fd);
+ if (!err)
+ err = hfsplus_cat_read_inode(inode, &fd);
+ hfs_find_exit(&fd);
+ }
if (err) {
iput(inode);
return ERR_PTR(err);
@@ -296,6 +298,8 @@
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
+ inode_dio_wait(inode);
+
error = vmtruncate(inode, attr->ia_size);
if (error)
return error;
@@ -306,13 +310,19 @@
return 0;
}
-int hfsplus_file_fsync(struct file *file, int datasync)
+int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_mapping->host;
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
int error = 0, error2;
+ error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (error)
+ return error;
+ mutex_lock(&inode->i_mutex);
+
/*
* Sync inode metadata into the catalog and extent trees.
*/
@@ -340,6 +350,8 @@
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ mutex_unlock(&inode->i_mutex);
+
return error;
}
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 40ad88c..eb355d8 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -88,11 +88,12 @@
return -ENOENT;
}
-static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
- sector_t *part_start, sector_t *part_size)
+static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
+ struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
int size = be32_to_cpu(pm->pmMapBlkCnt);
+ int buf_size = hfsplus_min_io_size(sb);
int res;
int i = 0;
@@ -107,11 +108,14 @@
if (++i >= size)
return -ENOENT;
- res = hfsplus_submit_bio(sb->s_bdev,
- *part_start + HFS_PMAP_BLK + i,
- pm, READ);
- if (res)
- return res;
+ pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
+ if ((u8 *)pm - (u8 *)buf >= buf_size) {
+ res = hfsplus_submit_bio(sb,
+ *part_start + HFS_PMAP_BLK + i,
+ buf, (void **)&pm, READ);
+ if (res)
+ return res;
+ }
} while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
return -ENOENT;
@@ -124,15 +128,15 @@
int hfs_part_find(struct super_block *sb,
sector_t *part_start, sector_t *part_size)
{
- void *data;
+ void *buf, *data;
int res;
- data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!data)
+ buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
- data, READ);
+ res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
+ buf, &data, READ);
if (res)
goto out;
@@ -141,13 +145,13 @@
res = hfs_parse_old_pmap(sb, data, part_start, part_size);
break;
case HFS_NEW_PMAP_MAGIC:
- res = hfs_parse_new_pmap(sb, data, part_start, part_size);
+ res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
break;
default:
res = -ENOENT;
break;
}
out:
- kfree(data);
+ kfree(buf);
return res;
}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b49b555..c106ca2 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -73,11 +73,13 @@
if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
inode->i_ino == HFSPLUS_ROOT_CNID) {
- hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
- err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
- if (!err)
- err = hfsplus_cat_read_inode(inode, &fd);
- hfs_find_exit(&fd);
+ err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+ if (!err) {
+ err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
+ if (!err)
+ err = hfsplus_cat_read_inode(inode, &fd);
+ hfs_find_exit(&fd);
+ }
} else {
err = hfsplus_system_read_inode(inode);
}
@@ -133,9 +135,13 @@
static int hfsplus_write_inode(struct inode *inode,
struct writeback_control *wbc)
{
+ int err;
+
dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
- hfsplus_ext_write_extent(inode);
+ err = hfsplus_ext_write_extent(inode);
+ if (err)
+ return err;
if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
inode->i_ino == HFSPLUS_ROOT_CNID)
@@ -197,17 +203,17 @@
write_backup = 1;
}
- error2 = hfsplus_submit_bio(sb->s_bdev,
+ error2 = hfsplus_submit_bio(sb,
sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
- sbi->s_vhdr, WRITE_SYNC);
+ sbi->s_vhdr_buf, NULL, WRITE_SYNC);
if (!error)
error = error2;
if (!write_backup)
goto out;
- error2 = hfsplus_submit_bio(sb->s_bdev,
+ error2 = hfsplus_submit_bio(sb,
sbi->part_start + sbi->sect_count - 2,
- sbi->s_backup_vhdr, WRITE_SYNC);
+ sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
if (!error)
error2 = error;
out:
@@ -251,8 +257,8 @@
hfs_btree_close(sbi->ext_tree);
iput(sbi->alloc_file);
iput(sbi->hidden_dir);
- kfree(sbi->s_vhdr);
- kfree(sbi->s_backup_vhdr);
+ kfree(sbi->s_vhdr_buf);
+ kfree(sbi->s_backup_vhdr_buf);
unload_nls(sbi->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
@@ -393,6 +399,13 @@
if (!sbi->rsrc_clump_blocks)
sbi->rsrc_clump_blocks = 1;
+ err = generic_check_addressable(sbi->alloc_blksz_shift,
+ sbi->total_blocks);
+ if (err) {
+ printk(KERN_ERR "hfs: filesystem size too large.\n");
+ goto out_free_vhdr;
+ }
+
/* Set up operations so we can load metadata */
sb->s_op = &hfsplus_sops;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -417,6 +430,8 @@
sb->s_flags |= MS_RDONLY;
}
+ err = -EINVAL;
+
/* Load metadata objects (B*Trees) */
sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
if (!sbi->ext_tree) {
@@ -447,7 +462,9 @@
str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
str.name = HFSP_HIDDENDIR_NAME;
- hfs_find_init(sbi->cat_tree, &fd);
+ err = hfs_find_init(sbi->cat_tree, &fd);
+ if (err)
+ goto out_put_root;
hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
@@ -500,7 +517,7 @@
out_put_hidden_dir:
iput(sbi->hidden_dir);
out_put_root:
- iput(sbi->alloc_file);
+ iput(root);
out_put_alloc_file:
iput(sbi->alloc_file);
out_close_cat_tree:
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a3f0bfc..a32998f 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -142,7 +142,11 @@
/* search for single decomposed char */
if (likely(compose))
ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
- if (ce1 && (cc = ce1[0])) {
+ if (ce1)
+ cc = ce1[0];
+ else
+ cc = 0;
+ if (cc) {
/* start of a possibly decomposed Hangul char */
if (cc != 0xffff)
goto done;
@@ -209,7 +213,8 @@
i++;
ce2 = ce1;
}
- if ((cc = ce2[0])) {
+ cc = ce2[0];
+ if (cc) {
ip += i;
ustrlen -= i;
goto done;
@@ -301,7 +306,11 @@
while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
size = asc2unichar(sb, astr, len, &c);
- if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+ if (decompose)
+ dstr = decompose_unichar(c, &dsize);
+ else
+ dstr = NULL;
+ if (dstr) {
if (outlen + dsize > HFSPLUS_MAX_STRLEN)
break;
do {
@@ -346,15 +355,23 @@
astr += size;
len -= size;
- if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+ if (decompose)
+ dstr = decompose_unichar(c, &dsize);
+ else
+ dstr = NULL;
+ if (dstr) {
do {
c2 = *dstr++;
- if (!casefold || (c2 = case_fold(c2)))
+ if (casefold)
+ c2 = case_fold(c2);
+ if (!casefold || c2)
hash = partial_name_hash(c2, hash);
} while (--dsize > 0);
} else {
c2 = c;
- if (!casefold || (c2 = case_fold(c2)))
+ if (casefold)
+ c2 = case_fold(c2);
+ if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
@@ -422,12 +439,14 @@
c1 = *dstr1;
c2 = *dstr2;
if (casefold) {
- if (!(c1 = case_fold(c1))) {
+ c1 = case_fold(c1);
+ if (!c1) {
dstr1++;
dsize1--;
continue;
}
- if (!(c2 = case_fold(c2))) {
+ c2 = case_fold(c2);
+ if (!c2) {
dstr2++;
dsize2--;
continue;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 3031d81..10e515a 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -31,31 +31,77 @@
complete(bio->bi_private);
}
-int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
- void *data, int rw)
+/*
+ * hfsplus_submit_bio - Perform block I/O
+ * @sb: super block of volume for I/O
+ * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
+ * @buf: buffer for I/O
+ * @data: output pointer for location of requested data
+ * @rw: direction of I/O
+ *
+ * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
+ * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
+ * @data will return a pointer to the start of the requested sector,
+ * which may not be the same location as @buf.
+ *
+ * If @sector is not aligned to the bdev logical block size it will
+ * be rounded down. For writes this means that @buf should contain data
+ * that starts at the rounded-down address. As long as the data was
+ * read using hfsplus_submit_bio() and the same buffer is used things
+ * will work correctly.
+ */
+int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
+ void *buf, void **data, int rw)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
+ int ret = 0;
+ unsigned int io_size;
+ loff_t start;
+ int offset;
+
+ /*
+ * Align sector to hardware sector size and find offset. We
+ * assume that io_size is a power of two, which _should_
+ * be true.
+ */
+ io_size = hfsplus_min_io_size(sb);
+ start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
+ offset = start & (io_size - 1);
+ sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio->bi_bdev = sb->s_bdev;
bio->bi_end_io = hfsplus_end_io_sync;
bio->bi_private = &wait;
- /*
- * We always submit one sector at a time, so bio_add_page must not fail.
- */
- if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE,
- offset_in_page(data)) != HFSPLUS_SECTOR_SIZE)
- BUG();
+ if (!(rw & WRITE) && data)
+ *data = (u8 *)buf + offset;
+
+ while (io_size > 0) {
+ unsigned int page_offset = offset_in_page(buf);
+ unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
+ io_size);
+
+ ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
+ if (ret != len) {
+ ret = -EIO;
+ goto out;
+ }
+ io_size -= len;
+ buf = (u8 *)buf + len;
+ }
submit_bio(rw, bio);
wait_for_completion(&wait);
if (!bio_flagged(bio, BIO_UPTODATE))
- return -EIO;
- return 0;
+ ret = -EIO;
+
+out:
+ bio_put(bio);
+ return ret < 0 ? ret : 0;
}
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -138,23 +184,19 @@
if (hfsplus_get_last_session(sb, &part_start, &part_size))
goto out;
- if ((u64)part_start + part_size > 0x100000000ULL) {
- pr_err("hfs: volumes larger than 2TB are not supported yet\n");
- goto out;
- }
error = -ENOMEM;
- sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!sbi->s_vhdr)
+ sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!sbi->s_vhdr_buf)
goto out;
- sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!sbi->s_backup_vhdr)
+ sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!sbi->s_backup_vhdr_buf)
goto out_free_vhdr;
reread:
- error = hfsplus_submit_bio(sb->s_bdev,
- part_start + HFSPLUS_VOLHEAD_SECTOR,
- sbi->s_vhdr, READ);
+ error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
+ sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
+ READ);
if (error)
goto out_free_backup_vhdr;
@@ -169,8 +211,9 @@
if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
goto out_free_backup_vhdr;
wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
- part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
- part_size = wd.embed_count * wd.ablk_size;
+ part_start += (sector_t)wd.ablk_start +
+ (sector_t)wd.embed_start * wd.ablk_size;
+ part_size = (sector_t)wd.embed_count * wd.ablk_size;
goto reread;
default:
/*
@@ -183,9 +226,9 @@
goto reread;
}
- error = hfsplus_submit_bio(sb->s_bdev,
- part_start + part_size - 2,
- sbi->s_backup_vhdr, READ);
+ error = hfsplus_submit_bio(sb, part_start + part_size - 2,
+ sbi->s_backup_vhdr_buf,
+ (void **)&sbi->s_backup_vhdr, READ);
if (error)
goto out_free_backup_vhdr;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2638c834e..0d22afd 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -362,9 +362,20 @@
return 0;
}
-int hostfs_fsync(struct file *file, int datasync)
+int hostfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync);
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+ ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
}
static const struct file_operations hostfs_file_fops = {
@@ -748,12 +759,12 @@
return err;
}
-int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
+int hostfs_permission(struct inode *ino, int desired)
{
char *name;
int r = 0, w = 0, x = 0, err;
- if (flags & IPERM_FLAG_RCU)
+ if (desired & MAY_NOT_BLOCK)
return -ECHILD;
if (desired & MAY_READ) r = 1;
@@ -770,7 +781,7 @@
err = access_file(name, r, w, x);
__putname(name);
if (!err)
- err = generic_permission(ino, desired, flags, NULL);
+ err = generic_permission(ino, desired);
return err;
}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index f46ae02..96a8ed9 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -29,6 +29,10 @@
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct super_block *s = i->i_sb;
+ /* Somebody else will have to figure out what to do here */
+ if (whence == SEEK_DATA || whence == SEEK_HOLE)
+ return -EINVAL;
+
hpfs_lock(s);
/*printk("dir lseek\n");*/
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 89c500e..89d2a58 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -18,9 +18,14 @@
return 0;
}
-int hpfs_file_fsync(struct file *file, int datasync)
+int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ ret = filemap_write_and_wait_range(file->f_mapping, start, end);
+ if (ret)
+ return ret;
return sync_blockdev(inode->i_sb->s_bdev);
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index dd552f8..331b5e2 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -258,7 +258,7 @@
/* file.c */
-int hpfs_file_fsync(struct file *, int);
+int hpfs_file_fsync(struct file *, loff_t, loff_t, int);
extern const struct file_operations hpfs_file_ops;
extern const struct inode_operations hpfs_file_iops;
extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index acf95da..2df69e2 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -398,7 +398,7 @@
hpfs_unlock(dir->i_sb);
return -ENOSPC;
}
- if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
+ if (generic_permission(inode, MAY_WRITE) ||
!S_ISREG(inode->i_mode) ||
get_write_access(inode)) {
d_rehash(dentry);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 87ed48e..8635be5 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -139,7 +139,8 @@
static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
struct nameidata *nd)
{
- struct dentry *proc_dentry, *new, *parent;
+ struct dentry *proc_dentry, *parent;
+ struct qstr *name = &dentry->d_name;
struct inode *inode;
int err, deleted;
@@ -149,23 +150,9 @@
else if (deleted)
return ERR_PTR(-ENOENT);
- err = -ENOMEM;
parent = HPPFS_I(ino)->proc_dentry;
mutex_lock(&parent->d_inode->i_mutex);
- proc_dentry = d_lookup(parent, &dentry->d_name);
- if (proc_dentry == NULL) {
- proc_dentry = d_alloc(parent, &dentry->d_name);
- if (proc_dentry == NULL) {
- mutex_unlock(&parent->d_inode->i_mutex);
- goto out;
- }
- new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
- proc_dentry, NULL);
- if (new) {
- dput(proc_dentry);
- proc_dentry = new;
- }
- }
+ proc_dentry = lookup_one_len(name->name, parent, name->len);
mutex_unlock(&parent->d_inode->i_mutex);
if (IS_ERR(proc_dentry))
@@ -174,13 +161,11 @@
err = -ENOMEM;
inode = get_inode(ino->i_sb, proc_dentry);
if (!inode)
- goto out_dput;
+ goto out;
d_add(dentry, inode);
return NULL;
- out_dput:
- dput(proc_dentry);
out:
return ERR_PTR(err);
}
@@ -588,9 +573,10 @@
return err;
}
-static int hppfs_fsync(struct file *file, int datasync)
+static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
- return 0;
+ return filemap_write_and_wait_range(file->f_mapping, start, end);
}
static const struct file_operations hppfs_dir_fops = {
@@ -690,8 +676,10 @@
struct inode *proc_ino = dentry->d_inode;
struct inode *inode = new_inode(sb);
- if (!inode)
+ if (!inode) {
+ dput(dentry);
return ERR_PTR(-ENOMEM);
+ }
if (S_ISDIR(dentry->d_inode->i_mode)) {
inode->i_op = &hppfs_dir_iops;
@@ -704,7 +692,7 @@
inode->i_fop = &hppfs_file_fops;
}
- HPPFS_I(inode)->proc_dentry = dget(dentry);
+ HPPFS_I(inode)->proc_dentry = dentry;
inode->i_uid = proc_ino->i_uid;
inode->i_gid = proc_ino->i_gid;
@@ -737,7 +725,7 @@
sb->s_fs_info = proc_mnt;
err = -ENOMEM;
- root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root);
+ root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root));
if (!root_inode)
goto out_mntput;
diff --git a/fs/inode.c b/fs/inode.c
index 43566d1..96c77b8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,8 +33,8 @@
*
* inode->i_lock protects:
* inode->i_state, inode->i_hash, __iget()
- * inode_lru_lock protects:
- * inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_lru_lock protects:
+ * inode->i_sb->s_inode_lru, inode->i_lru
* inode_sb_list_lock protects:
* sb->s_inodes, inode->i_sb_list
* inode_wb_list_lock protects:
@@ -46,7 +46,7 @@
*
* inode_sb_list_lock
* inode->i_lock
- * inode_lru_lock
+ * inode->i_sb->s_inode_lru_lock
*
* inode_wb_list_lock
* inode->i_lock
@@ -64,24 +64,10 @@
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
-static LIST_HEAD(inode_lru);
-static DEFINE_SPINLOCK(inode_lru_lock);
-
__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
/*
- * iprune_sem provides exclusion between the icache shrinking and the
- * umount path.
- *
- * We don't actually need it to protect anything in the umount path,
- * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
- * time we are past evict_inodes.
- */
-static DECLARE_RWSEM(iprune_sem);
-
-/*
* Empty aops. Can be used for the cases where the user does not
* define any of the address_space operations.
*/
@@ -95,6 +81,7 @@
struct inodes_stat_t inodes_stat;
static DEFINE_PER_CPU(unsigned int, nr_inodes);
+static DEFINE_PER_CPU(unsigned int, nr_unused);
static struct kmem_cache *inode_cachep __read_mostly;
@@ -109,7 +96,11 @@
static inline int get_nr_inodes_unused(void)
{
- return inodes_stat.nr_unused;
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_unused, i);
+ return sum < 0 ? 0 : sum;
}
int get_nr_dirty_inodes(void)
@@ -127,6 +118,7 @@
void __user *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
+ inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -176,8 +168,7 @@
mutex_init(&inode->i_mutex);
lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
- init_rwsem(&inode->i_alloc_sem);
- lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+ atomic_set(&inode->i_dio_count, 0);
mapping->a_ops = &empty_aops;
mapping->host = inode;
@@ -337,22 +328,24 @@
static void inode_lru_list_add(struct inode *inode)
{
- spin_lock(&inode_lru_lock);
+ spin_lock(&inode->i_sb->s_inode_lru_lock);
if (list_empty(&inode->i_lru)) {
- list_add(&inode->i_lru, &inode_lru);
- inodes_stat.nr_unused++;
+ list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+ inode->i_sb->s_nr_inodes_unused++;
+ this_cpu_inc(nr_unused);
}
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
static void inode_lru_list_del(struct inode *inode)
{
- spin_lock(&inode_lru_lock);
+ spin_lock(&inode->i_sb->s_inode_lru_lock);
if (!list_empty(&inode->i_lru)) {
list_del_init(&inode->i_lru);
- inodes_stat.nr_unused--;
+ inode->i_sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
}
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
/**
@@ -537,14 +530,6 @@
spin_unlock(&inode_sb_list_lock);
dispose_list(&dispose);
-
- /*
- * Cycle through iprune_sem to make sure any inode that prune_icache
- * moved off the list before we took the lock has been fully torn
- * down.
- */
- down_write(&iprune_sem);
- up_write(&iprune_sem);
}
/**
@@ -607,8 +592,10 @@
}
/*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lru_lock by dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside sb->s_inode_lru_lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. If the inode has metadata buffers attached to
@@ -622,29 +609,28 @@
* LRU does not have strict ordering. Hence we don't want to reclaim inodes
* with this flag set because they are the inodes that are out of order.
*/
-static void prune_icache(int nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
{
LIST_HEAD(freeable);
int nr_scanned;
unsigned long reap = 0;
- down_read(&iprune_sem);
- spin_lock(&inode_lru_lock);
- for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+ spin_lock(&sb->s_inode_lru_lock);
+ for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
- if (list_empty(&inode_lru))
+ if (list_empty(&sb->s_inode_lru))
break;
- inode = list_entry(inode_lru.prev, struct inode, i_lru);
+ inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
/*
- * we are inverting the inode_lru_lock/inode->i_lock here,
+ * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
* so use a trylock. If we fail to get the lock, just move the
* inode to the back of the list so we don't spin on it.
*/
if (!spin_trylock(&inode->i_lock)) {
- list_move(&inode->i_lru, &inode_lru);
+ list_move(&inode->i_lru, &sb->s_inode_lru);
continue;
}
@@ -656,28 +642,29 @@
(inode->i_state & ~I_REFERENCED)) {
list_del_init(&inode->i_lru);
spin_unlock(&inode->i_lock);
- inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
continue;
}
/* recently referenced inodes get one more pass */
if (inode->i_state & I_REFERENCED) {
inode->i_state &= ~I_REFERENCED;
- list_move(&inode->i_lru, &inode_lru);
+ list_move(&inode->i_lru, &sb->s_inode_lru);
spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
__iget(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&sb->s_inode_lru_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
- spin_lock(&inode_lru_lock);
+ spin_lock(&sb->s_inode_lru_lock);
- if (inode != list_entry(inode_lru.next,
+ if (inode != list_entry(sb->s_inode_lru.next,
struct inode, i_lru))
continue; /* wrong inode or list_empty */
/* avoid lock inversions with trylock */
@@ -693,51 +680,18 @@
spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &freeable);
- inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&sb->s_inode_lru_lock);
dispose_list(&freeable);
- up_read(&iprune_sem);
}
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int nr = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (nr) {
- /*
- * Nasty deadlock avoidance. We may hold various FS locks,
- * and we don't want to recurse into the FS that called us
- * in clear_inode() and friends..
- */
- if (!(gfp_mask & __GFP_FS))
- return -1;
- prune_icache(nr);
- }
- return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
- .shrink = shrink_icache_memory,
- .seeks = DEFAULT_SEEKS,
-};
-
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
@@ -1331,7 +1285,7 @@
WARN_ON(inode->i_state & I_NEW);
- if (op && op->drop_inode)
+ if (op->drop_inode)
drop = op->drop_inode(inode);
else
drop = generic_drop_inode(inode);
@@ -1617,7 +1571,6 @@
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
init_once);
- register_shrinker(&icache_shrinker);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index b29c46e..fe327c2 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,6 +97,7 @@
* super.c
*/
extern int do_remount_sb(struct super_block *, int, void *, int);
+extern bool grab_super_passive(struct super_block *sb);
extern void __put_super(struct super_block *sb);
extern void put_super(struct super_block *sb);
extern struct dentry *mount_fs(struct file_system_type *,
@@ -135,3 +136,8 @@
extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *, bool);
+
+/*
+ * dcache.c
+ */
+extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 0542b6e..f20437c 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -254,19 +254,16 @@
char *tmpname;
struct iso_directory_record *tmpde;
struct inode *inode = filp->f_path.dentry->d_inode;
- struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
tmpname = (char *)__get_free_page(GFP_KERNEL);
if (tmpname == NULL)
return -ENOMEM;
- mutex_lock(&sbi->s_mutex);
tmpde = (struct iso_directory_record *) (tmpname+1024);
result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde);
free_page((unsigned long) tmpname);
- mutex_unlock(&sbi->s_mutex);
return result;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b3cc858..a5d0367 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -863,7 +863,6 @@
sbi->s_utf8 = opt.utf8;
sbi->s_nocompress = opt.nocompress;
sbi->s_overriderockperm = opt.overriderockperm;
- mutex_init(&sbi->s_mutex);
/*
* It would be incredibly stupid to allow people to mark every file
* on the disk as suid, so we merely allow them to set the default
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 2882dc0..7d33de8 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -55,7 +55,6 @@
gid_t s_gid;
uid_t s_uid;
struct nls_table *s_nls_iocharset; /* Native language support table */
- struct mutex s_mutex; /* replaces BKL, please remove if possible */
};
#define ISOFS_INVALID_MODE ((mode_t) -1)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 4fb3e80..1e2946f 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -168,7 +168,6 @@
int found;
unsigned long uninitialized_var(block);
unsigned long uninitialized_var(offset);
- struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);
struct inode *inode;
struct page *page;
@@ -176,21 +175,13 @@
if (!page)
return ERR_PTR(-ENOMEM);
- mutex_lock(&sbi->s_mutex);
found = isofs_find_entry(dir, dentry,
&block, &offset,
page_address(page),
1024 + page_address(page));
__free_page(page);
- inode = NULL;
- if (found) {
- inode = isofs_iget(dir->i_sb, block, offset);
- if (IS_ERR(inode)) {
- mutex_unlock(&sbi->s_mutex);
- return ERR_CAST(inode);
- }
- }
- mutex_unlock(&sbi->s_mutex);
+ inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL;
+
return d_splice_alias(inode, dentry);
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index f9cd04d..1fbc7de 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -678,7 +678,6 @@
init_rock_state(&rs, inode);
block = ei->i_iget5_block;
- mutex_lock(&sbi->s_mutex);
bh = sb_bread(inode->i_sb, block);
if (!bh)
goto out_noread;
@@ -748,7 +747,6 @@
goto fail;
brelse(bh);
*rpnt = '\0';
- mutex_unlock(&sbi->s_mutex);
SetPageUptodate(page);
kunmap(page);
unlock_page(page);
@@ -765,7 +763,6 @@
printk("symlink spans iso9660 blocks\n");
fail:
brelse(bh);
- mutex_unlock(&sbi->s_mutex);
error:
SetPageError(page);
kunmap(page);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 828a0e1..3675b3c 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,12 +259,12 @@
return rc;
}
-int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
+int jffs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
int rc;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 3119f59..5e42de8 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-extern int jffs2_check_acl(struct inode *, int, unsigned int);
+extern int jffs2_check_acl(struct inode *, int);
extern int jffs2_acl_chmod(struct inode *);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 4bca6a2..5f243cd 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -102,10 +102,8 @@
mutex_unlock(&dir_f->sem);
if (ino) {
inode = jffs2_iget(dir_i->i_sb, ino);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
printk(KERN_WARNING "iget() failed for ino #%u\n", ino);
- return ERR_CAST(inode);
- }
}
return d_splice_alias(inode, target);
@@ -822,7 +820,10 @@
if (victim_f) {
/* There was a victim. Kill it off nicely */
- drop_nlink(new_dentry->d_inode);
+ if (S_ISDIR(new_dentry->d_inode->i_mode))
+ clear_nlink(new_dentry->d_inode);
+ else
+ drop_nlink(new_dentry->d_inode);
/* Don't oops if the victim was a dirent pointing to an
inode which didn't exist. */
if (victim_f->inocache) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 1c0a08d..3989f7e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -27,13 +27,20 @@
struct page **pagep, void **fsdata);
static int jffs2_readpage (struct file *filp, struct page *pg);
-int jffs2_fsync(struct file *filp, int datasync)
+int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
+ mutex_unlock(&inode->i_mutex);
return 0;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 65c6c43..9c25283 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@
extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
-int jffs2_fsync(struct file *, int);
+int jffs2_fsync(struct file *, loff_t, loff_t, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
/* ioctl.c */
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e5de942..8a0a066 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,11 +114,11 @@
return rc;
}
-int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int jfs_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2f3f531..7527855 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -28,19 +28,26 @@
#include "jfs_acl.h"
#include "jfs_debug.h"
-int jfs_fsync(struct file *file, int datasync)
+int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
int rc = 0;
+ rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (rc)
+ return rc;
+
+ mutex_lock(&inode->i_mutex);
if (!(inode->i_state & I_DIRTY) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
+ mutex_unlock(&inode->i_mutex);
return rc;
}
rc |= jfs_commit_inode(inode, 1);
+ mutex_unlock(&inode->i_mutex);
return rc ? -EIO : 0;
}
@@ -110,6 +117,8 @@
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
+ inode_dio_wait(inode);
+
rc = vmtruncate(inode, iattr->ia_size);
if (rc)
return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 1096559..77b69b2 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -329,8 +329,8 @@
struct inode *inode = file->f_mapping->host;
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, jfs_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ jfs_get_block);
/*
* In case of error extending write may have instantiated a few
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index f9285c4..54e0755 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
#ifdef CONFIG_JFS_POSIX_ACL
-int jfs_check_acl(struct inode *, int, unsigned int flags);
+int jfs_check_acl(struct inode *, int);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_acl_chmod(struct inode *inode);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index ec2fb8b..9271cfe 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -21,7 +21,7 @@
struct fid;
extern struct inode *ialloc(struct inode *, umode_t);
-extern int jfs_fsync(struct file *, int);
+extern int jfs_fsync(struct file *, loff_t, loff_t, int);
extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
extern struct inode *jfs_iget(struct super_block *, unsigned long);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index eaaf2b5..03787ef 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1456,34 +1456,23 @@
ino_t inum;
struct inode *ip;
struct component_name key;
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
int rc;
- jfs_info("jfs_lookup: name = %s", name);
+ jfs_info("jfs_lookup: name = %s", dentry->d_name.name);
- if ((name[0] == '.') && (len == 1))
- inum = dip->i_ino;
- else if (strcmp(name, "..") == 0)
- inum = PARENT(dip);
- else {
- if ((rc = get_UCSname(&key, dentry)))
- return ERR_PTR(rc);
- rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
- free_UCSname(&key);
- if (rc == -ENOENT) {
- d_add(dentry, NULL);
- return NULL;
- } else if (rc) {
- jfs_err("jfs_lookup: dtSearch returned %d", rc);
- return ERR_PTR(rc);
- }
- }
-
- ip = jfs_iget(dip->i_sb, inum);
- if (IS_ERR(ip)) {
- jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum);
- return ERR_CAST(ip);
+ if ((rc = get_UCSname(&key, dentry)))
+ return ERR_PTR(rc);
+ rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
+ free_UCSname(&key);
+ if (rc == -ENOENT) {
+ ip = NULL;
+ } else if (rc) {
+ jfs_err("jfs_lookup: dtSearch returned %d", rc);
+ ip = ERR_PTR(rc);
+ } else {
+ ip = jfs_iget(dip->i_sb, inum);
+ if (IS_ERR(ip))
+ jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
}
return d_splice_alias(ip, dentry);
@@ -1597,8 +1586,6 @@
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd && nd->flags & LOOKUP_RCU)
- return -ECHILD;
/*
* This is not negative dentry. Always valid.
*
@@ -1624,10 +1611,8 @@
* case sensitive name which is specified by user if this is
* for creation.
*/
- if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
- if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
- return 0;
- }
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
return 1;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index c88eab5..c18e9a1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
+#include "internal.h"
+
static inline int simple_positive(struct dentry *dentry)
{
return dentry->d_inode && !d_unhashed(dentry);
@@ -246,13 +248,11 @@
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
- dentry = d_alloc(NULL, &d_name);
+ dentry = __d_alloc(s, &d_name);
if (!dentry) {
iput(root);
goto Enomem;
}
- dentry->d_sb = s;
- dentry->d_parent = dentry;
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_d_op = dops;
@@ -328,8 +328,10 @@
if (new_dentry->d_inode) {
simple_unlink(new_dir, new_dentry);
- if (they_are_dirs)
+ if (they_are_dirs) {
+ drop_nlink(new_dentry->d_inode);
drop_nlink(old_dir);
+ }
} else if (they_are_dirs) {
drop_nlink(old_dir);
inc_nlink(new_dir);
@@ -822,7 +824,7 @@
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtol(attr->set_buf, NULL, 0);
+ val = simple_strtoll(attr->set_buf, NULL, 0);
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
@@ -905,21 +907,29 @@
* filesystems which track all non-inode metadata in the buffers list
* hanging off the address_space structure.
*/
-int generic_file_fsync(struct file *file, int datasync)
+int generic_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_mapping->host;
int err;
int ret;
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY))
- return ret;
+ goto out;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return ret;
+ goto out;
err = sync_inode_metadata(inode, 1);
if (ret == 0)
ret = err;
+out:
+ mutex_unlock(&inode->i_mutex);
return ret;
}
EXPORT_SYMBOL(generic_file_fsync);
@@ -956,7 +966,7 @@
/*
* No-op implementation of ->fsync for in-memory filesystems.
*/
-int noop_fsync(struct file *file, int datasync)
+int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index 0a4f50d..b286539 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -160,10 +160,28 @@
static struct kmem_cache *filelock_cache __read_mostly;
+static void locks_init_lock_always(struct file_lock *fl)
+{
+ fl->fl_next = NULL;
+ fl->fl_fasync = NULL;
+ fl->fl_owner = NULL;
+ fl->fl_pid = 0;
+ fl->fl_nspid = NULL;
+ fl->fl_file = NULL;
+ fl->fl_flags = 0;
+ fl->fl_type = 0;
+ fl->fl_start = fl->fl_end = 0;
+}
+
/* Allocate an empty lock structure. */
struct file_lock *locks_alloc_lock(void)
{
- return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
+ struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL);
+
+ if (fl)
+ locks_init_lock_always(fl);
+
+ return fl;
}
EXPORT_SYMBOL_GPL(locks_alloc_lock);
@@ -200,17 +218,9 @@
INIT_LIST_HEAD(&fl->fl_link);
INIT_LIST_HEAD(&fl->fl_block);
init_waitqueue_head(&fl->fl_wait);
- fl->fl_next = NULL;
- fl->fl_fasync = NULL;
- fl->fl_owner = NULL;
- fl->fl_pid = 0;
- fl->fl_nspid = NULL;
- fl->fl_file = NULL;
- fl->fl_flags = 0;
- fl->fl_type = 0;
- fl->fl_start = fl->fl_end = 0;
fl->fl_ops = NULL;
fl->fl_lmops = NULL;
+ locks_init_lock_always(fl);
}
EXPORT_SYMBOL(locks_init_lock);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1afae26..b3ff3d8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -371,11 +371,9 @@
page_cache_release(page);
inode = logfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n",
ino, dir->i_ino, index);
- return ERR_CAST(inode);
- }
return d_splice_alias(inode, dentry);
}
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index c2ad702..b548c87 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,11 +219,20 @@
}
}
-int logfs_fsync(struct file *file, int datasync)
+int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct super_block *sb = file->f_mapping->host->i_sb;
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
logfs_write_anchor(sb);
+ mutex_unlock(&inode->i_mutex);
+
return 0;
}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 57afd4a..f22d108 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@
extern const struct address_space_operations logfs_reg_aops;
int logfs_readpage(struct file *file, struct page *page);
long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int logfs_fsync(struct file *file, int datasync);
+int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
/* gc.c */
u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index adcdc0a..e7d23e2 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -596,8 +596,7 @@
int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
- struct inode *dir = dentry->d_parent->d_inode;
- struct super_block *sb = dir->i_sb;
+ struct super_block *sb = dentry->d_sb;
generic_fillattr(dentry->d_inode, stat);
if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
diff --git a/fs/namei.c b/fs/namei.c
index 0223c41..b7fad00 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -176,12 +176,12 @@
/*
* This does basic POSIX ACL permission checking
*/
-static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
- int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
+static int acl_permission_check(struct inode *inode, int mask)
{
+ int (*check_acl)(struct inode *inode, int mask);
unsigned int mode = inode->i_mode;
- mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
if (current_user_ns() != inode_userns(inode))
goto other_perms;
@@ -189,8 +189,9 @@
if (current_fsuid() == inode->i_uid)
mode >>= 6;
else {
+ check_acl = inode->i_op->check_acl;
if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
- int error = check_acl(inode, mask, flags);
+ int error = check_acl(inode, mask);
if (error != -EAGAIN)
return error;
}
@@ -203,7 +204,7 @@
/*
* If the DACs are ok we don't need any capability check.
*/
- if ((mask & ~mode) == 0)
+ if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
return 0;
return -EACCES;
}
@@ -212,8 +213,6 @@
* generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- * @check_acl: optional callback to check for Posix ACLs
- * @flags: IPERM_FLAG_ flags.
*
* Used to check for read/write/execute permissions on a file.
* We use "fsuid" for this, letting us set arbitrary permissions
@@ -224,24 +223,32 @@
* request cannot be satisfied (eg. requires blocking or too much complexity).
* It would then be called again in ref-walk mode.
*/
-int generic_permission(struct inode *inode, int mask, unsigned int flags,
- int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
+int generic_permission(struct inode *inode, int mask)
{
int ret;
/*
* Do the basic POSIX ACL permission checks.
*/
- ret = acl_permission_check(inode, mask, flags, check_acl);
+ ret = acl_permission_check(inode, mask);
if (ret != -EACCES)
return ret;
+ if (S_ISDIR(inode->i_mode)) {
+ /* DACs are overridable for directories */
+ if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
+ return 0;
+ if (!(mask & MAY_WRITE))
+ if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
+ return 0;
+ return -EACCES;
+ }
/*
* Read/write DACs are always overridable.
- * Executable DACs are overridable for all directories and
- * for non-directories that have least one exec bit set.
+ * Executable DACs are overridable when there is
+ * at least one exec bit set.
*/
- if (!(mask & MAY_EXEC) || execute_ok(inode))
+ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
return 0;
@@ -249,7 +256,7 @@
* Searching includes executable on directories, else just read.
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
- if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
+ if (mask == MAY_READ)
if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
return 0;
@@ -288,10 +295,9 @@
}
if (inode->i_op->permission)
- retval = inode->i_op->permission(inode, mask, 0);
+ retval = inode->i_op->permission(inode, mask);
else
- retval = generic_permission(inode, mask, 0,
- inode->i_op->check_acl);
+ retval = generic_permission(inode, mask);
if (retval)
return retval;
@@ -304,69 +310,6 @@
}
/**
- * file_permission - check for additional access rights to a given file
- * @file: file to check access rights for
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- *
- * Used to check for read/write/execute permissions on an already opened
- * file.
- *
- * Note:
- * Do not use this function in new code. All access checks should
- * be done using inode_permission().
- */
-int file_permission(struct file *file, int mask)
-{
- return inode_permission(file->f_path.dentry->d_inode, mask);
-}
-
-/*
- * get_write_access() gets write permission for a file.
- * put_write_access() releases this write permission.
- * This is used for regular files.
- * We cannot support write (and maybe mmap read-write shared) accesses and
- * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
- * can have the following values:
- * 0: no writers, no VM_DENYWRITE mappings
- * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
- * > 0: (i_writecount) users are writing to the file.
- *
- * Normally we operate on that counter with atomic_{inc,dec} and it's safe
- * except for the cases where we don't hold i_writecount yet. Then we need to
- * use {get,deny}_write_access() - these functions check the sign and refuse
- * to do the change if sign is wrong. Exclusion between them is provided by
- * the inode->i_lock spinlock.
- */
-
-int get_write_access(struct inode * inode)
-{
- spin_lock(&inode->i_lock);
- if (atomic_read(&inode->i_writecount) < 0) {
- spin_unlock(&inode->i_lock);
- return -ETXTBSY;
- }
- atomic_inc(&inode->i_writecount);
- spin_unlock(&inode->i_lock);
-
- return 0;
-}
-
-int deny_write_access(struct file * file)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
-
- spin_lock(&inode->i_lock);
- if (atomic_read(&inode->i_writecount) > 0) {
- spin_unlock(&inode->i_lock);
- return -ETXTBSY;
- }
- atomic_dec(&inode->i_writecount);
- spin_unlock(&inode->i_lock);
-
- return 0;
-}
-
-/**
* path_get - get a reference to a path
* @path: path to get the reference to
*
@@ -433,6 +376,8 @@
goto err_parent;
BUG_ON(nd->inode != parent->d_inode);
} else {
+ if (dentry->d_parent != parent)
+ goto err_parent;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_child;
@@ -490,28 +435,6 @@
return dentry->d_op->d_revalidate(dentry, nd);
}
-static struct dentry *
-do_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- int status = d_revalidate(dentry, nd);
- if (unlikely(status <= 0)) {
- /*
- * The dentry failed validation.
- * If d_revalidate returned 0 attempt to invalidate
- * the dentry otherwise d_revalidate is asking us
- * to return a fail status.
- */
- if (status < 0) {
- dput(dentry);
- dentry = ERR_PTR(status);
- } else if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- }
- return dentry;
-}
-
/**
* complete_walk - successful completion of path walk
* @nd: pointer nameidata
@@ -566,40 +489,6 @@
return status;
}
-/*
- * Short-cut version of permission(), for calling on directories
- * during pathname resolution. Combines parts of permission()
- * and generic_permission(), and tests ONLY for MAY_EXEC permission.
- *
- * If appropriate, check DAC only. If not appropriate, or
- * short-cut DAC fails, then call ->permission() to do more
- * complete permission check.
- */
-static inline int exec_permission(struct inode *inode, unsigned int flags)
-{
- int ret;
- struct user_namespace *ns = inode_userns(inode);
-
- if (inode->i_op->permission) {
- ret = inode->i_op->permission(inode, MAY_EXEC, flags);
- } else {
- ret = acl_permission_check(inode, MAY_EXEC, flags,
- inode->i_op->check_acl);
- }
- if (likely(!ret))
- goto ok;
- if (ret == -ECHILD)
- return ret;
-
- if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
- ns_capable(ns, CAP_DAC_READ_SEARCH))
- goto ok;
-
- return ret;
-ok:
- return security_inode_exec_permission(inode, flags);
-}
-
static __always_inline void set_root(struct nameidata *nd)
{
if (!nd->root.mnt)
@@ -774,7 +663,7 @@
/* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
* and this is the terminal part of the path.
*/
- if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
+ if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
return -EISDIR; /* we actually want to stop here */
/* We want to mount if someone is trying to open/create a file of any
@@ -786,7 +675,7 @@
* appended a '/' to the name.
*/
if (!(flags & LOOKUP_FOLLOW) &&
- !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
+ !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE)))
return -EISDIR;
@@ -805,7 +694,7 @@
* the path being looked up; if it wasn't then the remainder of
* the path is inaccessible and we should say so.
*/
- if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
+ if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
return -EREMOTE;
return PTR_ERR(mnt);
}
@@ -940,7 +829,6 @@
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
- *inode = path->dentry->d_inode;
if (unlikely(managed_dentry_might_block(path->dentry)))
return false;
@@ -953,6 +841,12 @@
path->mnt = mounted;
path->dentry = mounted->mnt_root;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ /*
+ * Update the inode too. We don't need to re-check the
+ * dentry sequence number here after this d_inode read,
+ * because a mount-point is always pinned.
+ */
+ *inode = path->dentry->d_inode;
}
return true;
}
@@ -1127,6 +1021,30 @@
}
/*
+ * We already have a dentry, but require a lookup to be performed on the parent
+ * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
+ * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
+ * child exists while under i_mutex.
+ */
+static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct inode *inode = parent->d_inode;
+ struct dentry *old;
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(inode)))
+ return ERR_PTR(-ENOENT);
+
+ old = inode->i_op->lookup(inode, dentry, nd);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
+}
+
+/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
@@ -1165,6 +1083,8 @@
goto unlazy;
}
}
+ if (unlikely(d_need_lookup(dentry)))
+ goto unlazy;
path->mnt = mnt;
path->dentry = dentry;
if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1179,6 +1099,10 @@
dentry = __d_lookup(parent, name);
}
+ if (dentry && unlikely(d_need_lookup(dentry))) {
+ dput(dentry);
+ dentry = NULL;
+ }
retry:
if (unlikely(!dentry)) {
struct inode *dir = parent->d_inode;
@@ -1195,6 +1119,15 @@
/* known good */
need_reval = 0;
status = 1;
+ } else if (unlikely(d_need_lookup(dentry))) {
+ dentry = d_inode_lookup(parent, dentry, nd);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->i_mutex);
+ return PTR_ERR(dentry);
+ }
+ /* known good */
+ need_reval = 0;
+ status = 1;
}
mutex_unlock(&dir->i_mutex);
}
@@ -1227,13 +1160,13 @@
static inline int may_lookup(struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU) {
- int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD)
return err;
if (unlazy_walk(nd, NULL))
return -ECHILD;
}
- return exec_permission(nd->inode, 0);
+ return inode_permission(nd->inode, MAY_EXEC);
}
static inline int handle_dots(struct nameidata *nd, int type)
@@ -1347,7 +1280,6 @@
{
struct path next;
int err;
- unsigned int lookup_flags = nd->flags;
while (*name=='/')
name++;
@@ -1361,8 +1293,6 @@
unsigned int c;
int type;
- nd->flags |= LOOKUP_CONTINUE;
-
err = may_lookup(nd);
if (err)
break;
@@ -1424,8 +1354,6 @@
/* here ends the main loop */
last_component:
- /* Clear LOOKUP_CONTINUE iff it was previously unset */
- nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
nd->last = this;
nd->last_type = type;
return 0;
@@ -1508,7 +1436,7 @@
if (!S_ISDIR(dentry->d_inode->i_mode))
goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
+ retval = inode_permission(dentry->d_inode, MAY_EXEC);
if (retval)
goto fput_fail;
}
@@ -1646,16 +1574,22 @@
* @mnt: pointer to vfs mount of the base directory
* @name: pointer to file name
* @flags: lookup flags
- * @nd: pointer to nameidata
+ * @path: pointer to struct path to fill
*/
int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
- struct nameidata *nd)
+ struct path *path)
{
- nd->root.dentry = dentry;
- nd->root.mnt = mnt;
+ struct nameidata nd;
+ int err;
+ nd.root.dentry = dentry;
+ nd.root.mnt = mnt;
+ BUG_ON(flags & LOOKUP_PARENT);
/* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
- return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
+ err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
+ if (!err)
+ *path = nd.path;
+ return err;
}
static struct dentry *__lookup_hash(struct qstr *name,
@@ -1665,7 +1599,7 @@
struct dentry *dentry;
int err;
- err = exec_permission(inode, 0);
+ err = inode_permission(inode, MAY_EXEC);
if (err)
return ERR_PTR(err);
@@ -1676,8 +1610,34 @@
*/
dentry = d_lookup(base, name);
- if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
- dentry = do_revalidate(dentry, nd);
+ if (dentry && d_need_lookup(dentry)) {
+ /*
+ * __lookup_hash is called with the parent dir's i_mutex already
+ * held, so we are good to go here.
+ */
+ dentry = d_inode_lookup(base, dentry, nd);
+ if (IS_ERR(dentry))
+ return dentry;
+ }
+
+ if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ int status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ /*
+ * The dentry failed validation.
+ * If d_revalidate returned 0 attempt to invalidate
+ * the dentry otherwise d_revalidate is asking us
+ * to return a fail status.
+ */
+ if (status < 0) {
+ dput(dentry);
+ return ERR_PTR(status);
+ } else if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ }
if (!dentry)
dentry = d_alloc_and_lookup(base, name, nd);
@@ -2005,27 +1965,10 @@
return error;
}
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- * 00 - read-only
- * 01 - write-only
- * 10 - read-write
- * 11 - special
- * it is changed into
- * 00 - no permissions needed
- * 01 - read-permission
- * 10 - write-permission
- * 11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
static inline int open_to_namei_flags(int flag)
{
- if ((flag+1) & O_ACCMODE)
- flag++;
+ if ((flag & O_ACCMODE) == 3)
+ flag--;
return flag;
}
@@ -2320,35 +2263,29 @@
return file;
}
-/**
- * lookup_create - lookup a dentry, creating it if it doesn't exist
- * @nd: nameidata info
- * @is_dir: directory flag
- *
- * Simple function to lookup and return a dentry and create it
- * if it doesn't exist. Is SMP-safe.
- *
- * Returns with nd->path.dentry->d_inode->i_mutex locked.
- */
-struct dentry *lookup_create(struct nameidata *nd, int is_dir)
+struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
+ struct nameidata nd;
+ int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+ if (error)
+ return ERR_PTR(error);
- mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /////)
*/
- if (nd->last_type != LAST_NORM)
- goto fail;
- nd->flags &= ~LOOKUP_PARENT;
- nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL;
- nd->intent.open.flags = O_EXCL;
+ if (nd.last_type != LAST_NORM)
+ goto out;
+ nd.flags &= ~LOOKUP_PARENT;
+ nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ nd.intent.open.flags = O_EXCL;
/*
* Do the final lookup.
*/
- dentry = lookup_hash(nd);
+ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_hash(&nd);
if (IS_ERR(dentry))
goto fail;
@@ -2360,18 +2297,35 @@
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
- if (unlikely(!is_dir && nd->last.name[nd->last.len])) {
+ if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
dput(dentry);
dentry = ERR_PTR(-ENOENT);
+ goto fail;
}
+ *path = nd.path;
return dentry;
eexist:
dput(dentry);
dentry = ERR_PTR(-EEXIST);
fail:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+out:
+ path_put(&nd.path);
return dentry;
}
-EXPORT_SYMBOL_GPL(lookup_create);
+EXPORT_SYMBOL(kern_path_create);
+
+struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
+{
+ char *tmp = getname(pathname);
+ struct dentry *res;
+ if (IS_ERR(tmp))
+ return ERR_CAST(tmp);
+ res = kern_path_create(dfd, tmp, path, is_dir);
+ putname(tmp);
+ return res;
+}
+EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
@@ -2421,54 +2375,46 @@
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
unsigned, dev)
{
- int error;
- char *tmp;
struct dentry *dentry;
- struct nameidata nd;
+ struct path path;
+ int error;
if (S_ISDIR(mode))
return -EPERM;
- error = user_path_parent(dfd, filename, &nd, &tmp);
- if (error)
- return error;
+ dentry = user_path_create(dfd, filename, &path, 0);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
- dentry = lookup_create(&nd, 0);
- if (IS_ERR(dentry)) {
- error = PTR_ERR(dentry);
- goto out_unlock;
- }
- if (!IS_POSIXACL(nd.path.dentry->d_inode))
+ if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = may_mknod(mode);
if (error)
goto out_dput;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_dput;
- error = security_path_mknod(&nd.path, dentry, mode, dev);
+ error = security_path_mknod(&path, dentry, mode, dev);
if (error)
goto out_drop_write;
switch (mode & S_IFMT) {
case 0: case S_IFREG:
- error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
+ error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
break;
case S_IFCHR: case S_IFBLK:
- error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,
+ error = vfs_mknod(path.dentry->d_inode,dentry,mode,
new_decode_dev(dev));
break;
case S_IFIFO: case S_IFSOCK:
- error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
+ error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
break;
}
out_drop_write:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
out_dput:
dput(dentry);
-out_unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- path_put(&nd.path);
- putname(tmp);
+ mutex_unlock(&path.dentry->d_inode->i_mutex);
+ path_put(&path);
return error;
}
@@ -2501,38 +2447,29 @@
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
{
- int error = 0;
- char * tmp;
struct dentry *dentry;
- struct nameidata nd;
+ struct path path;
+ int error;
- error = user_path_parent(dfd, pathname, &nd, &tmp);
- if (error)
- goto out_err;
-
- dentry = lookup_create(&nd, 1);
- error = PTR_ERR(dentry);
+ dentry = user_path_create(dfd, pathname, &path, 1);
if (IS_ERR(dentry))
- goto out_unlock;
+ return PTR_ERR(dentry);
- if (!IS_POSIXACL(nd.path.dentry->d_inode))
+ if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_dput;
- error = security_path_mkdir(&nd.path, dentry, mode);
+ error = security_path_mkdir(&path, dentry, mode);
if (error)
goto out_drop_write;
- error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+ error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
out_drop_write:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
out_dput:
dput(dentry);
-out_unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- path_put(&nd.path);
- putname(tmp);
-out_err:
+ mutex_unlock(&path.dentry->d_inode->i_mutex);
+ path_put(&path);
return error;
}
@@ -2792,38 +2729,31 @@
{
int error;
char *from;
- char *to;
struct dentry *dentry;
- struct nameidata nd;
+ struct path path;
from = getname(oldname);
if (IS_ERR(from))
return PTR_ERR(from);
- error = user_path_parent(newdfd, newname, &nd, &to);
- if (error)
- goto out_putname;
-
- dentry = lookup_create(&nd, 0);
+ dentry = user_path_create(newdfd, newname, &path, 0);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto out_unlock;
+ goto out_putname;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_dput;
- error = security_path_symlink(&nd.path, dentry, from);
+ error = security_path_symlink(&path, dentry, from);
if (error)
goto out_drop_write;
- error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+ error = vfs_symlink(path.dentry->d_inode, dentry, from);
out_drop_write:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
out_dput:
dput(dentry);
-out_unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- path_put(&nd.path);
- putname(to);
+ mutex_unlock(&path.dentry->d_inode->i_mutex);
+ path_put(&path);
out_putname:
putname(from);
return error;
@@ -2888,11 +2818,9 @@
int, newdfd, const char __user *, newname, int, flags)
{
struct dentry *new_dentry;
- struct nameidata nd;
- struct path old_path;
+ struct path old_path, new_path;
int how = 0;
int error;
- char *to;
if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
@@ -2914,32 +2842,27 @@
if (error)
return error;
- error = user_path_parent(newdfd, newname, &nd, &to);
- if (error)
- goto out;
- error = -EXDEV;
- if (old_path.mnt != nd.path.mnt)
- goto out_release;
- new_dentry = lookup_create(&nd, 0);
+ new_dentry = user_path_create(newdfd, newname, &new_path, 0);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
- goto out_unlock;
- error = mnt_want_write(nd.path.mnt);
+ goto out;
+
+ error = -EXDEV;
+ if (old_path.mnt != new_path.mnt)
+ goto out_dput;
+ error = mnt_want_write(new_path.mnt);
if (error)
goto out_dput;
- error = security_path_link(old_path.dentry, &nd.path, new_dentry);
+ error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_drop_write;
- error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+ error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
out_drop_write:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(new_path.mnt);
out_dput:
dput(new_dentry);
-out_unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-out_release:
- path_put(&nd.path);
- putname(to);
+ mutex_unlock(&new_path.dentry->d_inode->i_mutex);
+ path_put(&new_path);
out:
path_put(&old_path);
@@ -3345,11 +3268,9 @@
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(file_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index fe59bd1..cda50fe 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -934,8 +934,8 @@
int res = 0;
br_read_lock(vfsmount_lock);
- if (p->event != ns->event) {
- p->event = ns->event;
+ if (p->m.poll_event != ns->event) {
+ p->m.poll_event = ns->event;
res = 1;
}
br_read_unlock(vfsmount_lock);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 0ed65e0..64a3264 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -20,9 +20,9 @@
#include "ncp_fs.h"
-static int ncp_fsync(struct file *file, int datasync)
+static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- return 0;
+ return filemap_write_and_wait_range(file->f_mapping, start, end);
}
/*
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 8469031..c98b439 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -113,19 +113,18 @@
int nfs_cache_register(struct cache_detail *cd)
{
- struct nameidata nd;
struct vfsmount *mnt;
+ struct path path;
int ret;
mnt = rpc_get_mount();
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
+ ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path);
if (ret)
goto err;
- ret = sunrpc_cache_register_pipefs(nd.path.dentry,
- cd->name, 0600, cd);
- path_put(&nd.path);
+ ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
+ path_put(&path);
if (!ret)
return ret;
err:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ededdbd..57f578e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -56,7 +56,7 @@
static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
static int nfs_rename(struct inode *, struct dentry *,
struct inode *, struct dentry *);
-static int nfs_fsync_dir(struct file *, int);
+static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct page*);
@@ -945,15 +945,19 @@
* All directory operations under NFS are synchronous, so fsync()
* is a dummy operation.
*/
-static int nfs_fsync_dir(struct file *filp, int datasync)
+static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
+ mutex_lock(&inode->i_mutex);
nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
+ mutex_unlock(&inode->i_mutex);
return 0;
}
@@ -997,14 +1001,12 @@
* Return the intent data that applies to this particular path component
*
* Note that the current set of intents only apply to the very last
- * component of the path.
- * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
+ * component of the path and none of them is set before that last
+ * component.
*/
static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
unsigned int mask)
{
- if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
- return 0;
return nd->flags & mask;
}
@@ -1338,25 +1340,31 @@
return 0;
/* Are we trying to write to a read only partition? */
if (__mnt_is_readonly(nd->path.mnt) &&
- (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+ (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
return 0;
return 1;
}
-static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
+static fmode_t flags_to_mode(int flags)
{
- struct path path = {
- .mnt = nd->path.mnt,
- .dentry = dentry,
- };
+ fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
+ if ((flags & O_ACCMODE) != O_WRONLY)
+ res |= FMODE_READ;
+ if ((flags & O_ACCMODE) != O_RDONLY)
+ res |= FMODE_WRITE;
+ return res;
+}
+
+static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
+{
struct nfs_open_context *ctx;
struct rpc_cred *cred;
- fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+ fmode_t fmode = flags_to_mode(open_flags);
cred = rpc_lookup_cred();
if (IS_ERR(cred))
return ERR_CAST(cred);
- ctx = alloc_nfs_open_context(&path, cred, fmode);
+ ctx = alloc_nfs_open_context(dentry, cred, fmode);
put_rpccred(cred);
if (ctx == NULL)
return ERR_PTR(-ENOMEM);
@@ -1376,13 +1384,13 @@
/* If the open_intent is for execute, we have an extra check to make */
if (ctx->mode & FMODE_EXEC) {
- ret = nfs_may_open(ctx->path.dentry->d_inode,
+ ret = nfs_may_open(ctx->dentry->d_inode,
ctx->cred,
nd->intent.open.flags);
if (ret < 0)
goto out;
}
- filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
+ filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
if (IS_ERR(filp))
ret = PTR_ERR(filp);
else
@@ -1420,12 +1428,13 @@
goto out;
}
- ctx = nameidata_to_nfs_open_context(dentry, nd);
+ open_flags = nd->intent.open.flags;
+
+ ctx = create_nfs_open_context(dentry, open_flags);
res = ERR_CAST(ctx);
if (IS_ERR(ctx))
goto out;
- open_flags = nd->intent.open.flags;
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
attr.ia_valid = ATTR_MODE;
@@ -1463,8 +1472,8 @@
res = d_add_unique(dentry, inode);
nfs_unblock_sillyrename(dentry->d_parent);
if (res != NULL) {
- dput(ctx->path.dentry);
- ctx->path.dentry = dget(res);
+ dput(ctx->dentry);
+ ctx->dentry = dget(res);
dentry = res;
}
err = nfs_intent_set_file(nd, ctx);
@@ -1517,7 +1526,7 @@
/* We can't create new files, or truncate existing ones here */
openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
- ctx = nameidata_to_nfs_open_context(dentry, nd);
+ ctx = create_nfs_open_context(dentry, openflags);
ret = PTR_ERR(ctx);
if (IS_ERR(ctx))
goto out;
@@ -1570,7 +1579,7 @@
struct nfs_open_context *ctx = NULL;
struct iattr attr;
int error;
- int open_flags = 0;
+ int open_flags = O_CREAT|O_EXCL;
dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1578,27 +1587,27 @@
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
- if ((nd->flags & LOOKUP_CREATE) != 0) {
+ if (nd)
open_flags = nd->intent.open.flags;
- ctx = nameidata_to_nfs_open_context(dentry, nd);
- error = PTR_ERR(ctx);
- if (IS_ERR(ctx))
- goto out_err_drop;
- }
+ ctx = create_nfs_open_context(dentry, open_flags);
+ error = PTR_ERR(ctx);
+ if (IS_ERR(ctx))
+ goto out_err_drop;
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
if (error != 0)
goto out_put_ctx;
- if (ctx != NULL) {
+ if (nd) {
error = nfs_intent_set_file(nd, ctx);
if (error < 0)
goto out_err;
+ } else {
+ put_nfs_open_context(ctx);
}
return 0;
out_put_ctx:
- if (ctx != NULL)
- put_nfs_open_context(ctx);
+ put_nfs_open_context(ctx);
out_err_drop:
d_drop(dentry);
out_err:
@@ -1660,7 +1669,7 @@
{
struct iattr attr;
int error;
- int open_flags = 0;
+ int open_flags = O_CREAT|O_EXCL;
dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1668,7 +1677,7 @@
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
- if ((nd->flags & LOOKUP_CREATE) != 0)
+ if (nd)
open_flags = nd->intent.open.flags;
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
@@ -2259,11 +2268,11 @@
{
int mask = 0;
- if (openflags & FMODE_READ)
+ if ((openflags & O_ACCMODE) != O_WRONLY)
mask |= MAY_READ;
- if (openflags & FMODE_WRITE)
+ if ((openflags & O_ACCMODE) != O_RDONLY)
mask |= MAY_WRITE;
- if (openflags & FMODE_EXEC)
+ if (openflags & __FMODE_EXEC)
mask |= MAY_EXEC;
return mask;
}
@@ -2273,12 +2282,12 @@
return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
}
-int nfs_permission(struct inode *inode, int mask, unsigned int flags)
+int nfs_permission(struct inode *inode, int mask)
{
struct rpc_cred *cred;
int res = 0;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
@@ -2328,7 +2337,7 @@
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
- res = generic_permission(inode, mask, flags, NULL);
+ res = generic_permission(inode, mask);
goto out;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8eea253..b35d25b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -284,7 +284,7 @@
loff_t pos)
{
struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
@@ -715,7 +715,7 @@
loff_t pos, int sync)
{
struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
struct rpc_task *task;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2f093ed..28b8c3f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -55,7 +55,7 @@
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *, fl_owner_t id);
-static int nfs_file_fsync(struct file *, int datasync);
+static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -187,8 +187,11 @@
filp->f_path.dentry->d_name.name,
offset, origin);
- /* origin == SEEK_END => we must revalidate the cached file length */
- if (origin == SEEK_END) {
+ /*
+ * origin == SEEK_END, SEEK_DATA or SEEK_HOLE (i.e. anything other than
+ * SEEK_SET/SEEK_CUR) => we must revalidate the cached file length
+ */
+ if (origin != SEEK_SET && origin != SEEK_CUR) {
struct inode *inode = filp->f_mapping->host;
int retval = nfs_revalidate_file_size(inode, filp);
@@ -305,7 +308,7 @@
* fall back to doing a synchronous write.
*/
static int
-nfs_file_fsync(struct file *file, int datasync)
+nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -313,11 +316,15 @@
int have_error, status;
int ret = 0;
-
dprintk("NFS: fsync file(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
@@ -329,6 +336,7 @@
if (!ret && !datasync)
/* application has asked for meta-data sync */
ret = pnfs_layoutcommit_inode(inode, true);
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ce153a6..419119c 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -259,12 +259,10 @@
dfprintk(FSCACHE,
"NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
- /* Need to invalidate any mapped pages that were read in before
- * turning off the cache.
+ /* Need to uncache any pages attached to this inode that
+ * fscache knows about before turning off the cache.
*/
- if (inode->i_mapping && inode->i_mapping->nrpages)
- invalidate_inode_pages2(inode->i_mapping);
-
+ fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
nfs_fscache_zap_inode_cookie(inode);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f4850d..fe12037 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -567,7 +567,7 @@
struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
{
struct nfs_lock_context *res, *new = NULL;
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
@@ -594,7 +594,7 @@
void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
{
struct nfs_open_context *ctx = l_ctx->open_context;
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
return;
@@ -620,7 +620,7 @@
return;
if (!is_sync)
return;
- inode = ctx->path.dentry->d_inode;
+ inode = ctx->dentry->d_inode;
if (!list_empty(&NFS_I(inode)->open_files))
return;
server = NFS_SERVER(inode);
@@ -629,14 +629,14 @@
nfs_revalidate_inode(server, inode);
}
-struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
{
struct nfs_open_context *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
- ctx->path = *path;
- path_get(&ctx->path);
+ nfs_sb_active(dentry->d_sb);
+ ctx->dentry = dget(dentry);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->mode = f_mode;
@@ -658,7 +658,8 @@
static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
{
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
+ struct super_block *sb = ctx->dentry->d_sb;
if (!list_empty(&ctx->list)) {
if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
@@ -671,7 +672,8 @@
NFS_PROTO(inode)->close_context(ctx, is_sync);
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
- path_put(&ctx->path);
+ dput(ctx->dentry);
+ nfs_sb_deactive(sb);
kfree(ctx);
}
@@ -741,7 +743,7 @@
cred = rpc_lookup_cred();
if (IS_ERR(cred))
return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
+ ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
put_rpccred(cred);
if (ctx == NULL)
return -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c30aed2..1909ee8 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -241,7 +241,7 @@
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
+extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -344,8 +344,8 @@
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
-extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
+extern void nfs4_close_state(struct nfs4_state *, fmode_t);
+extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
@@ -378,8 +378,8 @@
#else
-#define nfs4_close_state(a, b, c) do { } while (0)
-#define nfs4_close_sync(a, b, c) do { } while (0)
+#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_sync(a, b) do { } while (0)
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 93ef776..079614d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -766,8 +766,8 @@
struct nfs_open_confirmres c_res;
struct nfs_fattr f_attr;
struct nfs_fattr dir_attr;
- struct path path;
struct dentry *dir;
+ struct dentry *dentry;
struct nfs4_state_owner *owner;
struct nfs4_state *state;
struct iattr attrs;
@@ -789,12 +789,12 @@
nfs_fattr_init(&p->dir_attr);
}
-static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct nfs4_state_owner *sp, fmode_t fmode, int flags,
const struct iattr *attrs,
gfp_t gfp_mask)
{
- struct dentry *parent = dget_parent(path->dentry);
+ struct dentry *parent = dget_parent(dentry);
struct inode *dir = parent->d_inode;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_opendata *p;
@@ -805,8 +805,8 @@
p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
if (p->o_arg.seqid == NULL)
goto err_free;
- path_get(path);
- p->path = *path;
+ nfs_sb_active(dentry->d_sb);
+ p->dentry = dget(dentry);
p->dir = parent;
p->owner = sp;
atomic_inc(&sp->so_count);
@@ -815,7 +815,7 @@
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
p->o_arg.clientid = server->nfs_client->cl_clientid;
p->o_arg.id = sp->so_owner_id.id;
- p->o_arg.name = &p->path.dentry->d_name;
+ p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
p->o_arg.bitmask = server->attr_bitmask;
p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
@@ -845,13 +845,15 @@
{
struct nfs4_opendata *p = container_of(kref,
struct nfs4_opendata, kref);
+ struct super_block *sb = p->dentry->d_sb;
nfs_free_seqid(p->o_arg.seqid);
if (p->state != NULL)
nfs4_put_open_state(p->state);
nfs4_put_state_owner(p->owner);
dput(p->dir);
- path_put(&p->path);
+ dput(p->dentry);
+ nfs_sb_deactive(sb);
kfree(p);
}
@@ -1133,7 +1135,7 @@
{
struct nfs4_opendata *opendata;
- opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
+ opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
@@ -1157,7 +1159,7 @@
newstate = nfs4_opendata_to_nfs4_state(opendata);
if (IS_ERR(newstate))
return PTR_ERR(newstate);
- nfs4_close_state(&opendata->path, newstate, fmode);
+ nfs4_close_state(newstate, fmode);
*res = newstate;
return 0;
}
@@ -1355,7 +1357,7 @@
goto out_free;
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
- nfs4_close_state(&data->path, state, data->o_arg.fmode);
+ nfs4_close_state(state, data->o_arg.fmode);
out_free:
nfs4_opendata_put(data);
}
@@ -1500,7 +1502,7 @@
goto out_free;
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
- nfs4_close_state(&data->path, state, data->o_arg.fmode);
+ nfs4_close_state(state, data->o_arg.fmode);
out_free:
nfs4_opendata_put(data);
}
@@ -1651,7 +1653,7 @@
return PTR_ERR(opendata);
ret = nfs4_open_recover(opendata, state);
if (ret == -ESTALE)
- d_drop(ctx->path.dentry);
+ d_drop(ctx->dentry);
nfs4_opendata_put(opendata);
return ret;
}
@@ -1723,7 +1725,7 @@
/*
* Returns a referenced nfs4_state
*/
-static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
@@ -1740,15 +1742,15 @@
status = nfs4_recover_expired_lease(server);
if (status != 0)
goto err_put_state_owner;
- if (path->dentry->d_inode != NULL)
- nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
+ if (dentry->d_inode != NULL)
+ nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
status = -ENOMEM;
- opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
+ opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL);
if (opendata == NULL)
goto err_put_state_owner;
- if (path->dentry->d_inode != NULL)
- opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
+ if (dentry->d_inode != NULL)
+ opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
status = _nfs4_proc_open(opendata);
if (status != 0)
@@ -1786,14 +1788,14 @@
}
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
- status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res);
+ status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1890,7 +1892,6 @@
}
struct nfs4_closedata {
- struct path path;
struct inode *inode;
struct nfs4_state *state;
struct nfs_closeargs arg;
@@ -1905,13 +1906,14 @@
{
struct nfs4_closedata *calldata = data;
struct nfs4_state_owner *sp = calldata->state->owner;
+ struct super_block *sb = calldata->state->inode->i_sb;
if (calldata->roc)
pnfs_roc_release(calldata->state->inode);
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
- path_put(&calldata->path);
+ nfs_sb_deactive(sb);
kfree(calldata);
}
@@ -2031,7 +2033,7 @@
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
+int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -2067,8 +2069,7 @@
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
calldata->roc = roc;
- path_get(path);
- calldata->path = *path;
+ nfs_sb_active(calldata->inode->i_sb);
msg.rpc_argp = &calldata->arg;
msg.rpc_resp = &calldata->res;
@@ -2097,7 +2098,7 @@
struct nfs4_state *state;
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
+ state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
if (IS_ERR(state))
return ERR_CAST(state);
ctx->state = state;
@@ -2109,9 +2110,9 @@
if (ctx->state == NULL)
return;
if (is_sync)
- nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
+ nfs4_close_sync(ctx->state, ctx->mode);
else
- nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+ nfs4_close_state(ctx->state, ctx->mode);
}
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
@@ -2634,10 +2635,7 @@
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags, struct nfs_open_context *ctx)
{
- struct path my_path = {
- .dentry = dentry,
- };
- struct path *path = &my_path;
+ struct dentry *de = dentry;
struct nfs4_state *state;
struct rpc_cred *cred = NULL;
fmode_t fmode = 0;
@@ -2645,11 +2643,11 @@
if (ctx != NULL) {
cred = ctx->cred;
- path = &ctx->path;
+ de = ctx->dentry;
fmode = ctx->mode;
}
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
+ state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -2660,7 +2658,7 @@
if (ctx != NULL)
ctx->state = state;
else
- nfs4_close_sync(path, state, fmode);
+ nfs4_close_sync(state, fmode);
out:
return status;
}
@@ -4312,7 +4310,7 @@
memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
sizeof(data->lsp->ls_stateid.data));
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
- renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
}
out:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5d744a5..72ab97e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -641,7 +641,7 @@
/*
* Close the current file.
*/
-static void __nfs4_close(struct path *path, struct nfs4_state *state,
+static void __nfs4_close(struct nfs4_state *state,
fmode_t fmode, gfp_t gfp_mask, int wait)
{
struct nfs4_state_owner *owner = state->owner;
@@ -685,18 +685,18 @@
} else {
bool roc = pnfs_roc(state->inode);
- nfs4_do_close(path, state, gfp_mask, wait, roc);
+ nfs4_do_close(state, gfp_mask, wait, roc);
}
}
-void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
{
- __nfs4_close(path, state, fmode, GFP_NOFS, 0);
+ __nfs4_close(state, fmode, GFP_NOFS, 0);
}
-void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
+void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
{
- __nfs4_close(path, state, fmode, GFP_KERNEL, 1);
+ __nfs4_close(state, fmode, GFP_KERNEL, 1);
}
/*
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7139dbf..b60970c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -114,7 +114,7 @@
if (!nfs_lock_request_dontget(req))
return 0;
if (test_bit(PG_MAPPED, &req->wb_flags))
- radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
@@ -124,7 +124,7 @@
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
if (test_bit(PG_MAPPED, &req->wb_flags)) {
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cba228..2171c04 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -159,7 +159,7 @@
static void nfs_readpage_release(struct nfs_page *req)
{
- struct inode *d_inode = req->wb_context->path.dentry->d_inode;
+ struct inode *d_inode = req->wb_context->dentry->d_inode;
if (PageUptodate(req->wb_page))
nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
@@ -167,8 +167,8 @@
unlock_page(req->wb_page);
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
- req->wb_context->path.dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
@@ -220,7 +220,7 @@
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
data->req = req;
data->inode = inode;
@@ -243,7 +243,7 @@
static int nfs_do_read(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
- struct inode *inode = data->args.context->path.dentry->d_inode;
+ struct inode *inode = data->args.context->dentry->d_inode;
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce40e5c..b961cea 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2773,16 +2773,12 @@
static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
const char *export_path)
{
- struct nameidata *nd = NULL;
struct mnt_namespace *ns_private;
struct super_block *s;
struct dentry *dentry;
+ struct path path;
int ret;
- nd = kmalloc(sizeof(*nd), GFP_KERNEL);
- if (nd == NULL)
- return ERR_PTR(-ENOMEM);
-
ns_private = create_mnt_ns(root_mnt);
ret = PTR_ERR(ns_private);
if (IS_ERR(ns_private))
@@ -2793,7 +2789,7 @@
goto out_put_mnt_ns;
ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
- export_path, LOOKUP_FOLLOW, nd);
+ export_path, LOOKUP_FOLLOW, &path);
nfs_referral_loop_unprotect();
put_mnt_ns(ns_private);
@@ -2801,12 +2797,11 @@
if (ret != 0)
goto out_err;
- s = nd->path.mnt->mnt_sb;
+ s = path.mnt->mnt_sb;
atomic_inc(&s->s_active);
- dentry = dget(nd->path.dentry);
+ dentry = dget(path.dentry);
- path_put(&nd->path);
- kfree(nd);
+ path_put(&path);
down_write(&s->s_umount);
return dentry;
out_put_mnt_ns:
@@ -2814,7 +2809,6 @@
out_mntput:
mntput(root_mnt);
out_err:
- kfree(nd);
return ERR_PTR(ret);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9fba527..ebed518 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -409,7 +409,7 @@
*/
static void nfs_inode_remove_request(struct nfs_page *req)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
BUG_ON (!NFS_WBACK_BUSY(req));
@@ -438,7 +438,7 @@
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
@@ -850,13 +850,13 @@
unsigned int count, unsigned int offset,
int how)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->req = req;
- data->inode = inode = req->wb_context->path.dentry->d_inode;
+ data->inode = inode = req->wb_context->dentry->d_inode;
data->cred = req->wb_context->cred;
data->args.fh = NFS_FH(inode);
@@ -889,7 +889,7 @@
const struct rpc_call_ops *call_ops,
int how)
{
- struct inode *inode = data->args.context->path.dentry->d_inode;
+ struct inode *inode = data->args.context->dentry->d_inode;
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
@@ -1081,9 +1081,9 @@
dprintk("NFS: %5u write(%s/%lld %d@%lld)",
task->tk_pid,
- data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
+ data->req->wb_context->dentry->d_inode->i_sb->s_id,
(long long)
- NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
+ NFS_FILEID(data->req->wb_context->dentry->d_inode),
data->req->wb_bytes, (long long)req_offset(data->req));
nfs_writeback_done(task, data);
@@ -1176,8 +1176,8 @@
dprintk("NFS: %5u write (%s/%lld %d@%lld)",
data->task.tk_pid,
- req->wb_context->path.dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
@@ -1375,7 +1375,7 @@
struct pnfs_layout_segment *lseg)
{
struct nfs_page *first = nfs_list_entry(head->next);
- struct inode *inode = first->wb_context->path.dentry->d_inode;
+ struct inode *inode = first->wb_context->dentry->d_inode;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1463,8 +1463,8 @@
nfs_clear_request_commit(req);
dprintk("NFS: commit (%s/%lld %d@%lld)",
- req->wb_context->path.dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_context->dentry->d_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ffb59ef..29d77f6 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -191,52 +191,42 @@
}
static int
-nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
+nfsd4_list_rec_dir(recdir_func *f)
{
const struct cred *original_cred;
- struct file *filp;
+ struct dentry *dir = rec_file->f_path.dentry;
LIST_HEAD(names);
- struct name_list *entry;
- struct dentry *dentry;
int status;
- if (!rec_file)
- return 0;
-
status = nfs4_save_creds(&original_cred);
if (status < 0)
return status;
- filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY,
- current_cred());
- status = PTR_ERR(filp);
- if (IS_ERR(filp))
- goto out;
- status = vfs_readdir(filp, nfsd4_build_namelist, &names);
- fput(filp);
+ status = vfs_llseek(rec_file, 0, SEEK_SET);
+ if (status < 0) {
+ nfs4_reset_creds(original_cred);
+ return status;
+ }
+
+ status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
while (!list_empty(&names)) {
+ struct name_list *entry;
entry = list_entry(names.next, struct name_list, list);
-
- dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
- if (IS_ERR(dentry)) {
- status = PTR_ERR(dentry);
- break;
+ if (!status) {
+ struct dentry *dentry;
+ dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry); /* don't leak entry */
+ } else {
+ status = f(dir, dentry);
+ dput(dentry);
+ }
}
- status = f(dir, dentry);
- dput(dentry);
- if (status)
- break;
list_del(&entry->list);
kfree(entry);
}
mutex_unlock(&dir->d_inode->i_mutex);
-out:
- while (!list_empty(&names)) {
- entry = list_entry(names.next, struct name_list, list);
- list_del(&entry->list);
- kfree(entry);
- }
nfs4_reset_creds(original_cred);
return status;
}
@@ -322,7 +312,7 @@
status = mnt_want_write(rec_file->f_path.mnt);
if (status)
goto out;
- status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old);
+ status = nfsd4_list_rec_dir(purge_old);
if (status == 0)
vfs_fsync(rec_file, 0);
mnt_drop_write(rec_file->f_path.mnt);
@@ -352,7 +342,7 @@
if (!rec_file)
return 0;
- status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir);
+ status = nfsd4_list_rec_dir(load_recdir);
if (status)
printk("nfsd4: failed loading clients from recovery"
" directory %s\n", rec_file->f_path.dentry->d_name.name);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index d7eeca6..2660152 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -27,7 +27,7 @@
#include "nilfs.h"
#include "segment.h"
-int nilfs_sync_file(struct file *file, int datasync)
+int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
/*
* Called from fsync() system call
@@ -40,8 +40,15 @@
struct inode *inode = file->f_mapping->host;
int err;
- if (!nilfs_inode_dirty(inode))
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&inode->i_mutex);
+
+ if (!nilfs_inode_dirty(inode)) {
+ mutex_unlock(&inode->i_mutex);
return 0;
+ }
if (datasync)
err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0,
@@ -49,6 +56,7 @@
else
err = nilfs_construct_segment(inode->i_sb);
+ mutex_unlock(&inode->i_mutex);
return err;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9b45fc..666628b 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -259,8 +259,8 @@
return 0;
/* Needs synchronization with the cleaner */
- size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, nilfs_get_block, NULL);
+ size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ nilfs_get_block);
/*
* In case of error extending write may have instantiated a few
@@ -778,6 +778,8 @@
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
+ inode_dio_wait(inode);
+
err = vmtruncate(inode, iattr->ia_size);
if (unlikely(err))
goto out_err;
@@ -799,14 +801,14 @@
return err;
}
-int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
+int nilfs_permission(struct inode *inode, int mask)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
if ((mask & MAY_WRITE) && root &&
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(inode, mask, flags, NULL);
+ return generic_permission(inode, mask);
}
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 546849b..a314199 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -72,12 +72,7 @@
return ERR_PTR(-ENAMETOOLONG);
ino = nilfs_inode_by_name(dir, &dentry->d_name);
- inode = NULL;
- if (ino) {
- inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
+ inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
return d_splice_alias(inode, dentry);
}
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f02b9ad..255d5e1 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -235,7 +235,7 @@
struct page *, struct inode *);
/* file.c */
-extern int nilfs_sync_file(struct file *, int);
+extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
/* ioctl.c */
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
@@ -264,7 +264,7 @@
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
extern int nilfs_setattr(struct dentry *, struct iattr *);
-int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
+int nilfs_permission(struct inode *inode, int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 0f48e7c..99e3610 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,13 +1527,20 @@
* this problem for now. We do write the $BITMAP attribute if it is present
* which is the important one for a directory so things are not too bad.
*/
-static int ntfs_dir_fsync(struct file *filp, int datasync)
+static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *bmp_vi, *vi = filp->f_mapping->host;
int err, ret;
ntfs_attr na;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(!S_ISDIR(vi->i_mode));
/* If the bitmap attribute inode is in memory sync it, too. */
na.mft_no = vi->i_ino;
@@ -1555,6 +1562,7 @@
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f4b1057..c587e2d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1832,9 +1832,8 @@
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
- down_write(&vi->i_alloc_sem);
+ inode_dio_wait(vi);
err = ntfs_truncate(vi);
- up_write(&vi->i_alloc_sem);
if (err || NInoTruncateFailed(ni)) {
if (!err)
err = -EIO;
@@ -2153,12 +2152,19 @@
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now.
*/
-static int ntfs_file_fsync(struct file *filp, int datasync)
+static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *vi = filp->f_mapping->host;
int err, ret = 0;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
ret = __ntfs_write_inode(vi, 1);
@@ -2176,6 +2182,7 @@
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c05d6dc..1371487 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2357,12 +2357,7 @@
*
* Returns 0 on success or -errno on error.
*
- * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for
- * writing. The only case in the kernel where ->i_alloc_sem is not held is
- * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset. The analogous place in NTFS is in
- * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
- * without holding ->i_alloc_sem.
+ * Called with ->i_mutex held.
*/
int ntfs_truncate(struct inode *vi)
{
@@ -2887,8 +2882,7 @@
* We also abort all changes of user, group, and mode as we do not implement
* the NTFS ACLs yet.
*
- * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also
- * called with ->i_alloc_sem held for writing.
+ * Called with ->i_mutex held.
*/
int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
{
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e913ad1..1cee970 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -290,14 +290,14 @@
return ret;
}
-int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
+int ocfs2_check_acl(struct inode *inode, int mask)
{
struct ocfs2_super *osb;
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret = -EAGAIN;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
osb = OCFS2_SB(inode->i_sb);
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 4fe7c9c..5c5d31f 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@
__le32 e_id;
};
-extern int ocfs2_check_acl(struct inode *, int, unsigned int);
+extern int ocfs2_check_acl(struct inode *, int);
extern int ocfs2_acl_chmod(struct inode *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ac97bca..c1efe93 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -551,9 +551,8 @@
/*
* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
- * particularly interested in the aio/dio case. Like the core uses
- * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
- * truncation on another.
+ * particularly interested in the aio/dio case. We use the rw_lock DLM lock
+ * to protect io on one node from truncation on another.
*/
static void ocfs2_dio_end_io(struct kiocb *iocb,
loff_t offset,
@@ -568,10 +567,8 @@
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
- if (ocfs2_iocb_is_sem_locked(iocb)) {
- up_read(&inode->i_alloc_sem);
+ if (ocfs2_iocb_is_sem_locked(iocb))
ocfs2_iocb_clear_sem_locked(iocb);
- }
ocfs2_iocb_clear_rw_locked(iocb);
@@ -580,6 +577,7 @@
if (is_async)
aio_complete(iocb, ret, 0);
+ inode_dio_done(inode);
}
/*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b1e35a3..0fc2bd3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -171,7 +171,8 @@
return 0;
}
-static int ocfs2_sync_file(struct file *file, int datasync)
+static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
int err = 0;
journal_t *journal;
@@ -184,6 +185,16 @@
file->f_path.dentry->d_name.name,
(unsigned long long)datasync);
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ /*
+ * Probably don't need the i_mutex at all in here, just putting it here
+ * to be consistent with how fsync used to be called, someone more
+ * familiar with the fs could possibly remove it.
+ */
+ mutex_lock(&inode->i_mutex);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
/*
* We still have to flush drive's caches to get data to the
@@ -200,6 +211,7 @@
bail:
if (err)
mlog_errno(err);
+ mutex_unlock(&inode->i_mutex);
return (err < 0) ? -EIO : 0;
}
@@ -1142,6 +1154,8 @@
if (status)
goto bail_unlock;
+ inode_dio_wait(inode);
+
if (i_size_read(inode) > attr->ia_size) {
if (ocfs2_should_order_data(inode)) {
status = ocfs2_begin_ordered_truncate(inode,
@@ -1279,11 +1293,11 @@
return err;
}
-int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
+int ocfs2_permission(struct inode *inode, int mask)
{
int ret;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1293,7 +1307,7 @@
goto out;
}
- ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
+ ret = generic_permission(inode, mask);
ocfs2_inode_unlock(inode, 0);
out:
@@ -2236,9 +2250,8 @@
ocfs2_iocb_clear_sem_locked(iocb);
relock:
- /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
+ /* to match setattr's i_mutex -> rw_lock ordering */
if (direct_io) {
- down_read(&inode->i_alloc_sem);
have_alloc_sem = 1;
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2303,6 @@
*/
if (direct_io && !can_do_direct) {
ocfs2_rw_unlock(inode, rw_level);
- up_read(&inode->i_alloc_sem);
have_alloc_sem = 0;
rw_level = -1;
@@ -2361,8 +2373,7 @@
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
- * it can unlock our rw lock. (it's the clustered equivalent of
- * i_alloc_sem; protects truncate from racing with pending ios).
+ * it can unlock our rw lock.
* Unfortunately there are error cases which call end_io and others
* that don't. so we don't have to unlock the rw_lock if either an
* async dio is going to do it in the future or an end_io after an
@@ -2378,10 +2389,8 @@
ocfs2_rw_unlock(inode, rw_level);
out_sems:
- if (have_alloc_sem) {
- up_read(&inode->i_alloc_sem);
+ if (have_alloc_sem)
ocfs2_iocb_clear_sem_locked(iocb);
- }
mutex_unlock(&inode->i_mutex);
@@ -2531,7 +2540,6 @@
* need locks to protect pending reads from racing with truncate.
*/
if (filp->f_flags & O_DIRECT) {
- down_read(&inode->i_alloc_sem);
have_alloc_sem = 1;
ocfs2_iocb_set_sem_locked(iocb);
@@ -2574,10 +2582,9 @@
}
bail:
- if (have_alloc_sem) {
- up_read(&inode->i_alloc_sem);
+ if (have_alloc_sem)
ocfs2_iocb_clear_sem_locked(iocb);
- }
+
if (rw_level != -1)
ocfs2_rw_unlock(inode, rw_level);
@@ -2593,12 +2600,14 @@
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
.fiemap = ocfs2_fiemap,
+ .check_acl = ocfs2_check_acl,
};
const struct inode_operations ocfs2_special_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
+ .check_acl = ocfs2_check_acl,
};
/*
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index f5afbbe..97bf761 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
+int ocfs2_permission(struct inode *inode, int mask);
int ocfs2_should_update_atime(struct inode *inode,
struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e5d738c..33889dc 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2498,4 +2498,5 @@
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
.fiemap = ocfs2_fiemap,
+ .check_acl = ocfs2_check_acl,
};
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ebfd382..cf78233 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4368,25 +4368,6 @@
return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}
-/* copied from user_path_parent. */
-static int ocfs2_user_path_parent(const char __user *path,
- struct nameidata *nd, char **name)
-{
- char *s = getname(path);
- int error;
-
- if (IS_ERR(s))
- return PTR_ERR(s);
-
- error = kern_path_parent(s, nd);
- if (error)
- putname(s);
- else
- *name = s;
-
- return error;
-}
-
/**
* ocfs2_vfs_reflink - Create a reference-counted link
*
@@ -4460,10 +4441,8 @@
bool preserve)
{
struct dentry *new_dentry;
- struct nameidata nd;
- struct path old_path;
+ struct path old_path, new_path;
int error;
- char *to = NULL;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
@@ -4474,39 +4453,33 @@
return error;
}
- error = ocfs2_user_path_parent(newname, &nd, &to);
- if (error) {
+ new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
+ error = PTR_ERR(new_dentry);
+ if (IS_ERR(new_dentry)) {
mlog_errno(error);
goto out;
}
error = -EXDEV;
- if (old_path.mnt != nd.path.mnt)
- goto out_release;
- new_dentry = lookup_create(&nd, 0);
- error = PTR_ERR(new_dentry);
- if (IS_ERR(new_dentry)) {
+ if (old_path.mnt != new_path.mnt) {
mlog_errno(error);
- goto out_unlock;
+ goto out_dput;
}
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(new_path.mnt);
if (error) {
mlog_errno(error);
goto out_dput;
}
error = ocfs2_vfs_reflink(old_path.dentry,
- nd.path.dentry->d_inode,
+ new_path.dentry->d_inode,
new_dentry, preserve);
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(new_path.mnt);
out_dput:
dput(new_dentry);
-out_unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-out_release:
- path_put(&nd.path);
- putname(to);
+ mutex_unlock(&new_path.dentry->d_inode->i_mutex);
+ path_put(&new_path);
out:
path_put(&old_path);
diff --git a/fs/open.c b/fs/open.c
index b52cf01..739b751 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -793,7 +793,7 @@
return nd->intent.open.file;
out_err:
release_open_intent(nd);
- nd->intent.open.file = (struct file *)dentry;
+ nd->intent.open.file = ERR_CAST(dentry);
goto out;
}
EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b45ee8..3a1dafd 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -172,7 +172,7 @@
task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
tpid = 0;
if (pid_alive(p)) {
- struct task_struct *tracer = tracehook_tracer_task(p);
+ struct task_struct *tracer = ptrace_parent(p);
if (tracer)
tpid = task_pid_nr_ns(tracer, ns);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8a84210..91fb655 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -216,7 +216,7 @@
if (task_is_stopped_or_traced(task)) {
int match;
rcu_read_lock();
- match = (tracehook_tracer_task(task) == current);
+ match = (ptrace_parent(task) == current);
rcu_read_unlock();
if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
return mm;
@@ -673,7 +673,7 @@
p->m.private = p;
p->ns = ns;
p->root = root;
- p->event = ns->event;
+ p->m.poll_event = ns->event;
return 0;
@@ -2167,9 +2167,9 @@
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
-static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
+static int proc_fd_permission(struct inode *inode, int mask)
{
- int rv = generic_permission(inode, mask, flags, NULL);
+ int rv = generic_permission(inode, mask);
if (rv == 0)
return 0;
if (task_pid(current) == proc_pid(inode))
@@ -2708,6 +2708,9 @@
struct task_io_accounting acct = task->ioac;
unsigned long flags;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ return -EACCES;
+
if (whole && lock_task_sighand(task, &flags)) {
struct task_struct *t = task;
@@ -2839,7 +2842,7 @@
REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, proc_tgid_io_accounting),
+ INF("io", S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
@@ -3181,7 +3184,7 @@
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, proc_tid_io_accounting),
+ INF("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d167de3..1a77dbe 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -294,7 +294,7 @@
return ret;
}
-static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
+static int proc_sys_permission(struct inode *inode, int mask)
{
/*
* sysctl entries that are not writeable,
@@ -316,7 +316,7 @@
if (!table) /* global root - r-xr-xr-x */
error = mask & MAY_WRITE ? -EACCES : 0;
else /* Use the permissions on the sysctl table entry */
- error = sysctl_perm(head->root, table, mask);
+ error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
sysctl_head_finish(head);
return error;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8a..5907b49 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -64,6 +64,23 @@
return file->f_pos;
offset += file->f_pos;
break;
+ case SEEK_DATA:
+ /*
+ * In the generic case the entire file is data, so as long as
+ * offset is before the end of the file, the offset is data.
+ */
+ if (offset >= inode->i_size)
+ return -ENXIO;
+ break;
+ case SEEK_HOLE:
+ /*
+ * There is a virtual hole at the end of the file, so as long as
+ * offset isn't i_size or larger, return i_size.
+ */
+ if (offset >= inode->i_size)
+ return -ENXIO;
+ offset = inode->i_size;
+ break;
}
if (offset < 0 && !unsigned_offsets(file))
@@ -128,12 +145,13 @@
loff_t default_llseek(struct file *file, loff_t offset, int origin)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
loff_t retval;
- mutex_lock(&file->f_dentry->d_inode->i_mutex);
+ mutex_lock(&inode->i_mutex);
switch (origin) {
case SEEK_END:
- offset += i_size_read(file->f_path.dentry->d_inode);
+ offset += i_size_read(inode);
break;
case SEEK_CUR:
if (offset == 0) {
@@ -141,6 +159,26 @@
goto out;
}
offset += file->f_pos;
+ break;
+ case SEEK_DATA:
+ /*
+ * In the generic case the entire file is data, so any offset
+ * below i_size is data. Fail via "out" to drop i_mutex.
+ */
+ retval = -ENXIO;
+ if (offset >= inode->i_size)
+ goto out;
+ break;
+ case SEEK_HOLE:
+ /*
+ * There is a virtual hole at the end of the file, so any offset
+ * below i_size seeks to i_size. Fail via "out" to drop i_mutex.
+ */
+ retval = -ENXIO;
+ if (offset >= inode->i_size)
+ goto out;
+ offset = inode->i_size;
+ break;
}
retval = -EINVAL;
if (offset >= 0 || unsigned_offsets(file)) {
@@ -151,7 +189,7 @@
retval = offset;
}
out:
- mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
return retval;
}
EXPORT_SYMBOL(default_llseek);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 198dabf..133e935 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -14,7 +14,8 @@
extern const struct reiserfs_key MIN_KEY;
static int reiserfs_readdir(struct file *, void *, filldir_t);
-static int reiserfs_dir_fsync(struct file *filp, int datasync);
+static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync);
const struct file_operations reiserfs_dir_operations = {
.llseek = generic_file_llseek,
@@ -27,13 +28,21 @@
#endif
};
-static int reiserfs_dir_fsync(struct file *filp, int datasync)
+static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = filp->f_mapping->host;
int err;
+
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
+ mutex_unlock(&inode->i_mutex);
if (err < 0)
return err;
return 0;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 91f080c..c7156dc 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -140,12 +140,18 @@
* be removed...
*/
-static int reiserfs_sync_file(struct file *filp, int datasync)
+static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = filp->f_mapping->host;
int err;
int barrier_done;
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
BUG_ON(!S_ISREG(inode->i_mode));
err = sync_mapping_buffers(inode->i_mapping);
reiserfs_write_lock(inode->i_sb);
@@ -153,6 +159,7 @@
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ mutex_unlock(&inode->i_mutex);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
@@ -312,4 +319,5 @@
.listxattr = reiserfs_listxattr,
.removexattr = reiserfs_removexattr,
.permission = reiserfs_permission,
+ .check_acl = reiserfs_check_acl,
};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 4fd5bb3..2922b90 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3068,9 +3068,8 @@
struct inode *inode = file->f_mapping->host;
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- reiserfs_get_blocks_direct_io, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ reiserfs_get_blocks_direct_io);
/*
* In case of error extending write may have instantiated a few
@@ -3114,6 +3113,9 @@
error = -EFBIG;
goto out;
}
+
+ inode_dio_wait(inode);
+
/* fill in hole pointers in the expanding truncate case. */
if (attr->ia_size > inode->i_size) {
error = generic_cont_expand_simple(inode, attr->ia_size);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 1186626..551f1b7 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1529,6 +1529,7 @@
.listxattr = reiserfs_listxattr,
.removexattr = reiserfs_removexattr,
.permission = reiserfs_permission,
+ .check_acl = reiserfs_check_acl,
};
/*
@@ -1545,6 +1546,7 @@
.listxattr = reiserfs_listxattr,
.removexattr = reiserfs_removexattr,
.permission = reiserfs_permission,
+ .check_acl = reiserfs_check_acl,
};
@@ -1558,5 +1560,5 @@
.listxattr = reiserfs_listxattr,
.removexattr = reiserfs_removexattr,
.permission = reiserfs_permission,
-
+ .check_acl = reiserfs_check_acl,
};
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aa91089..14363b9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1643,6 +1643,7 @@
/* Set default values for options: non-aggressive tails, RO on errors */
REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
+ REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
/* no preallocation minimum, be smart in
reiserfs_file_write instead */
REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d780896..6938d8c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -555,11 +555,10 @@
reiserfs_write_unlock(inode->i_sb);
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
- down_write(&dentry->d_inode->i_alloc_sem);
+ inode_dio_wait(dentry->d_inode);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_setattr(dentry, &newattrs);
- up_write(&dentry->d_inode->i_alloc_sem);
mutex_unlock(&dentry->d_inode->i_mutex);
} else
update_ctime(inode);
@@ -868,12 +867,18 @@
return err;
}
-static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int reiserfs_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
int error = -EAGAIN; /* do regular unix permission checks by default */
- if (flags & IPERM_FLAG_RCU)
+ /*
+ * Stat data v1 doesn't support ACLs.
+ */
+ if (get_inode_sd_version(inode) == STAT_DATA_V1)
+ return -EAGAIN;
+
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
@@ -952,7 +957,7 @@
return 0;
}
-int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
+int reiserfs_permission(struct inode *inode, int mask)
{
/*
* We don't do permission checks on the internal objects.
@@ -961,15 +966,7 @@
if (IS_PRIVATE(inode))
return 0;
-#ifdef CONFIG_REISERFS_FS_XATTR
- /*
- * Stat data v1 doesn't support ACLs.
- */
- if (get_inode_sd_version(inode) != STAT_DATA_V1)
- return generic_permission(inode, mask, flags,
- reiserfs_check_acl);
-#endif
- return generic_permission(inode, mask, flags, NULL);
+ return generic_permission(inode, mask);
}
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 4bc63ac..0682b38 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -220,11 +220,6 @@
blk, off, ino_num);
inode = squashfs_iget(dir->i_sb, ino, ino_num);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto failed;
- }
-
goto exit_lookup;
}
}
@@ -232,10 +227,7 @@
exit_lookup:
kfree(dire);
- if (inode)
- return d_splice_alias(inode, dentry);
- d_add(dentry, inode);
- return ERR_PTR(0);
+ return d_splice_alias(inode, dentry);
data_error:
err = -EIO;
diff --git a/fs/super.c b/fs/super.c
index ab3d672..7943f04 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,69 @@
LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);
+/*
+ * One thing we have to be careful of with a per-sb shrinker is that we don't
+ * drop the last active reference to the superblock from within the shrinker.
+ * If that happens we could trigger unregistering the shrinker from within the
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
+ * take a passive reference to the superblock to prevent this from occurring.
+ */
+static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct super_block *sb;
+ int fs_objects = 0;
+ int total_objects;
+
+ sb = container_of(shrink, struct super_block, s_shrink);
+
+ /*
+ * Deadlock avoidance. We may hold various FS locks, and we don't want
+ * to recurse into the FS that called us in clear_inode() and friends.
+ */
+ if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
+ return -1;
+
+ if (!grab_super_passive(sb))
+ return -1;
+
+ if (sb->s_op && sb->s_op->nr_cached_objects)
+ fs_objects = sb->s_op->nr_cached_objects(sb);
+
+ total_objects = sb->s_nr_dentry_unused +
+ sb->s_nr_inodes_unused + fs_objects + 1;
+
+ if (sc->nr_to_scan) {
+ int dentries;
+ int inodes;
+
+ /* proportion the scan between the caches */
+ dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
+ total_objects;
+ inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
+ total_objects;
+ if (fs_objects)
+ fs_objects = (sc->nr_to_scan * fs_objects) /
+ total_objects;
+ /*
+ * prune the dcache first as the icache is pinned by it, then
+ * prune the icache, followed by the filesystem specific caches
+ */
+ prune_dcache_sb(sb, dentries);
+ prune_icache_sb(sb, inodes);
+
+ if (fs_objects && sb->s_op->free_cached_objects) {
+ sb->s_op->free_cached_objects(sb, fs_objects);
+ fs_objects = sb->s_op->nr_cached_objects(sb);
+ }
+ total_objects = sb->s_nr_dentry_unused +
+ sb->s_nr_inodes_unused + fs_objects;
+ }
+
+ total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
+ drop_super(sb);
+ return total_objects;
+}
+
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
@@ -77,6 +140,8 @@
INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_inode_lru);
+ spin_lock_init(&s->s_inode_lru_lock);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -114,6 +179,10 @@
s->s_op = &default_op;
s->s_time_gran = 1000000000;
s->cleancache_poolid = -1;
+
+ s->s_shrink.seeks = DEFAULT_SEEKS;
+ s->s_shrink.shrink = prune_super;
+ s->s_shrink.batch = 1024;
}
out:
return s;
@@ -181,6 +250,10 @@
if (atomic_dec_and_test(&s->s_active)) {
cleancache_flush_fs(s);
fs->kill_sb(s);
+
+ /* caches are now gone, we can safely kill the shrinker now */
+ unregister_shrinker(&s->s_shrink);
+
/*
* We need to call rcu_barrier so all the delayed rcu free
* inodes are flushed before we release the fs module.
@@ -241,6 +314,39 @@
}
/*
+ * grab_super_passive - acquire a passive reference
+ * @sb: superblock we are trying to grab a passive reference to
+ *
+ * Tries to acquire a passive reference. This is used in places where we
+ * cannot take an active reference but we need to ensure that the
+ * superblock does not go away while we are working on it. It returns
+ * false if a reference was not gained, and returns true with the s_umount
+ * lock held in read mode if a reference is gained. On successful return,
+ * the caller must drop the s_umount lock and the passive reference when
+ * done.
+ */
+bool grab_super_passive(struct super_block *sb)
+{
+ spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ if (down_read_trylock(&sb->s_umount)) {
+ if (sb->s_root)
+ return true;
+ up_read(&sb->s_umount);
+ }
+
+ put_super(sb);
+ return false;
+}
+
+/*
* Superblock locking. We really ought to get rid of these two.
*/
void lock_super(struct super_block * sb)
@@ -276,7 +382,6 @@
{
const struct super_operations *sop = sb->s_op;
-
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
@@ -364,6 +469,7 @@
list_add(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
+ register_shrinker(&s->s_shrink);
return s;
}
@@ -452,6 +558,42 @@
}
/**
+ * iterate_supers_type - call function for superblocks of given type
+ * @type: fs type
+ * @f: function to call
+ * @arg: argument to pass to it
+ *
+ * Scans the superblock list and calls given function, passing it
+ * locked superblock and given argument.
+ */
+void iterate_supers_type(struct file_system_type *type,
+ void (*f)(struct super_block *, void *), void *arg)
+{
+ struct super_block *sb, *p = NULL;
+
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &type->fs_supers, s_instances) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ down_read(&sb->s_umount);
+ if (sb->s_root)
+ f(sb, arg);
+ up_read(&sb->s_umount);
+
+ spin_lock(&sb_lock);
+ if (p)
+ __put_super(p);
+ p = sb;
+ }
+ if (p)
+ __put_super(p);
+ spin_unlock(&sb_lock);
+}
+
+EXPORT_SYMBOL(iterate_supers_type);
+
+/**
* get_super - get the superblock of a device
* @bdev: device to get the superblock for
*
@@ -657,7 +799,7 @@
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
static int unnamed_dev_start = 0; /* don't bother trying below it */
-int set_anon_super(struct super_block *s, void *data)
+int get_anon_bdev(dev_t *p)
{
int dev;
int error;
@@ -684,23 +826,37 @@
spin_unlock(&unnamed_dev_lock);
return -EMFILE;
}
- s->s_dev = MKDEV(0, dev & MINORMASK);
- s->s_bdi = &noop_backing_dev_info;
+ *p = MKDEV(0, dev & MINORMASK);
return 0;
}
+EXPORT_SYMBOL(get_anon_bdev);
+
+void free_anon_bdev(dev_t dev)
+{
+ int slot = MINOR(dev);
+ spin_lock(&unnamed_dev_lock);
+ ida_remove(&unnamed_dev_ida, slot);
+ if (slot < unnamed_dev_start)
+ unnamed_dev_start = slot;
+ spin_unlock(&unnamed_dev_lock);
+}
+EXPORT_SYMBOL(free_anon_bdev);
+
+int set_anon_super(struct super_block *s, void *data)
+{
+ int error = get_anon_bdev(&s->s_dev);
+ if (!error)
+ s->s_bdi = &noop_backing_dev_info;
+ return error;
+}
EXPORT_SYMBOL(set_anon_super);
void kill_anon_super(struct super_block *sb)
{
- int slot = MINOR(sb->s_dev);
-
+ dev_t dev = sb->s_dev;
generic_shutdown_super(sb);
- spin_lock(&unnamed_dev_lock);
- ida_remove(&unnamed_dev_ida, slot);
- if (slot < unnamed_dev_start)
- unnamed_dev_start = slot;
- spin_unlock(&unnamed_dev_lock);
+ free_anon_bdev(dev);
}
EXPORT_SYMBOL(kill_anon_super);
diff --git a/fs/sync.c b/fs/sync.c
index c38ec16..c98a747 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -165,28 +165,9 @@
*/
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct address_space *mapping = file->f_mapping;
- int err, ret;
-
- if (!file->f_op || !file->f_op->fsync) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = filemap_write_and_wait_range(mapping, start, end);
-
- /*
- * We need to protect against concurrent writers, which could cause
- * livelocks in fsync_buffers_list().
- */
- mutex_lock(&mapping->host->i_mutex);
- err = file->f_op->fsync(file, datasync);
- if (!ret)
- ret = err;
- mutex_unlock(&mapping->host->i_mutex);
-
-out:
- return ret;
+ if (!file->f_op || !file->f_op->fsync)
+ return -EINVAL;
+ return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0a12eb8..e3f091a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -349,11 +349,11 @@
return -ENOENT;
}
-int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
+int sysfs_permission(struct inode *inode, int mask)
{
struct sysfs_dirent *sd;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
sd = inode->i_private;
@@ -362,5 +362,5 @@
sysfs_refresh_inode(sd, inode);
mutex_unlock(&sysfs_mutex);
- return generic_permission(inode, mask, flags, NULL);
+ return generic_permission(inode, mask);
}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 2ed2404..845ab3a 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -201,7 +201,7 @@
struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
void sysfs_evict_inode(struct inode *inode);
int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
-int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
+int sysfs_permission(struct inode *inode, int mask);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 87cd0ea..fb3b5c8 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -78,7 +78,7 @@
* If the root TNC node is dirty, we definitely have something to
* commit.
*/
- if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
+ if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
return 0;
/*
@@ -418,7 +418,7 @@
spin_lock(&c->cs_lock);
if (c->cmt_state == COMMIT_BROKEN) {
- err = -EINVAL;
+ err = -EROFS;
goto out;
}
@@ -444,7 +444,7 @@
* re-check it.
*/
if (c->cmt_state == COMMIT_BROKEN) {
- err = -EINVAL;
+ err = -EROFS;
goto out_cmt_unlock;
}
@@ -576,7 +576,7 @@
struct idx_node *i;
size_t sz;
- if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
+ if (!dbg_is_chk_index(c))
return 0;
INIT_LIST_HEAD(&list);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0bb2bce..eef109a 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -27,13 +27,12 @@
* various local functions of those subsystems.
*/
-#define UBIFS_DBG_PRESERVE_UBI
-
-#include "ubifs.h"
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/math64.h>
+#include <linux/uaccess.h>
+#include <linux/random.h>
+#include "ubifs.h"
#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -42,15 +41,6 @@
static char dbg_key_buf0[128];
static char dbg_key_buf1[128];
-unsigned int ubifs_chk_flags;
-unsigned int ubifs_tst_flags;
-
-module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
-module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
-
-MODULE_PARM_DESC(debug_chks, "Debug check flags");
-MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
-
static const char *get_key_fmt(int fmt)
{
switch (fmt) {
@@ -91,6 +81,28 @@
}
}
+static const char *get_dent_type(int type)
+{
+ switch (type) {
+ case UBIFS_ITYPE_REG:
+ return "file";
+ case UBIFS_ITYPE_DIR:
+ return "dir";
+ case UBIFS_ITYPE_LNK:
+ return "symlink";
+ case UBIFS_ITYPE_BLK:
+ return "blkdev";
+ case UBIFS_ITYPE_CHR:
+ return "char dev";
+ case UBIFS_ITYPE_FIFO:
+ return "fifo";
+ case UBIFS_ITYPE_SOCK:
+ return "socket";
+ default:
+ return "unknown/invalid type";
+ }
+}
+
static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
char *buffer)
{
@@ -234,9 +246,13 @@
printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
}
-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
+void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
{
const struct ubifs_inode *ui = ubifs_inode(inode);
+ struct qstr nm = { .name = NULL };
+ union ubifs_key key;
+ struct ubifs_dent_node *dent, *pdent = NULL;
+ int count = 2;
printk(KERN_DEBUG "Dump in-memory inode:");
printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino);
@@ -270,6 +286,32 @@
printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
+
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ printk(KERN_DEBUG "List of directory entries:\n");
+ ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
+
+ lowest_dent_key(c, &key, inode->i_ino);
+ while (1) {
+ dent = ubifs_tnc_next_ent(c, &key, &nm);
+ if (IS_ERR(dent)) {
+ if (PTR_ERR(dent) != -ENOENT)
+ printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent));
+ break;
+ }
+
+ printk(KERN_DEBUG "\t%d: %s (%s)\n",
+ count++, dent->name, get_dent_type(dent->type));
+
+ nm.name = dent->name;
+ nm.len = le16_to_cpu(dent->nlen);
+ kfree(pdent);
+ pdent = dent;
+ key_read(c, &dent->key, &key);
+ }
+ kfree(pdent);
}
void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -278,7 +320,7 @@
union ubifs_key key;
const struct ubifs_ch *ch = node;
- if (dbg_failure_mode)
+ if (dbg_is_tst_rcvry(c))
return;
/* If the magic is incorrect, just hexdump the first bytes */
@@ -834,7 +876,7 @@
struct ubifs_scan_node *snod;
void *buf;
- if (dbg_failure_mode)
+ if (dbg_is_tst_rcvry(c))
return;
printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
@@ -1080,6 +1122,7 @@
/**
* dbg_check_synced_i_size - check synchronized inode size.
+ * @c: UBIFS file-system description object
* @inode: inode to check
*
* If inode is clean, synchronized inode size has to be equivalent to current
@@ -1087,12 +1130,12 @@
* has to be locked). Returns %0 if synchronized inode size if correct, and
* %-EINVAL if not.
*/
-int dbg_check_synced_i_size(struct inode *inode)
+int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
{
int err = 0;
struct ubifs_inode *ui = ubifs_inode(inode);
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (!S_ISREG(inode->i_mode))
return 0;
@@ -1125,7 +1168,7 @@
* Note, it is good idea to make sure the @dir->i_mutex is locked before
* calling this function.
*/
-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
+int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
{
unsigned int nlink = 2;
union ubifs_key key;
@@ -1133,7 +1176,7 @@
struct qstr nm = { .name = NULL };
loff_t size = UBIFS_INO_NODE_SZ;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (!S_ISDIR(dir->i_mode))
@@ -1167,12 +1210,14 @@
"but calculated size is %llu", dir->i_ino,
(unsigned long long)i_size_read(dir),
(unsigned long long)size);
+ dbg_dump_inode(c, dir);
dump_stack();
return -EINVAL;
}
if (dir->i_nlink != nlink) {
ubifs_err("directory inode %lu has nlink %u, but calculated "
"nlink is %u", dir->i_ino, dir->i_nlink, nlink);
+ dbg_dump_inode(c, dir);
dump_stack();
return -EINVAL;
}
@@ -1489,7 +1534,7 @@
long clean_cnt = 0, dirty_cnt = 0;
int err, last;
- if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
+ if (!dbg_is_chk_index(c))
return 0;
ubifs_assert(mutex_is_locked(&c->tnc_mutex));
@@ -1736,7 +1781,7 @@
int err;
long long calc = 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
+ if (!dbg_is_chk_index(c))
return 0;
err = dbg_walk_index(c, NULL, add_size, &calc);
@@ -2312,7 +2357,7 @@
int err;
struct fsck_data fsckd;
- if (!(ubifs_chk_flags & UBIFS_CHK_FS))
+ if (!dbg_is_chk_fs(c))
return 0;
fsckd.inodes = RB_ROOT;
@@ -2347,7 +2392,7 @@
struct list_head *cur;
struct ubifs_scan_node *sa, *sb;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2414,7 +2459,7 @@
struct list_head *cur;
struct ubifs_scan_node *sa, *sb;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2491,214 +2536,141 @@
return 0;
}
-int dbg_force_in_the_gaps(void)
+static inline int chance(unsigned int n, unsigned int out_of)
{
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
- return 0;
+ return !!((random32() % out_of) + 1 <= n);
- return !(random32() & 7);
}
-/* Failure mode for recovery testing */
-
-#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
-
-struct failure_mode_info {
- struct list_head list;
- struct ubifs_info *c;
-};
-
-static LIST_HEAD(fmi_list);
-static DEFINE_SPINLOCK(fmi_lock);
-
-static unsigned int next;
-
-static int simple_rand(void)
+static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
{
- if (next == 0)
- next = current->pid;
- next = next * 1103515245 + 12345;
- return (next >> 16) & 32767;
-}
+ struct ubifs_debug_info *d = c->dbg;
-static void failure_mode_init(struct ubifs_info *c)
-{
- struct failure_mode_info *fmi;
+ ubifs_assert(dbg_is_tst_rcvry(c));
- fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
- if (!fmi) {
- ubifs_err("Failed to register failure mode - no memory");
- return;
- }
- fmi->c = c;
- spin_lock(&fmi_lock);
- list_add_tail(&fmi->list, &fmi_list);
- spin_unlock(&fmi_lock);
-}
-
-static void failure_mode_exit(struct ubifs_info *c)
-{
- struct failure_mode_info *fmi, *tmp;
-
- spin_lock(&fmi_lock);
- list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
- if (fmi->c == c) {
- list_del(&fmi->list);
- kfree(fmi);
- }
- spin_unlock(&fmi_lock);
-}
-
-static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
-{
- struct failure_mode_info *fmi;
-
- spin_lock(&fmi_lock);
- list_for_each_entry(fmi, &fmi_list, list)
- if (fmi->c->ubi == desc) {
- struct ubifs_info *c = fmi->c;
-
- spin_unlock(&fmi_lock);
- return c;
- }
- spin_unlock(&fmi_lock);
- return NULL;
-}
-
-static int in_failure_mode(struct ubi_volume_desc *desc)
-{
- struct ubifs_info *c = dbg_find_info(desc);
-
- if (c && dbg_failure_mode)
- return c->dbg->failure_mode;
- return 0;
-}
-
-static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
-{
- struct ubifs_info *c = dbg_find_info(desc);
- struct ubifs_debug_info *d;
-
- if (!c || !dbg_failure_mode)
- return 0;
- d = c->dbg;
- if (d->failure_mode)
- return 1;
- if (!d->fail_cnt) {
- /* First call - decide delay to failure */
+ if (!d->pc_cnt) {
+ /* First call - decide delay to the power cut */
if (chance(1, 2)) {
- unsigned int delay = 1 << (simple_rand() >> 11);
+ unsigned long delay;
if (chance(1, 2)) {
- d->fail_delay = 1;
- d->fail_timeout = jiffies +
- msecs_to_jiffies(delay);
- dbg_rcvry("failing after %ums", delay);
+ d->pc_delay = 1;
+ /* Fail within 1 minute */
+ delay = random32() % 60000;
+ d->pc_timeout = jiffies;
+ d->pc_timeout += msecs_to_jiffies(delay);
+ ubifs_warn("failing after %lums", delay);
} else {
- d->fail_delay = 2;
- d->fail_cnt_max = delay;
- dbg_rcvry("failing after %u calls", delay);
+ d->pc_delay = 2;
+ delay = random32() % 10000;
+ /* Fail within 10000 operations */
+ d->pc_cnt_max = delay;
+ ubifs_warn("failing after %lu calls", delay);
}
}
- d->fail_cnt += 1;
+
+ d->pc_cnt += 1;
}
+
/* Determine if failure delay has expired */
- if (d->fail_delay == 1) {
- if (time_before(jiffies, d->fail_timeout))
+ if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout))
return 0;
- } else if (d->fail_delay == 2)
- if (d->fail_cnt++ < d->fail_cnt_max)
+ if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max)
return 0;
+
if (lnum == UBIFS_SB_LNUM) {
- if (write) {
- if (chance(1, 2))
- return 0;
- } else if (chance(19, 20))
+ if (write && chance(1, 2))
return 0;
- dbg_rcvry("failing in super block LEB %d", lnum);
+ if (chance(19, 20))
+ return 0;
+ ubifs_warn("failing in super block LEB %d", lnum);
} else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
if (chance(19, 20))
return 0;
- dbg_rcvry("failing in master LEB %d", lnum);
+ ubifs_warn("failing in master LEB %d", lnum);
} else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
- if (write) {
- if (chance(99, 100))
- return 0;
- } else if (chance(399, 400))
+ if (write && chance(99, 100))
return 0;
- dbg_rcvry("failing in log LEB %d", lnum);
+ if (chance(399, 400))
+ return 0;
+ ubifs_warn("failing in log LEB %d", lnum);
} else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
- if (write) {
- if (chance(7, 8))
- return 0;
- } else if (chance(19, 20))
+ if (write && chance(7, 8))
return 0;
- dbg_rcvry("failing in LPT LEB %d", lnum);
+ if (chance(19, 20))
+ return 0;
+ ubifs_warn("failing in LPT LEB %d", lnum);
} else if (lnum >= c->orph_first && lnum <= c->orph_last) {
- if (write) {
- if (chance(1, 2))
- return 0;
- } else if (chance(9, 10))
+ if (write && chance(1, 2))
return 0;
- dbg_rcvry("failing in orphan LEB %d", lnum);
+ if (chance(9, 10))
+ return 0;
+ ubifs_warn("failing in orphan LEB %d", lnum);
} else if (lnum == c->ihead_lnum) {
if (chance(99, 100))
return 0;
- dbg_rcvry("failing in index head LEB %d", lnum);
+ ubifs_warn("failing in index head LEB %d", lnum);
} else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
if (chance(9, 10))
return 0;
- dbg_rcvry("failing in GC head LEB %d", lnum);
+ ubifs_warn("failing in GC head LEB %d", lnum);
} else if (write && !RB_EMPTY_ROOT(&c->buds) &&
!ubifs_search_bud(c, lnum)) {
if (chance(19, 20))
return 0;
- dbg_rcvry("failing in non-bud LEB %d", lnum);
+ ubifs_warn("failing in non-bud LEB %d", lnum);
} else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
c->cmt_state == COMMIT_RUNNING_REQUIRED) {
if (chance(999, 1000))
return 0;
- dbg_rcvry("failing in bud LEB %d commit running", lnum);
+ ubifs_warn("failing in bud LEB %d commit running", lnum);
} else {
if (chance(9999, 10000))
return 0;
- dbg_rcvry("failing in bud LEB %d commit not running", lnum);
+ ubifs_warn("failing in bud LEB %d commit not running", lnum);
}
- ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
- d->failure_mode = 1;
+
+ d->pc_happened = 1;
+ ubifs_warn("========== Power cut emulated ==========");
dump_stack();
return 1;
}
-static void cut_data(const void *buf, int len)
+static void cut_data(const void *buf, unsigned int len)
{
- int flen, i;
+ unsigned int from, to, i, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- flen = (len * (long long)simple_rand()) >> 15;
- for (i = flen; i < len; i++)
- p[i] = 0xff;
+ from = random32() % (len + 1);
+ if (chance(1, 2))
+ to = random32() % (len - from + 1);
+ else
+ to = len;
+
+ if (from < to)
+ ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
+ ffs ? "0xFFs" : "random data");
+
+ if (ffs)
+ for (i = from; i < to; i++)
+ p[i] = 0xFF;
+ else
+ for (i = from; i < to; i++)
+ p[i] = random32() % 0x100;
}
-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
- int len, int check)
-{
- if (in_failure_mode(desc))
- return -EROFS;
- return ubi_leb_read(desc, lnum, buf, offset, len, check);
-}
-
-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int offset, int len, int dtype)
+int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
+ int offs, int len, int dtype)
{
int err, failing;
- if (in_failure_mode(desc))
+ if (c->dbg->pc_happened)
return -EROFS;
- failing = do_fail(desc, lnum, 1);
+
+ failing = power_cut_emulated(c, lnum, 1);
if (failing)
cut_data(buf, len);
- err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
if (err)
return err;
if (failing)
@@ -2706,162 +2678,207 @@
return 0;
}
-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf,
int len, int dtype)
{
int err;
- if (do_fail(desc, lnum, 1))
+ if (c->dbg->pc_happened)
return -EROFS;
- err = ubi_leb_change(desc, lnum, buf, len, dtype);
+ if (power_cut_emulated(c, lnum, 1))
+ return -EROFS;
+ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
if (err)
return err;
- if (do_fail(desc, lnum, 1))
+ if (power_cut_emulated(c, lnum, 1))
return -EROFS;
return 0;
}
-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
+int dbg_leb_unmap(struct ubifs_info *c, int lnum)
{
int err;
- if (do_fail(desc, lnum, 0))
+ if (c->dbg->pc_happened)
return -EROFS;
- err = ubi_leb_erase(desc, lnum);
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
+ err = ubi_leb_unmap(c->ubi, lnum);
if (err)
return err;
- if (do_fail(desc, lnum, 0))
+ if (power_cut_emulated(c, lnum, 0))
return -EROFS;
return 0;
}
-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
+int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype)
{
int err;
- if (do_fail(desc, lnum, 0))
+ if (c->dbg->pc_happened)
return -EROFS;
- err = ubi_leb_unmap(desc, lnum);
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
+ err = ubi_leb_map(c->ubi, lnum, dtype);
if (err)
return err;
- if (do_fail(desc, lnum, 0))
+ if (power_cut_emulated(c, lnum, 0))
return -EROFS;
return 0;
}
-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
-{
- if (in_failure_mode(desc))
- return -EROFS;
- return ubi_is_mapped(desc, lnum);
-}
-
-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
-{
- int err;
-
- if (do_fail(desc, lnum, 0))
- return -EROFS;
- err = ubi_leb_map(desc, lnum, dtype);
- if (err)
- return err;
- if (do_fail(desc, lnum, 0))
- return -EROFS;
- return 0;
-}
-
-/**
- * ubifs_debugging_init - initialize UBIFS debugging.
- * @c: UBIFS file-system description object
- *
- * This function initializes debugging-related data for the file system.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-int ubifs_debugging_init(struct ubifs_info *c)
-{
- c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
- if (!c->dbg)
- return -ENOMEM;
-
- failure_mode_init(c);
- return 0;
-}
-
-/**
- * ubifs_debugging_exit - free debugging data.
- * @c: UBIFS file-system description object
- */
-void ubifs_debugging_exit(struct ubifs_info *c)
-{
- failure_mode_exit(c);
- kfree(c->dbg);
-}
-
/*
* Root directory for UBIFS stuff in debugfs. Contains sub-directories which
* contain the stuff specific to particular file-system mounts.
*/
static struct dentry *dfs_rootdir;
-/**
- * dbg_debugfs_init - initialize debugfs file-system.
- *
- * UBIFS uses debugfs file-system to expose various debugging knobs to
- * user-space. This function creates "ubifs" directory in the debugfs
- * file-system. Returns zero in case of success and a negative error code in
- * case of failure.
- */
-int dbg_debugfs_init(void)
-{
- dfs_rootdir = debugfs_create_dir("ubifs", NULL);
- if (IS_ERR(dfs_rootdir)) {
- int err = PTR_ERR(dfs_rootdir);
- ubifs_err("cannot create \"ubifs\" debugfs directory, "
- "error %d\n", err);
- return err;
- }
-
- return 0;
-}
-
-/**
- * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
- */
-void dbg_debugfs_exit(void)
-{
- debugfs_remove(dfs_rootdir);
-}
-
-static int open_debugfs_file(struct inode *inode, struct file *file)
+static int dfs_file_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return nonseekable_open(inode, file);
}
-static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+/**
+ * provide_user_output - provide output to the user reading a debugfs file.
+ * @val: boolean value for the answer
+ * @u: the buffer to store the answer at
+ * @count: size of the buffer
+ * @ppos: position in the @u output buffer
+ *
+ * This is a simple helper function which stores @val boolean value in the user
+ * buffer when the user reads one of UBIFS debugfs files. Returns amount of
+ * bytes written to @u in case of success and a negative error code in case of
+ * failure.
+ */
+static int provide_user_output(int val, char __user *u, size_t count,
+ loff_t *ppos)
+{
+ char buf[3];
+
+ if (val)
+ buf[0] = '1';
+ else
+ buf[0] = '0';
+ buf[1] = '\n';
+ buf[2] = 0x00;
+
+ return simple_read_from_buffer(u, count, ppos, buf, 2);
+}
+
+static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
+ loff_t *ppos)
+{
+ struct dentry *dent = file->f_path.dentry;
+ struct ubifs_info *c = file->private_data;
+ struct ubifs_debug_info *d = c->dbg;
+ int val;
+
+ if (dent == d->dfs_chk_gen)
+ val = d->chk_gen;
+ else if (dent == d->dfs_chk_index)
+ val = d->chk_index;
+ else if (dent == d->dfs_chk_orph)
+ val = d->chk_orph;
+ else if (dent == d->dfs_chk_lprops)
+ val = d->chk_lprops;
+ else if (dent == d->dfs_chk_fs)
+ val = d->chk_fs;
+ else if (dent == d->dfs_tst_rcvry)
+ val = d->tst_rcvry;
+ else
+ return -EINVAL;
+
+ return provide_user_output(val, u, count, ppos);
+}
+
+/**
+ * interpret_user_input - interpret user debugfs file input.
+ * @u: user-provided buffer with the input
+ * @count: buffer size
+ *
+ * This is a helper function which interprets user input to a boolean UBIFS
+ * debugfs file. Returns %0 or %1 in case of success and a negative error code
+ * in case of failure.
+ */
+static int interpret_user_input(const char __user *u, size_t count)
+{
+ size_t buf_size;
+ char buf[8];
+
+ buf_size = min_t(size_t, count, (sizeof(buf) - 1));
+ if (copy_from_user(buf, u, buf_size))
+ return -EFAULT;
+
+ if (buf[0] == '1')
+ return 1;
+ else if (buf[0] == '0')
+ return 0;
+
+ return -EINVAL;
+}
+
+static ssize_t dfs_file_write(struct file *file, const char __user *u,
+ size_t count, loff_t *ppos)
{
struct ubifs_info *c = file->private_data;
struct ubifs_debug_info *d = c->dbg;
+ struct dentry *dent = file->f_path.dentry;
+ int val;
- if (file->f_path.dentry == d->dfs_dump_lprops)
+ /*
+ * TODO: this is racy - the file-system might have already been
+ * unmounted and we'd oops in this case. The plan is to fix it with
+ * help of 'iterate_supers_type()' which we should have in v3.0: when
+ * a debugfs file is opened, we remember the FS's UUID in
+ * file->private_data. Then whenever we access the FS via a debugfs
+ * file, we iterate all UBIFS superblocks and find the one with the
+ * same UUID, and take the locking right.
+ *
+ * The other way to go suggested by Al Viro is to create a separate
+ * 'ubifs-debug' file-system instead.
+ */
+ if (file->f_path.dentry == d->dfs_dump_lprops) {
dbg_dump_lprops(c);
- else if (file->f_path.dentry == d->dfs_dump_budg)
+ return count;
+ }
+ if (file->f_path.dentry == d->dfs_dump_budg) {
dbg_dump_budg(c, &c->bi);
- else if (file->f_path.dentry == d->dfs_dump_tnc) {
+ return count;
+ }
+ if (file->f_path.dentry == d->dfs_dump_tnc) {
mutex_lock(&c->tnc_mutex);
dbg_dump_tnc(c);
mutex_unlock(&c->tnc_mutex);
- } else
+ return count;
+ }
+
+ val = interpret_user_input(u, count);
+ if (val < 0)
+ return val;
+
+ if (dent == d->dfs_chk_gen)
+ d->chk_gen = val;
+ else if (dent == d->dfs_chk_index)
+ d->chk_index = val;
+ else if (dent == d->dfs_chk_orph)
+ d->chk_orph = val;
+ else if (dent == d->dfs_chk_lprops)
+ d->chk_lprops = val;
+ else if (dent == d->dfs_chk_fs)
+ d->chk_fs = val;
+ else if (dent == d->dfs_tst_rcvry)
+ d->tst_rcvry = val;
+ else
return -EINVAL;
return count;
}
static const struct file_operations dfs_fops = {
- .open = open_debugfs_file,
- .write = write_debugfs_file,
+ .open = dfs_file_open,
+ .read = dfs_file_read,
+ .write = dfs_file_write,
.owner = THIS_MODULE,
.llseek = no_llseek,
};
@@ -2880,12 +2897,20 @@
*/
int dbg_debugfs_init_fs(struct ubifs_info *c)
{
- int err;
+ int err, n;
const char *fname;
struct dentry *dent;
struct ubifs_debug_info *d = c->dbg;
- sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
+ n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
+ c->vi.ubi_num, c->vi.vol_id);
+ if (n == UBIFS_DFS_DIR_LEN) {
+ /* The array size is too small */
+ fname = UBIFS_DFS_DIR_NAME;
+ dent = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
fname = d->dfs_dir_name;
dent = debugfs_create_dir(fname, dfs_rootdir);
if (IS_ERR_OR_NULL(dent))
@@ -2910,13 +2935,55 @@
goto out_remove;
d->dfs_dump_tnc = dent;
+ fname = "chk_general";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_gen = dent;
+
+ fname = "chk_index";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_index = dent;
+
+ fname = "chk_orphans";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_orph = dent;
+
+ fname = "chk_lprops";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_lprops = dent;
+
+ fname = "chk_fs";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_fs = dent;
+
+ fname = "tst_recovery";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_tst_rcvry = dent;
+
return 0;
out_remove:
debugfs_remove_recursive(d->dfs_dir);
out:
err = dent ? PTR_ERR(dent) : -ENODEV;
- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
fname, err);
return err;
}
@@ -2930,4 +2997,179 @@
debugfs_remove_recursive(c->dbg->dfs_dir);
}
+struct ubifs_global_debug_info ubifs_dbg;
+
+static struct dentry *dfs_chk_gen;
+static struct dentry *dfs_chk_index;
+static struct dentry *dfs_chk_orph;
+static struct dentry *dfs_chk_lprops;
+static struct dentry *dfs_chk_fs;
+static struct dentry *dfs_tst_rcvry;
+
+static ssize_t dfs_global_file_read(struct file *file, char __user *u,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file->f_path.dentry;
+ int val;
+
+ if (dent == dfs_chk_gen)
+ val = ubifs_dbg.chk_gen;
+ else if (dent == dfs_chk_index)
+ val = ubifs_dbg.chk_index;
+ else if (dent == dfs_chk_orph)
+ val = ubifs_dbg.chk_orph;
+ else if (dent == dfs_chk_lprops)
+ val = ubifs_dbg.chk_lprops;
+ else if (dent == dfs_chk_fs)
+ val = ubifs_dbg.chk_fs;
+ else if (dent == dfs_tst_rcvry)
+ val = ubifs_dbg.tst_rcvry;
+ else
+ return -EINVAL;
+
+ return provide_user_output(val, u, count, ppos);
+}
+
+static ssize_t dfs_global_file_write(struct file *file, const char __user *u,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file->f_path.dentry;
+ int val;
+
+ val = interpret_user_input(u, count);
+ if (val < 0)
+ return val;
+
+ if (dent == dfs_chk_gen)
+ ubifs_dbg.chk_gen = val;
+ else if (dent == dfs_chk_index)
+ ubifs_dbg.chk_index = val;
+ else if (dent == dfs_chk_orph)
+ ubifs_dbg.chk_orph = val;
+ else if (dent == dfs_chk_lprops)
+ ubifs_dbg.chk_lprops = val;
+ else if (dent == dfs_chk_fs)
+ ubifs_dbg.chk_fs = val;
+ else if (dent == dfs_tst_rcvry)
+ ubifs_dbg.tst_rcvry = val;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static const struct file_operations dfs_global_fops = {
+ .read = dfs_global_file_read,
+ .write = dfs_global_file_write,
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+};
+
+/**
+ * dbg_debugfs_init - initialize debugfs file-system.
+ *
+ * UBIFS uses debugfs file-system to expose various debugging knobs to
+ * user-space. This function creates "ubifs" directory in the debugfs
+ * file-system. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int dbg_debugfs_init(void)
+{
+ int err;
+ const char *fname;
+ struct dentry *dent;
+
+ fname = "ubifs";
+ dent = debugfs_create_dir(fname, NULL);
+ if (IS_ERR_OR_NULL(dent))
+ goto out;
+ dfs_rootdir = dent;
+
+ fname = "chk_general";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_gen = dent;
+
+ fname = "chk_index";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_index = dent;
+
+ fname = "chk_orphans";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_orph = dent;
+
+ fname = "chk_lprops";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_lprops = dent;
+
+ fname = "chk_fs";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_fs = dent;
+
+ fname = "tst_recovery";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_tst_rcvry = dent;
+
+ return 0;
+
+out_remove:
+ debugfs_remove_recursive(dfs_rootdir);
+out:
+ err = dent ? PTR_ERR(dent) : -ENODEV;
+ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+ fname, err);
+ return err;
+}
+
+/**
+ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
+ */
+void dbg_debugfs_exit(void)
+{
+ debugfs_remove_recursive(dfs_rootdir);
+}
+
+/**
+ * ubifs_debugging_init - initialize UBIFS debugging.
+ * @c: UBIFS file-system description object
+ *
+ * This function initializes debugging-related data for the file system.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_debugging_init(struct ubifs_info *c)
+{
+ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
+ if (!c->dbg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * ubifs_debugging_exit - free debugging data.
+ * @c: UBIFS file-system description object
+ */
+void ubifs_debugging_exit(struct ubifs_info *c)
+{
+ kfree(c->dbg);
+}
+
#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a811ac4..45174b5 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,18 +31,25 @@
#ifdef CONFIG_UBIFS_FS_DEBUG
-#include <linux/random.h>
+/*
+ * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
+ * + 1 for "_" + 2x2 for 2 UBI numbers + 1 for the trailing zero byte).
+ */
+#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
+#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
/**
* ubifs_debug_info - per-FS debugging information.
* @old_zroot: old index root - used by 'dbg_check_old_index()'
* @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
* @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
- * @failure_mode: failure mode for recovery testing
- * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
- * @fail_timeout: time in jiffies when delay of failure mode expires
- * @fail_cnt: current number of calls to failure mode I/O functions
- * @fail_cnt_max: number of calls by which to delay failure mode
+ *
+ * @pc_happened: non-zero if an emulated power cut happened
+ * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
+ * @pc_timeout: time in jiffies when delay of failure mode expires
+ * @pc_cnt: current number of calls to failure mode I/O functions
+ * @pc_cnt_max: number of calls by which to delay failure mode
+ *
* @chk_lpt_sz: used by LPT tree size checker
* @chk_lpt_sz2: used by LPT tree size checker
* @chk_lpt_wastage: used by LPT tree size checker
@@ -56,21 +63,36 @@
* @saved_free: saved amount of free space
* @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
*
+ * @chk_gen: if general extra checks are enabled
+ * @chk_index: if index extra checks are enabled
+ * @chk_orph: if orphans extra checks are enabled
+ * @chk_lprops: if lprops extra checks are enabled
+ * @chk_fs: if UBIFS contents extra checks are enabled
+ * @tst_rcvry: if UBIFS recovery testing mode is enabled
+ *
* @dfs_dir_name: name of debugfs directory containing this file-system's files
* @dfs_dir: direntry object of the file-system debugfs directory
* @dfs_dump_lprops: "dump lprops" debugfs knob
* @dfs_dump_budg: "dump budgeting information" debugfs knob
* @dfs_dump_tnc: "dump TNC" debugfs knob
+ * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
+ * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
+ * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
+ * @dfs_chk_lprops: debugfs knob to enable UBIFS LEB properties extra checks
+ * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
+ * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
*/
struct ubifs_debug_info {
struct ubifs_zbranch old_zroot;
int old_zroot_level;
unsigned long long old_zroot_sqnum;
- int failure_mode;
- int fail_delay;
- unsigned long fail_timeout;
- unsigned int fail_cnt;
- unsigned int fail_cnt_max;
+
+ int pc_happened;
+ int pc_delay;
+ unsigned long pc_timeout;
+ unsigned int pc_cnt;
+ unsigned int pc_cnt_max;
+
long long chk_lpt_sz;
long long chk_lpt_sz2;
long long chk_lpt_wastage;
@@ -84,11 +106,43 @@
long long saved_free;
int saved_idx_gc_cnt;
- char dfs_dir_name[100];
+ unsigned int chk_gen:1;
+ unsigned int chk_index:1;
+ unsigned int chk_orph:1;
+ unsigned int chk_lprops:1;
+ unsigned int chk_fs:1;
+ unsigned int tst_rcvry:1;
+
+ char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
struct dentry *dfs_dir;
struct dentry *dfs_dump_lprops;
struct dentry *dfs_dump_budg;
struct dentry *dfs_dump_tnc;
+ struct dentry *dfs_chk_gen;
+ struct dentry *dfs_chk_index;
+ struct dentry *dfs_chk_orph;
+ struct dentry *dfs_chk_lprops;
+ struct dentry *dfs_chk_fs;
+ struct dentry *dfs_tst_rcvry;
+};
+
+/**
+ * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information.
+ *
+ * @chk_gen: if general extra checks are enabled
+ * @chk_index: if index extra checks are enabled
+ * @chk_orph: if orphans extra checks are enabled
+ * @chk_lprops: if lprops extra checks are enabled
+ * @chk_fs: if UBIFS contents extra checks are enabled
+ * @tst_rcvry: if UBIFS recovery testing mode is enabled
+ */
+struct ubifs_global_debug_info {
+ unsigned int chk_gen:1;
+ unsigned int chk_index:1;
+ unsigned int chk_orph:1;
+ unsigned int chk_lprops:1;
+ unsigned int chk_fs:1;
+ unsigned int tst_rcvry:1;
};
#define ubifs_assert(expr) do { \
@@ -127,6 +181,8 @@
#define DBGKEY(key) dbg_key_str0(c, (key))
#define DBGKEY1(key) dbg_key_str1(c, (key))
+extern spinlock_t dbg_lock;
+
#define ubifs_dbg_msg(type, fmt, ...) do { \
spin_lock(&dbg_lock); \
pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
@@ -162,41 +218,36 @@
/* Additional recovery messages */
#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
-/*
- * Debugging check flags.
- *
- * UBIFS_CHK_GEN: general checks
- * UBIFS_CHK_TNC: check TNC
- * UBIFS_CHK_IDX_SZ: check index size
- * UBIFS_CHK_ORPH: check orphans
- * UBIFS_CHK_OLD_IDX: check the old index
- * UBIFS_CHK_LPROPS: check lprops
- * UBIFS_CHK_FS: check the file-system
- */
-enum {
- UBIFS_CHK_GEN = 0x1,
- UBIFS_CHK_TNC = 0x2,
- UBIFS_CHK_IDX_SZ = 0x4,
- UBIFS_CHK_ORPH = 0x8,
- UBIFS_CHK_OLD_IDX = 0x10,
- UBIFS_CHK_LPROPS = 0x20,
- UBIFS_CHK_FS = 0x40,
-};
+extern struct ubifs_global_debug_info ubifs_dbg;
-/*
- * Special testing flags.
- *
- * UBIFS_TST_RCVRY: failure mode for recovery testing
- */
-enum {
- UBIFS_TST_RCVRY = 0x4,
-};
-
-extern spinlock_t dbg_lock;
-
-extern unsigned int ubifs_msg_flags;
-extern unsigned int ubifs_chk_flags;
-extern unsigned int ubifs_tst_flags;
+static inline int dbg_is_chk_gen(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen);
+}
+static inline int dbg_is_chk_index(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_index || c->dbg->chk_index);
+}
+static inline int dbg_is_chk_orph(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph);
+}
+static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops);
+}
+static inline int dbg_is_chk_fs(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs);
+}
+static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry);
+}
+static inline int dbg_is_power_cut(const struct ubifs_info *c)
+{
+ return !!c->dbg->pc_happened;
+}
int ubifs_debugging_init(struct ubifs_info *c);
void ubifs_debugging_exit(struct ubifs_info *c);
@@ -207,7 +258,7 @@
const char *dbg_jhead(int jhead);
const char *dbg_get_key_dump(const struct ubifs_info *c,
const union ubifs_key *key);
-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
+void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
void dbg_dump_node(const struct ubifs_info *c, const void *node);
void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
int offs);
@@ -240,8 +291,8 @@
int dbg_check_ltab(struct ubifs_info *c);
int dbg_chk_lpt_free_spc(struct ubifs_info *c);
int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
-int dbg_check_synced_i_size(struct inode *inode);
-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
+int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
+int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
int dbg_check_tnc(struct ubifs_info *c, int extra);
int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
int dbg_check_filesystem(struct ubifs_info *c);
@@ -254,54 +305,12 @@
int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
-/* Force the use of in-the-gaps method for testing */
-static inline int dbg_force_in_the_gaps_enabled(void)
-{
- return ubifs_chk_flags & UBIFS_CHK_GEN;
-}
-int dbg_force_in_the_gaps(void);
-
-/* Failure mode for recovery testing */
-#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
-
-#ifndef UBIFS_DBG_PRESERVE_UBI
-#define ubi_leb_read dbg_leb_read
-#define ubi_leb_write dbg_leb_write
-#define ubi_leb_change dbg_leb_change
-#define ubi_leb_erase dbg_leb_erase
-#define ubi_leb_unmap dbg_leb_unmap
-#define ubi_is_mapped dbg_is_mapped
-#define ubi_leb_map dbg_leb_map
-#endif
-
-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
- int len, int check);
-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int offset, int len, int dtype);
-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int len, int dtype);
-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
-
-static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
- int offset, int len)
-{
- return dbg_leb_read(desc, lnum, buf, offset, len, 0);
-}
-
-static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
- const void *buf, int offset, int len)
-{
- return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
-}
-
-static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
- const void *buf, int len)
-{
- return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
-}
+int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len, int dtype);
+int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
+ int dtype);
+int dbg_leb_unmap(struct ubifs_info *c, int lnum);
+int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype);
/* Debugfs-related stuff */
int dbg_debugfs_init(void);
@@ -313,7 +322,7 @@
/* Use "if (0)" to make compiler check arguments even if debugging is off */
#define ubifs_assert(expr) do { \
- if (0 && (expr)) \
+ if (0) \
printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
__func__, __LINE__, current->pid); \
} while (0)
@@ -323,6 +332,9 @@
ubifs_err(fmt, ##__VA_ARGS__); \
} while (0)
+#define DBGKEY(key) ((char *)(key))
+#define DBGKEY1(key) ((char *)(key))
+
#define ubifs_dbg_msg(fmt, ...) do { \
if (0) \
pr_debug(fmt "\n", ##__VA_ARGS__); \
@@ -346,9 +358,6 @@
#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define DBGKEY(key) ((char *)(key))
-#define DBGKEY1(key) ((char *)(key))
-
static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
static inline const char *dbg_ntype(int type) { return ""; }
@@ -357,7 +366,7 @@
static inline const char *
dbg_get_key_dump(const struct ubifs_info *c,
const union ubifs_key *key) { return ""; }
-static inline void dbg_dump_inode(const struct ubifs_info *c,
+static inline void dbg_dump_inode(struct ubifs_info *c,
const struct inode *inode) { return; }
static inline void dbg_dump_node(const struct ubifs_info *c,
const void *node) { return; }
@@ -409,9 +418,11 @@
static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
int action, int len) { return 0; }
-static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
-static inline int dbg_check_dir_size(struct ubifs_info *c,
- const struct inode *dir) { return 0; }
+static inline int
+dbg_check_synced_i_size(const struct ubifs_info *c,
+ struct inode *inode) { return 0; }
+static inline int dbg_check_dir(struct ubifs_info *c,
+ const struct inode *dir) { return 0; }
static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
static inline int dbg_check_idx_size(struct ubifs_info *c,
long long idx_size) { return 0; }
@@ -431,9 +442,23 @@
dbg_check_nondata_nodes_order(struct ubifs_info *c,
struct list_head *head) { return 0; }
-static inline int dbg_force_in_the_gaps(void) { return 0; }
-#define dbg_force_in_the_gaps_enabled() 0
-#define dbg_failure_mode 0
+static inline int dbg_leb_write(struct ubifs_info *c, int lnum,
+ const void *buf, int offset,
+ int len, int dtype) { return 0; }
+static inline int dbg_leb_change(struct ubifs_info *c, int lnum,
+ const void *buf, int len,
+ int dtype) { return 0; }
+static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; }
+static inline int dbg_leb_map(struct ubifs_info *c, int lnum,
+ int dtype) { return 0; }
+
+static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; }
+static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; }
static inline int dbg_debugfs_init(void) { return 0; }
static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ef5abd3..6834920 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -102,7 +102,7 @@
* UBIFS has to fully control "clean <-> dirty" transitions of inodes
* to make budgeting work.
*/
- inode->i_flags |= (S_NOCMTIME);
+ inode->i_flags |= S_NOCMTIME;
inode_init_owner(inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
@@ -172,9 +172,11 @@
#ifdef CONFIG_UBIFS_FS_DEBUG
-static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
+static int dbg_check_name(const struct ubifs_info *c,
+ const struct ubifs_dent_node *dent,
+ const struct qstr *nm)
{
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (le16_to_cpu(dent->nlen) != nm->len)
return -EINVAL;
@@ -185,7 +187,7 @@
#else
-#define dbg_check_name(dent, nm) 0
+#define dbg_check_name(c, dent, nm) 0
#endif
@@ -219,7 +221,7 @@
goto out;
}
- if (dbg_check_name(dent, &dentry->d_name)) {
+ if (dbg_check_name(c, dent, &dentry->d_name)) {
err = -EINVAL;
goto out;
}
@@ -522,7 +524,7 @@
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- err = dbg_check_synced_i_size(inode);
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -577,7 +579,7 @@
inode->i_nlink, dir->i_ino);
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- err = dbg_check_synced_i_size(inode);
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5e7fccf..f9c234b 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1263,7 +1263,7 @@
if (err)
return err;
- err = dbg_check_synced_i_size(inode);
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -1304,7 +1304,7 @@
return NULL;
}
-int ubifs_fsync(struct file *file, int datasync)
+int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1319,14 +1319,16 @@
*/
return 0;
- /*
- * VFS has already synchronized dirty pages for this inode. Synchronize
- * the inode unless this is a 'datasync()' call.
- */
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&inode->i_mutex);
+
+ /* Synchronize the inode unless this is a 'datasync()' call. */
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
- return err;
+ goto out;
}
/*
@@ -1334,10 +1336,9 @@
* them.
*/
err = ubifs_sync_wbufs_by_inode(c, inode);
- if (err)
- return err;
-
- return 0;
+out:
+ mutex_unlock(&inode->i_mutex);
+ return err;
}
/**
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3be645e..9228950 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -86,8 +86,125 @@
c->no_chk_data_crc = 0;
c->vfs_sb->s_flags |= MS_RDONLY;
ubifs_warn("switched to read-only mode, error %d", err);
+ dump_stack();
+ }
+}
+
+/*
+ * Below are simple wrappers over UBI I/O functions which include some
+ * additional checks and UBIFS debugging stuff. See the corresponding UBI
+ * functions for more information.
+ */
+
+int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
+ int len, int even_ebadmsg)
+{
+ int err;
+
+ err = ubi_read(c->ubi, lnum, buf, offs, len);
+ /*
+ * In case of %-EBADMSG print the error message only if the
+ * @even_ebadmsg is true.
+ */
+ if (err && (err != -EBADMSG || even_ebadmsg)) {
+ ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
+ len, lnum, offs, err);
dbg_dump_stack();
}
+ return err;
+}
+
+int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len, int dtype)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
+ else
+ err = dbg_leb_write(c, lnum, buf, offs, len, dtype);
+ if (err) {
+ ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
+ len, lnum, offs, err);
+ ubifs_ro_mode(c, err);
+ dbg_dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
+ int dtype)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
+ else
+ err = dbg_leb_change(c, lnum, buf, len, dtype);
+ if (err) {
+ ubifs_err("changing %d bytes in LEB %d failed, error %d",
+ len, lnum, err);
+ ubifs_ro_mode(c, err);
+ dbg_dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_unmap(c->ubi, lnum);
+ else
+ err = dbg_leb_unmap(c, lnum);
+ if (err) {
+ ubifs_err("unmap LEB %d failed, error %d", lnum, err);
+ ubifs_ro_mode(c, err);
+ dbg_dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_map(c->ubi, lnum, dtype);
+ else
+ err = dbg_leb_map(c, lnum, dtype);
+ if (err) {
+ ubifs_err("mapping LEB %d failed, error %d", lnum, err);
+ ubifs_ro_mode(c, err);
+ dbg_dump_stack();
+ }
+ return err;
+}
+
+int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
+{
+ int err;
+
+ err = ubi_is_mapped(c->ubi, lnum);
+ if (err < 0) {
+ ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
+ lnum, err);
+ dbg_dump_stack();
+ }
+ return err;
}
/**
@@ -406,14 +523,10 @@
dirt = sync_len - wbuf->used;
if (dirt)
ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
- sync_len, wbuf->dtype);
- if (err) {
- ubifs_err("cannot write %d bytes to LEB %d:%d",
- sync_len, wbuf->lnum, wbuf->offs);
- dbg_dump_stack();
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len,
+ wbuf->dtype);
+ if (err)
return err;
- }
spin_lock(&wbuf->lock);
wbuf->offs += sync_len;
@@ -605,9 +718,9 @@
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
- wbuf->offs, wbuf->size,
- wbuf->dtype);
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
+ wbuf->offs, wbuf->size,
+ wbuf->dtype);
if (err)
goto out;
@@ -642,8 +755,8 @@
dbg_io("flush jhead %s wbuf to LEB %d:%d",
dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
- wbuf->size, wbuf->dtype);
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
+ wbuf->size, wbuf->dtype);
if (err)
goto out;
@@ -661,8 +774,8 @@
*/
dbg_io("write %d bytes to LEB %d:%d",
wbuf->size, wbuf->lnum, wbuf->offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
- wbuf->size, wbuf->dtype);
+ err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
+ wbuf->size, wbuf->dtype);
if (err)
goto out;
@@ -683,8 +796,8 @@
n <<= c->max_write_shift;
dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
wbuf->offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
- wbuf->offs, n, wbuf->dtype);
+ err = ubifs_leb_write(c, wbuf->lnum, buf + written,
+ wbuf->offs, n, wbuf->dtype);
if (err)
goto out;
wbuf->offs += n;
@@ -766,13 +879,9 @@
return -EROFS;
ubifs_prepare_node(c, buf, len, 1);
- err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
- if (err) {
- ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
- buf_len, lnum, offs, err);
+ err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype);
+ if (err)
dbg_dump_node(c, buf);
- dbg_dump_stack();
- }
return err;
}
@@ -824,13 +933,9 @@
if (rlen > 0) {
/* Read everything that goes before write-buffer */
- err = ubi_read(c->ubi, lnum, buf, offs, rlen);
- if (err && err != -EBADMSG) {
- ubifs_err("failed to read node %d from LEB %d:%d, "
- "error %d", type, lnum, offs, err);
- dbg_dump_stack();
+ err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
+ if (err && err != -EBADMSG)
return err;
- }
}
if (type != ch->node_type) {
@@ -885,12 +990,9 @@
ubifs_assert(!(offs & 7) && offs < c->leb_size);
ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
- err = ubi_read(c->ubi, lnum, buf, offs, len);
- if (err && err != -EBADMSG) {
- ubifs_err("cannot read node %d from LEB %d:%d, error %d",
- type, lnum, offs, err);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
+ if (err && err != -EBADMSG)
return err;
- }
if (type != ch->node_type) {
ubifs_err("bad node type (%d but expected %d)",
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index affea94..f9fd068 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -262,7 +262,7 @@
* an unclean reboot, because the target LEB might have been
* unmapped, but not yet physically erased.
*/
- err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
+ err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM);
if (err)
goto out_unlock;
}
@@ -283,8 +283,6 @@
return 0;
out_unlock:
- if (err != -EAGAIN)
- ubifs_ro_mode(c, err);
mutex_unlock(&c->log_mutex);
kfree(ref);
kfree(bud);
@@ -752,7 +750,7 @@
struct ubifs_bud *bud;
long long bud_bytes = 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
spin_lock(&c->buds_lock);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 667884f..f8a181e 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -504,7 +504,7 @@
pnode = (struct ubifs_pnode *)container_of(lprops - pos,
struct ubifs_pnode,
lprops[0]);
- return !test_bit(COW_ZNODE, &pnode->flags) &&
+ return !test_bit(COW_CNODE, &pnode->flags) &&
test_bit(DIRTY_CNODE, &pnode->flags);
}
@@ -860,7 +860,7 @@
struct list_head *pos;
int i, cat;
- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
return 0;
list_for_each_entry(lprops, &c->empty_list, list) {
@@ -958,7 +958,7 @@
{
int i = 0, j, err = 0;
- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
return;
for (i = 0; i < heap->cnt; i++) {
@@ -1262,7 +1262,7 @@
int i, err;
struct ubifs_lp_stats lst;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
/*
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ef5155e..6189c74 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -701,8 +701,8 @@
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen,
+ UBI_SHORTTERM);
if (err)
goto out;
p = buf;
@@ -732,8 +732,8 @@
set_ltab(c, lnum, c->leb_size - alen,
alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen,
+ UBI_SHORTTERM);
if (err)
goto out;
p = buf;
@@ -780,8 +780,8 @@
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen,
+ UBI_SHORTTERM);
if (err)
goto out;
p = buf;
@@ -806,7 +806,7 @@
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM);
if (err)
goto out;
p = buf;
@@ -826,7 +826,7 @@
/* Write remaining buffer */
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM);
if (err)
goto out;
@@ -1222,7 +1222,7 @@
if (c->big_lpt)
nnode->num = calc_nnode_num_from_parent(c, parent, iip);
} else {
- err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
+ err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1);
if (err)
goto out;
err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1247,6 +1247,7 @@
out:
ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
+ dbg_dump_stack();
kfree(nnode);
return err;
}
@@ -1290,7 +1291,7 @@
lprops->flags = ubifs_categorize_lprops(c, lprops);
}
} else {
- err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
+ err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1);
if (err)
goto out;
err = unpack_pnode(c, buf, pnode);
@@ -1312,6 +1313,7 @@
out:
ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
dbg_dump_pnode(c, pnode, parent, iip);
+ dbg_dump_stack();
dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
kfree(pnode);
return err;
@@ -1331,7 +1333,7 @@
buf = vmalloc(c->ltab_sz);
if (!buf)
return -ENOMEM;
- err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz);
+ err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
if (err)
goto out;
err = unpack_ltab(c, buf);
@@ -1354,7 +1356,8 @@
buf = vmalloc(c->lsave_sz);
if (!buf)
return -ENOMEM;
- err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
+ err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs,
+ c->lsave_sz, 1);
if (err)
goto out;
err = unpack_lsave(c, buf);
@@ -1814,8 +1817,8 @@
if (c->big_lpt)
nnode->num = calc_nnode_num_from_parent(c, parent, iip);
} else {
- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
- c->nnode_sz);
+ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
+ c->nnode_sz, 1);
if (err)
return ERR_PTR(err);
err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1883,8 +1886,8 @@
ubifs_assert(branch->lnum >= c->lpt_first &&
branch->lnum <= c->lpt_last);
ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
- c->pnode_sz);
+ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
+ c->pnode_sz, 1);
if (err)
return ERR_PTR(err);
err = unpack_pnode(c, buf, pnode);
@@ -2224,7 +2227,7 @@
struct ubifs_cnode *cn;
int num, iip = 0, err;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
while (cnode) {
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index dfcb574..cddd6bd 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -27,6 +27,7 @@
#include <linux/crc16.h>
#include <linux/slab.h>
+#include <linux/random.h>
#include "ubifs.h"
#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -116,8 +117,8 @@
return 0;
cnt += 1;
while (1) {
- ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
- __set_bit(COW_ZNODE, &cnode->flags);
+ ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
+ __set_bit(COW_CNODE, &cnode->flags);
cnext = next_dirty_cnode(cnode);
if (!cnext) {
cnode->cnext = c->lpt_cnext;
@@ -465,7 +466,7 @@
*/
clear_bit(DIRTY_CNODE, &cnode->flags);
smp_mb__before_clear_bit();
- clear_bit(COW_ZNODE, &cnode->flags);
+ clear_bit(COW_CNODE, &cnode->flags);
smp_mb__after_clear_bit();
offs += len;
dbg_chk_lpt_sz(c, 1, len);
@@ -1160,11 +1161,11 @@
void *buf = c->lpt_buf;
dbg_lp("LEB %d", lnum);
- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
- if (err) {
- ubifs_err("cannot read LEB %d, error %d", lnum, err);
+
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
return err;
- }
+
while (1) {
if (!is_a_node(c, buf, len)) {
int pad_len;
@@ -1640,7 +1641,7 @@
int ret;
void *buf, *p;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -1650,11 +1651,11 @@
}
dbg_lp("LEB %d", lnum);
- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
- if (err) {
- dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
+
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
goto out;
- }
+
while (1) {
if (!is_a_node(c, p, len)) {
int i, pad_len;
@@ -1711,7 +1712,7 @@
{
int lnum, err, i, cnt;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
/* Bring the entire tree into memory */
@@ -1754,7 +1755,7 @@
long long free = 0;
int i;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
for (i = 0; i < c->lpt_lebs; i++) {
@@ -1796,7 +1797,7 @@
long long chk_lpt_sz, lpt_sz;
int err = 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
switch (action) {
@@ -1901,11 +1902,10 @@
return;
}
- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
- if (err) {
- ubifs_err("cannot read LEB %d, error %d", lnum, err);
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
goto out;
- }
+
while (1) {
offs = c->leb_size - len;
if (!is_a_node(c, p, len)) {
@@ -2019,7 +2019,7 @@
struct ubifs_lpt_heap *heap;
int i;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (random32() & 3)
return 0;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 0b5296a..ee7cb5e 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -39,6 +39,29 @@
}
/**
+ * ubifs_zn_obsolete - check if znode is obsolete.
+ * @znode: znode to check
+ *
+ * This helper function returns %1 if @znode is obsolete and %0 otherwise.
+ */
+static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
+{
+ return !!test_bit(OBSOLETE_ZNODE, &znode->flags);
+}
+
+/**
+ * ubifs_zn_cow - check if znode has to be copied on write.
+ * @znode: znode to check
+ *
+ * This helper function returns %1 if @znode has the COW flag set and %0
+ * otherwise.
+ */
+static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
+{
+ return !!test_bit(COW_ZNODE, &znode->flags);
+}
+
+/**
* ubifs_wake_up_bgt - wake up background thread.
* @c: UBIFS file-system description object
*/
@@ -122,86 +145,6 @@
}
/**
- * ubifs_leb_unmap - unmap an LEB.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to unmap
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
-{
- int err;
-
- ubifs_assert(!c->ro_media && !c->ro_mount);
- if (c->ro_error)
- return -EROFS;
- err = ubi_leb_unmap(c->ubi, lnum);
- if (err) {
- ubifs_err("unmap LEB %d failed, error %d", lnum, err);
- return err;
- }
-
- return 0;
-}
-
-/**
- * ubifs_leb_write - write to a LEB.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to write
- * @buf: buffer to write from
- * @offs: offset within LEB to write to
- * @len: length to write
- * @dtype: data type
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
- const void *buf, int offs, int len, int dtype)
-{
- int err;
-
- ubifs_assert(!c->ro_media && !c->ro_mount);
- if (c->ro_error)
- return -EROFS;
- err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
- if (err) {
- ubifs_err("writing %d bytes at %d:%d, error %d",
- len, lnum, offs, err);
- return err;
- }
-
- return 0;
-}
-
-/**
- * ubifs_leb_change - atomic LEB change.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to write
- * @buf: buffer to write from
- * @len: length to write
- * @dtype: data type
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
- const void *buf, int len, int dtype)
-{
- int err;
-
- ubifs_assert(!c->ro_media && !c->ro_mount);
- if (c->ro_error)
- return -EROFS;
- err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
- if (err) {
- ubifs_err("changing %d bytes in LEB %d, error %d",
- len, lnum, err);
- return err;
- }
-
- return 0;
-}
-
-/**
* ubifs_encode_dev - encode device node IDs.
* @dev: UBIFS device node information
* @rdev: device IDs to encode
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index a5422ff..c542c73 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -929,7 +929,7 @@
struct check_info ci;
int err;
- if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
+ if (!dbg_is_chk_orph(c))
return 0;
ci.last_ino = 0;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 783d8e0..af02790 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -117,7 +117,7 @@
if (!sbuf)
return -ENOMEM;
- err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
+ err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
if (err && err != -EBADMSG)
goto out_free;
@@ -213,10 +213,10 @@
mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
- err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM);
if (err)
goto out;
- err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM);
if (err)
goto out;
out:
@@ -274,7 +274,8 @@
if (cor1)
goto out_err;
mst = mst1;
- } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
+ } else if (offs1 == 0 &&
+ c->leb_size - offs2 - sz < sz) {
/* 1st LEB was unmapped and written, 2nd not */
if (cor1)
goto out_err;
@@ -539,8 +540,8 @@
int len = ALIGN(endpt, c->min_io_size);
if (start) {
- err = ubi_read(c->ubi, lnum, sleb->buf, 0,
- start);
+ err = ubifs_leb_read(c, lnum, sleb->buf, 0,
+ start, 1);
if (err)
return err;
}
@@ -554,8 +555,8 @@
ubifs_pad(c, buf, pad_len);
}
}
- err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
- UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, sleb->buf, len,
+ UBI_UNKNOWN);
if (err)
return err;
}
@@ -819,7 +820,8 @@
return -ENOMEM;
if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
goto out_err;
- err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
+ err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
+ UBIFS_CS_NODE_SZ, 0);
if (err && err != -EBADMSG)
goto out_free;
ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
@@ -919,8 +921,7 @@
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int recover_head(const struct ubifs_info *c, int lnum, int offs,
- void *sbuf)
+static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
{
int len = c->max_write_size, err;
@@ -931,15 +932,15 @@
return 0;
/* Read at the head location and check it is empty flash */
- err = ubi_read(c->ubi, lnum, sbuf, offs, len);
+ err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
if (err || !is_empty(sbuf, len)) {
dbg_rcvry("cleaning head at %d:%d", lnum, offs);
if (offs == 0)
return ubifs_leb_unmap(c, lnum);
- err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
+ err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
if (err)
return err;
- return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
+ return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN);
}
return 0;
@@ -962,7 +963,7 @@
*
* This function returns %0 on success and a negative error code on failure.
*/
-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
+int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
{
int err;
@@ -993,7 +994,7 @@
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int clean_an_unclean_leb(const struct ubifs_info *c,
+static int clean_an_unclean_leb(struct ubifs_info *c,
struct ubifs_unclean_leb *ucleb, void *sbuf)
{
int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
@@ -1009,7 +1010,7 @@
return 0;
}
- err = ubi_read(c->ubi, lnum, buf, offs, len);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
if (err && err != -EBADMSG)
return err;
@@ -1069,7 +1070,7 @@
}
/* Write back the LEB atomically */
- err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN);
if (err)
return err;
@@ -1089,7 +1090,7 @@
*
* This function returns %0 on success and a negative error code on failure.
*/
-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
+int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
{
dbg_rcvry("recovery");
while (!list_empty(&c->unclean_leb_list)) {
@@ -1454,7 +1455,7 @@
if (i_size >= e->d_size)
return 0;
/* Read the LEB */
- err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
+ err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
if (err)
goto out;
/* Change the size field and recalculate the CRC */
@@ -1470,7 +1471,7 @@
len -= 1;
len = ALIGN(len + 1, c->min_io_size);
/* Atomically write the fixed LEB back again */
- err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
if (err)
goto out;
dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 5e97161..ccabaf1 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -523,8 +523,7 @@
if (!list_is_last(&next->list, &jh->buds_list))
return 0;
- err = ubi_read(c->ubi, next->lnum, (char *)&data,
- next->start, 4);
+ err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
if (err)
return 0;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c606f01..93d938a 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -674,15 +674,15 @@
if (len == 0) {
dbg_mnt("unmap empty LEB %d", lnum);
- return ubi_leb_unmap(c->ubi, lnum);
+ return ubifs_leb_unmap(c, lnum);
}
dbg_mnt("fixup LEB %d, data len %d", lnum, len);
- err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
+ err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
if (err)
return err;
- return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+ return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
}
/**
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 36216b4..37383e8 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -148,7 +148,7 @@
INIT_LIST_HEAD(&sleb->nodes);
sleb->buf = sbuf;
- err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
+ err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
if (err && err != -EBADMSG) {
ubifs_err("cannot read %d bytes from LEB %d:%d,"
" error %d", c->leb_size - offs, lnum, offs, err);
@@ -240,7 +240,7 @@
int len;
ubifs_err("corruption at LEB %d:%d", lnum, offs);
- if (dbg_failure_mode)
+ if (dbg_is_tst_rcvry(c))
return;
len = c->leb_size - offs;
if (len > 8192)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 529be05..b281212 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -85,7 +85,7 @@
if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
return 4;
- if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
+ if (ui->xattr && !S_ISREG(inode->i_mode))
return 5;
if (!ubifs_compr_present(ui->compr_type)) {
@@ -94,7 +94,7 @@
ubifs_compr_name(ui->compr_type));
}
- err = dbg_check_dir_size(c, inode);
+ err = dbg_check_dir(c, inode);
return err;
}
@@ -914,7 +914,7 @@
c->empty = 1;
for (lnum = 0; lnum < c->leb_cnt; lnum++) {
- err = ubi_is_mapped(c->ubi, lnum);
+ err = ubifs_is_mapped(c, lnum);
if (unlikely(err < 0))
return err;
if (err == 1) {
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 91b4213..0667386 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -223,7 +223,7 @@
__set_bit(DIRTY_ZNODE, &zn->flags);
__clear_bit(COW_ZNODE, &zn->flags);
- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_obsolete(znode));
__set_bit(OBSOLETE_ZNODE, &znode->flags);
if (znode->level != 0) {
@@ -271,7 +271,7 @@
struct ubifs_znode *zn;
int err;
- if (!test_bit(COW_ZNODE, &znode->flags)) {
+ if (!ubifs_zn_cow(znode)) {
/* znode is not being committed */
if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
atomic_long_inc(&c->dirty_zn_cnt);
@@ -462,7 +462,7 @@
dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
- err = ubi_read(c->ubi, lnum, buf, offs, len);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
if (err) {
ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
type, lnum, offs, err);
@@ -1666,7 +1666,7 @@
if (!overlap) {
/* We may safely unlock the write-buffer and read the data */
spin_unlock(&wbuf->lock);
- return ubi_read(c->ubi, lnum, buf, offs, len);
+ return ubifs_leb_read(c, lnum, buf, offs, len, 0);
}
/* Don't read under wbuf */
@@ -1680,7 +1680,7 @@
if (rlen > 0)
/* Read everything that goes before write-buffer */
- return ubi_read(c->ubi, lnum, buf, offs, rlen);
+ return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
return 0;
}
@@ -1767,7 +1767,7 @@
if (wbuf)
err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
else
- err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
+ err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
/* Check for a race with GC */
if (maybe_leb_gced(c, lnum, bu->gc_seq))
@@ -2423,7 +2423,7 @@
*/
do {
- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_obsolete(znode));
ubifs_assert(ubifs_zn_dirty(znode));
zp = znode->parent;
@@ -2479,9 +2479,8 @@
c->zroot.offs = zbr->offs;
c->zroot.len = zbr->len;
c->zroot.znode = znode;
- ubifs_assert(!test_bit(OBSOLETE_ZNODE,
- &zp->flags));
- ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
+ ubifs_assert(!ubifs_zn_obsolete(zp));
+ ubifs_assert(ubifs_zn_dirty(zp));
atomic_long_dec(&c->dirty_zn_cnt);
if (zp->cnext) {
@@ -2865,7 +2864,7 @@
struct ubifs_znode *znode = cnext;
cnext = cnext->cnext;
- if (test_bit(OBSOLETE_ZNODE, &znode->flags))
+ if (ubifs_zn_obsolete(znode))
kfree(znode);
} while (cnext && cnext != c->cnext);
}
@@ -3301,7 +3300,7 @@
if (!S_ISREG(inode->i_mode))
return 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
@@ -3337,9 +3336,10 @@
ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
"(data key %s)", (unsigned long)inode->i_ino, size,
((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
+ mutex_unlock(&c->tnc_mutex);
dbg_dump_inode(c, inode);
dbg_dump_stack();
- err = -EINVAL;
+ return -EINVAL;
out_unlock:
mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 41920f3..4c15f07 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -22,6 +22,7 @@
/* This file implements TNC functions for committing */
+#include <linux/random.h>
#include "ubifs.h"
/**
@@ -87,8 +88,12 @@
atomic_long_dec(&c->dirty_zn_cnt);
ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(ubifs_zn_cow(znode));
+ /*
+ * Note, unlike 'write_index()' we do not add memory barriers here
+ * because this function is called with @c->tnc_mutex locked.
+ */
__clear_bit(DIRTY_ZNODE, &znode->flags);
__clear_bit(COW_ZNODE, &znode->flags);
@@ -377,7 +382,7 @@
c->gap_lebs = NULL;
return err;
}
- if (dbg_force_in_the_gaps_enabled()) {
+ if (!dbg_is_chk_index(c)) {
/*
* Do not print scary warnings if the debugging
* option which forces in-the-gaps is enabled.
@@ -491,25 +496,6 @@
else
next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
- if (c->min_io_size == 1) {
- buf_offs += ALIGN(len, 8);
- if (next_len) {
- if (buf_offs + next_len <= c->leb_size)
- continue;
- err = ubifs_update_one_lp(c, lnum, 0,
- c->leb_size - buf_offs, 0, 0);
- if (err)
- return err;
- lnum = -1;
- continue;
- }
- err = ubifs_update_one_lp(c, lnum,
- c->leb_size - buf_offs, 0, 0, 0);
- if (err)
- return err;
- break;
- }
-
/* Update buffer positions */
wlen = used + len;
used += ALIGN(len, 8);
@@ -658,7 +644,7 @@
}
cnt += 1;
while (1) {
- ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_cow(znode));
__set_bit(COW_ZNODE, &znode->flags);
znode->alt = 0;
cnext = find_next_dirty(znode);
@@ -704,7 +690,7 @@
c->ilebs[c->ileb_cnt++] = lnum;
dbg_cmt("LEB %d", lnum);
}
- if (dbg_force_in_the_gaps())
+ if (dbg_is_chk_index(c) && !(random32() & 7))
return -ENOSPC;
return 0;
}
@@ -830,7 +816,7 @@
struct ubifs_idx_node *idx;
struct ubifs_znode *znode, *cnext;
int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
- int avail, wlen, err, lnum_pos = 0;
+ int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;
cnext = c->enext;
if (!cnext)
@@ -907,7 +893,7 @@
cnext = znode->cnext;
ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(ubifs_zn_cow(znode));
/*
* It is important that other threads should see %DIRTY_ZNODE
@@ -922,6 +908,28 @@
clear_bit(COW_ZNODE, &znode->flags);
smp_mb__after_clear_bit();
+ /*
+ * We have marked the znode as clean but have not updated the
+ * @c->clean_zn_cnt counter. If this znode becomes dirty again
+ * before 'free_obsolete_znodes()' is called, then
+ * @c->clean_zn_cnt will be decremented before it gets
+ * incremented (resulting in 2 decrements for the same znode).
+ * This means that @c->clean_zn_cnt may become negative for a
+ * while.
+ *
+ * Q: why can't we increment @c->clean_zn_cnt?
+ * A: because we do not have the @c->tnc_mutex locked, and the
+ * following code would be racy and buggy:
+ *
+ * if (!ubifs_zn_obsolete(znode)) {
+ * atomic_long_inc(&c->clean_zn_cnt);
+ * atomic_long_inc(&ubifs_clean_zn_cnt);
+ * }
+ *
+ * Thus, we just delay the @c->clean_zn_cnt update until we
+ * have the mutex locked.
+ */
+
/* Do not access znode from this point on */
/* Update buffer positions */
@@ -938,65 +946,38 @@
else
next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
- if (c->min_io_size == 1) {
- /*
- * Write the prepared index node immediately if there is
- * no minimum IO size
- */
- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
- wlen, UBI_SHORTTERM);
- if (err)
- return err;
- buf_offs += ALIGN(wlen, 8);
- if (next_len) {
- used = 0;
- avail = buf_len;
- if (buf_offs + next_len > c->leb_size) {
- err = ubifs_update_one_lp(c, lnum,
- LPROPS_NC, 0, 0, LPROPS_TAKEN);
- if (err)
- return err;
- lnum = -1;
- }
+ nxt_offs = buf_offs + used + next_len;
+ if (next_len && nxt_offs <= c->leb_size) {
+ if (avail > 0)
continue;
- }
+ else
+ blen = buf_len;
} else {
- int blen, nxt_offs = buf_offs + used + next_len;
+ wlen = ALIGN(wlen, 8);
+ blen = ALIGN(wlen, c->min_io_size);
+ ubifs_pad(c, c->cbuf + wlen, blen - wlen);
+ }
- if (next_len && nxt_offs <= c->leb_size) {
- if (avail > 0)
- continue;
- else
- blen = buf_len;
- } else {
- wlen = ALIGN(wlen, 8);
- blen = ALIGN(wlen, c->min_io_size);
- ubifs_pad(c, c->cbuf + wlen, blen - wlen);
+ /* The buffer is full or there are no more znodes to do */
+ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen,
+ UBI_SHORTTERM);
+ if (err)
+ return err;
+ buf_offs += blen;
+ if (next_len) {
+ if (nxt_offs > c->leb_size) {
+ err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
+ 0, LPROPS_TAKEN);
+ if (err)
+ return err;
+ lnum = -1;
}
- /*
- * The buffer is full or there are no more znodes
- * to do
- */
- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
- blen, UBI_SHORTTERM);
- if (err)
- return err;
- buf_offs += blen;
- if (next_len) {
- if (nxt_offs > c->leb_size) {
- err = ubifs_update_one_lp(c, lnum,
- LPROPS_NC, 0, 0, LPROPS_TAKEN);
- if (err)
- return err;
- lnum = -1;
- }
- used -= blen;
- if (used < 0)
- used = 0;
- avail = buf_len - used;
- memmove(c->cbuf, c->cbuf + blen, used);
- continue;
- }
+ used -= blen;
+ if (used < 0)
+ used = 0;
+ avail = buf_len - used;
+ memmove(c->cbuf, c->cbuf + blen, used);
+ continue;
}
break;
}
@@ -1029,7 +1010,7 @@
do {
znode = cnext;
cnext = znode->cnext;
- if (test_bit(OBSOLETE_ZNODE, &znode->flags))
+ if (ubifs_zn_obsolete(znode))
kfree(znode);
else {
znode->cnext = NULL;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index f79983d..27f2255 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -230,14 +230,14 @@
* LPT cnode flag bits.
*
* DIRTY_CNODE: cnode is dirty
- * COW_CNODE: cnode is being committed and must be copied before writing
* OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
- * so it can (and must) be freed when the commit is finished
+ * so it can (and must) be freed when the commit is finished
+ * COW_CNODE: cnode is being committed and must be copied before writing
*/
enum {
DIRTY_CNODE = 0,
- COW_CNODE = 1,
- OBSOLETE_CNODE = 2,
+ OBSOLETE_CNODE = 1,
+ COW_CNODE = 2,
};
/*
@@ -1468,6 +1468,15 @@
/* io.c */
void ubifs_ro_mode(struct ubifs_info *c, int err);
+int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
+ int len, int even_ebadmsg);
+int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len, int dtype);
+int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
+ int dtype);
+int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
+int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype);
+int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
int dtype);
@@ -1720,7 +1729,7 @@
int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
-int ubifs_fsync(struct file *file, int datasync);
+int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
/* dir.c */
@@ -1747,8 +1756,8 @@
int offs, void *sbuf, int jhead);
struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf);
-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
+int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
+int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
int ubifs_rcvry_gc_commit(struct ubifs_info *c);
int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
int deletion, loff_t new_size);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2a346bb..d8ffa7c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -150,7 +150,7 @@
long old_block, new_block;
int result = -EINVAL;
- if (file_permission(filp, MAY_READ) != 0) {
+ if (inode_permission(inode, MAY_READ) != 0) {
udf_debug("no permission to access inode %lu\n", inode->i_ino);
result = -EPERM;
goto out;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 29309e2..639d491 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -56,16 +56,10 @@
lock_ufs(dir->i_sb);
ino = ufs_inode_by_name(dir, &dentry->d_name);
- if (ino) {
+ if (ino)
inode = ufs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- unlock_ufs(dir->i_sb);
- return ERR_CAST(inode);
- }
- }
unlock_ufs(dir->i_sb);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/*
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 284a7c8..75bb316 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,8 +88,6 @@
xfs_vnodeops.o \
xfs_rw.o
-xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
-
# Objects in linux/
xfs-y += $(addprefix $(XFS_LINUX)/, \
kmem.o \
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 39f4f80..cac48fe 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,7 +219,7 @@
}
int
-xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
+xfs_check_acl(struct inode *inode, int mask)
{
struct xfs_inode *ip;
struct posix_acl *acl;
@@ -235,7 +235,7 @@
if (!XFS_IFORK_Q(ip))
return -EAGAIN;
- if (flags & IPERM_FLAG_RCU) {
+ if (mask & MAY_NOT_BLOCK) {
if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
return -ECHILD;
return -EAGAIN;
@@ -264,7 +264,7 @@
iattr.ia_mode = mode;
iattr.ia_ctime = current_fs_time(inode->i_sb);
- error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
return error;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 79ce38b..63e971e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@
isize = xfs_ioend_new_eof(ioend);
if (isize) {
+ trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
ip->i_d.di_size = isize;
xfs_mark_inode_dirty(ip);
}
@@ -894,11 +895,6 @@
* For unwritten space on the page we need to start the conversion to
* regular allocated space.
* For any other dirty buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush the page, we
- * have to check the process flags first, if we are already in a transaction
- * or disk I/O during allocations is off, we need to fail the writepage and
- * redirty the page.
*/
STATIC int
xfs_vm_writepage(
@@ -906,7 +902,6 @@
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
- int delalloc, unwritten;
struct buffer_head *bh, *head;
struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
@@ -938,15 +933,10 @@
goto redirty;
/*
- * We need a transaction if there are delalloc or unwritten buffers
- * on the page.
- *
- * If we need a transaction and the process flags say we are already
- * in a transaction, or no IO is allowed then mark the page dirty
- * again and leave the page as is.
+ * Given that we do not allow direct reclaim to call us, we should
+ * never be called while in a filesystem transaction.
*/
- xfs_count_page_state(page, &delalloc, &unwritten);
- if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
+ if (WARN_ON(current->flags & PF_FSTRANS))
goto redirty;
/* Is this page beyond the end of the file? */
@@ -970,7 +960,7 @@
offset = page_offset(page);
type = IO_OVERWRITE;
- if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+ if (wbc->sync_mode == WB_SYNC_NONE)
nonblocking = 1;
do {
@@ -1339,6 +1329,9 @@
} else {
xfs_finish_ioend_sync(ioend);
}
+
+ /* XXX: probably should move into the real I/O completion handler */
+ inode_dio_done(ioend->io_inode);
}
STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5e68099..b2b4119 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -499,16 +499,14 @@
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
- if (xfs_buf_cond_lock(bp)) {
- /* failed, so wait for the lock if requested. */
- if (!(flags & XBF_TRYLOCK)) {
- xfs_buf_lock(bp);
- XFS_STATS_INC(xb_get_locked_waited);
- } else {
+ if (!xfs_buf_trylock(bp)) {
+ if (flags & XBF_TRYLOCK) {
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
+ xfs_buf_lock(bp);
+ XFS_STATS_INC(xb_get_locked_waited);
}
/*
@@ -594,10 +592,8 @@
ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
- bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+ bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
status = xfs_buf_iorequest(bp);
if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
@@ -681,7 +677,6 @@
return NULL;
/* set up the buffer for a read IO */
- xfs_buf_lock(bp);
XFS_BUF_SET_ADDR(bp, daddr);
XFS_BUF_READ(bp);
XFS_BUF_BUSY(bp);
@@ -816,8 +811,6 @@
goto fail_free_mem;
}
- xfs_buf_unlock(bp);
-
trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
@@ -896,8 +889,8 @@
* to push on stale inode buffers.
*/
int
-xfs_buf_cond_lock(
- xfs_buf_t *bp)
+xfs_buf_trylock(
+ struct xfs_buf *bp)
{
int locked;
@@ -907,15 +900,8 @@
else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
xfs_log_force(bp->b_target->bt_mount, 0);
- trace_xfs_buf_cond_lock(bp, _RET_IP_);
- return locked ? 0 : -EBUSY;
-}
-
-int
-xfs_buf_lock_value(
- xfs_buf_t *bp)
-{
- return bp->b_sema.count;
+ trace_xfs_buf_trylock(bp, _RET_IP_);
+ return locked;
}
/*
@@ -929,7 +915,7 @@
*/
void
xfs_buf_lock(
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
trace_xfs_buf_lock(bp, _RET_IP_);
@@ -950,7 +936,7 @@
*/
void
xfs_buf_unlock(
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
atomic_inc(&bp->b_hold);
@@ -1121,7 +1107,7 @@
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_iodone = NULL;
if (!(fl & XBF_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
@@ -1223,23 +1209,21 @@
total_nr_pages = bp->b_page_count;
map_i = 0;
- if (bp->b_flags & XBF_ORDERED) {
- ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_FLUSH_FUA;
- } else if (bp->b_flags & XBF_LOG_BUFFER) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
- } else if (bp->b_flags & _XBF_RUN_QUEUES) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
+ if (bp->b_flags & XBF_WRITE) {
+ if (bp->b_flags & XBF_SYNCIO)
+ rw = WRITE_SYNC;
+ else
+ rw = WRITE;
+ if (bp->b_flags & XBF_FUA)
+ rw |= REQ_FUA;
+ if (bp->b_flags & XBF_FLUSH)
+ rw |= REQ_FLUSH;
+ } else if (bp->b_flags & XBF_READ_AHEAD) {
+ rw = READA;
} else {
- rw = (bp->b_flags & XBF_WRITE) ? WRITE :
- (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
+ rw = READ;
}
-
next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1694,15 +1678,14 @@
list_for_each_entry_safe(bp, n, dwq, b_list) {
ASSERT(bp->b_flags & XBF_DELWRI);
- if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
if (!force &&
time_before(jiffies, bp->b_queuetime + age)) {
xfs_buf_unlock(bp);
break;
}
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
- _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, list);
trace_xfs_buf_delwri_split(bp, _RET_IP_);
@@ -1738,14 +1721,6 @@
return 0;
}
-void
-xfs_buf_delwri_sort(
- xfs_buftarg_t *target,
- struct list_head *list)
-{
- list_sort(NULL, list, xfs_buf_cmp);
-}
-
STATIC int
xfsbufd(
void *data)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5f..6a83b46 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -46,43 +46,46 @@
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
-#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_ORDERED (1 << 11)/* use ordered writes */
-#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
-#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
/* flags used only as arguments to access routines */
-#define XBF_LOCK (1 << 14)/* lock requested */
-#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
+#define XBF_LOCK (1 << 15)/* lock requested */
+#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
/* flags used only internally */
-#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
-#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
-#define _XBF_KMEM (1 << 20)/* backed by heap memory */
-#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
+#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
{ XBF_READ, "READ" }, \
{ XBF_WRITE, "WRITE" }, \
+ { XBF_READ_AHEAD, "READ_AHEAD" }, \
{ XBF_MAPPED, "MAPPED" }, \
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_DELWRI, "DELWRI" }, \
{ XBF_STALE, "STALE" }, \
- { XBF_ORDERED, "ORDERED" }, \
- { XBF_READ_AHEAD, "READ_AHEAD" }, \
+ { XBF_SYNCIO, "SYNCIO" }, \
+ { XBF_FUA, "FUA" }, \
+ { XBF_FLUSH, "FLUSH" }, \
{ XBF_LOCK, "LOCK" }, /* should never be set */\
{ XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
{ XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
- { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }
@@ -91,11 +94,6 @@
XBT_FORCE_FLUSH = 1,
} xfs_buftarg_flags_t;
-typedef struct xfs_bufhash {
- struct list_head bh_list;
- spinlock_t bh_lock;
-} xfs_bufhash_t;
-
typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
@@ -151,7 +149,7 @@
xfs_buf_iodone_t b_iodone; /* I/O completion function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
- void *b_fspriv2;
+ struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
unsigned long b_queuetime; /* time buffer was queued */
@@ -192,10 +190,11 @@
extern void xfs_buf_rele(xfs_buf_t *);
/* Locking and Unlocking Buffers */
-extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int xfs_buf_lock_value(xfs_buf_t *);
+extern int xfs_buf_trylock(xfs_buf_t *);
extern void xfs_buf_lock(xfs_buf_t *);
extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+ ((bp)->b_sema.count <= 0)
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
@@ -234,8 +233,9 @@
#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
- ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
+#define XFS_BUF_ZEROFLAGS(bp) \
+ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+ XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
@@ -267,10 +267,6 @@
#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
-#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
-#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
-
#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -280,14 +276,6 @@
#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
-#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
-#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-
-#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
-#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
#define XFS_BUF_SET_START(bp) do { } while (0)
#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
@@ -313,10 +301,6 @@
#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
-#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878f..75e5d32 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -151,14 +151,14 @@
* We don't use ESTALE directly down the chain to not
* confuse applications using bulkstat that expect EINVAL.
*/
- if (error == EINVAL)
+ if (error == EINVAL || error == ENOENT)
error = ESTALE;
return ERR_PTR(-error);
}
if (ip->i_d.di_gen != generation) {
IRELE(ip);
- return ERR_PTR(-ENOENT);
+ return ERR_PTR(-ESTALE);
}
return VFS_I(ip);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f782af..cca00f4 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -127,6 +127,8 @@
STATIC int
xfs_file_fsync(
struct file *file,
+ loff_t start,
+ loff_t end,
int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -138,6 +140,10 @@
trace_xfs_file_fsync(ip);
+ error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (error)
+ return error;
+
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
@@ -875,18 +881,11 @@
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
- int error, error2;
xfs_rw_iunlock(ip, iolock);
- error = filemap_write_and_wait_range(mapping, pos, end);
+ ret = -xfs_file_fsync(file, pos, end,
+ (file->f_flags & __O_SYNC) ? 0 : 1);
xfs_rw_ilock(ip, iolock);
-
- error2 = -xfs_file_fsync(file,
- (file->f_flags & __O_SYNC) ? 0 : 1);
- if (error)
- ret = error;
- else if (error2)
- ret = error2;
}
out_unlock:
@@ -944,7 +943,7 @@
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+ error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
}
out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d44d92c..501e4f6 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -39,6 +39,7 @@
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include <linux/capability.h>
@@ -497,12 +498,442 @@
return 0;
}
+int
+xfs_setattr_nonsize(
+ struct xfs_inode *ip,
+ struct iattr *iattr,
+ int flags)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ int mask = iattr->ia_valid;
+ xfs_trans_t *tp;
+ int error;
+ uid_t uid = 0, iuid = 0;
+ gid_t gid = 0, igid = 0;
+ struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
+
+ trace_xfs_setattr(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ error = -inode_change_ok(inode, iattr);
+ if (error)
+ return XFS_ERROR(error);
+
+ ASSERT((mask & ATTR_SIZE) == 0);
+
+ /*
+ * If disk quotas are on, we make sure that the dquots do exist on disk,
+ * before we start any other transactions. Trying to do this later
+ * is messy. We don't care to take a readlock to look at the ids
+ * in inode here, because we can't hold it across the trans_reserve.
+ * If the IDs do change before we take the ilock, we're covered
+ * because the i_*dquot fields will get updated anyway.
+ */
+ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+ uint qflags = 0;
+
+ if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+ uid = iattr->ia_uid;
+ qflags |= XFS_QMOPT_UQUOTA;
+ } else {
+ uid = ip->i_d.di_uid;
+ }
+ if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+ gid = iattr->ia_gid;
+ qflags |= XFS_QMOPT_GQUOTA;
+ } else {
+ gid = ip->i_d.di_gid;
+ }
+
+ /*
+ * We take a reference when we initialize udqp and gdqp,
+ * so it is important that we never blindly double trip on
+ * the same variable. See xfs_create() for an example.
+ */
+ ASSERT(udqp == NULL);
+ ASSERT(gdqp == NULL);
+ error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+ qflags, &udqp, &gdqp);
+ if (error)
+ return error;
+ }
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (error)
+ goto out_dqrele;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Change file ownership (must be owner or privileged): reserve quota first.
+ */
+ if (mask & (ATTR_UID|ATTR_GID)) {
+ /*
+ * These IDs could have changed since we last looked at them.
+ * But, we're assured that if the ownership did change
+ * while we didn't have the inode locked, inode's dquot(s)
+ * would have changed also.
+ */
+ iuid = ip->i_d.di_uid;
+ igid = ip->i_d.di_gid;
+ gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+ uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+ /*
+ * Do a quota reservation only if uid/gid is actually
+ * going to change.
+ */
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+ ASSERT(tp);
+ error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+ capable(CAP_FOWNER) ?
+ XFS_QMOPT_FORCE_RES : 0);
+ if (error) /* out of quota */
+ goto out_trans_cancel;
+ }
+ }
+
+ xfs_trans_ijoin(tp, ip);
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ */
+ if (mask & (ATTR_UID|ATTR_GID)) {
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The set-user-ID and set-group-ID bits of a file will be
+ * cleared upon successful return from chown()
+ */
+ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ !capable(CAP_FSETID))
+ ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+ */
+ if (iuid != uid) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(mask & ATTR_UID);
+ ASSERT(udqp);
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_udquot, udqp);
+ }
+ ip->i_d.di_uid = uid;
+ inode->i_uid = uid;
+ }
+ if (igid != gid) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(!XFS_IS_PQUOTA_ON(mp));
+ ASSERT(mask & ATTR_GID);
+ ASSERT(gdqp);
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+ ip->i_d.di_gid = gid;
+ inode->i_gid = gid;
+ }
+ }
+
+ /*
+ * Change file access modes.
+ */
+ if (mask & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+
+ ip->i_d.di_mode &= S_IFMT;
+ ip->i_d.di_mode |= mode & ~S_IFMT;
+
+ inode->i_mode &= S_IFMT;
+ inode->i_mode |= mode & ~S_IFMT;
+ }
+
+ /*
+ * Change file access or modified times.
+ */
+ if (mask & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_CTIME) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Release any dquot(s) the inode had kept before chown.
+ */
+ xfs_qm_dqrele(olddquot1);
+ xfs_qm_dqrele(olddquot2);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+
+ if (error)
+ return XFS_ERROR(error);
+
+ /*
+ * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+ * update. We could avoid this with linked transactions
+ * and passing down the transaction pointer all the way
+ * to attr_set. No previous user of the generic
+ * Posix ACL code seems to care about this issue either.
+ */
+ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+ error = -xfs_acl_chmod(inode);
+ if (error)
+ return XFS_ERROR(error);
+ }
+
+ return 0;
+
+out_trans_cancel:
+ xfs_trans_cancel(tp, 0);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ return error;
+}
+
+/*
+ * Truncate file. Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+ struct xfs_inode *ip,
+ struct iattr *iattr,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ int mask = iattr->ia_valid;
+ struct xfs_trans *tp;
+ int error;
+ uint lock_flags;
+ uint commit_flags = 0;
+
+ trace_xfs_setattr(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ error = -inode_change_ok(inode, iattr);
+ if (error)
+ return XFS_ERROR(error);
+
+ ASSERT(S_ISREG(ip->i_d.di_mode));
+ ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+ ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+ ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+ lock_flags = XFS_ILOCK_EXCL;
+ if (!(flags & XFS_ATTR_NOLOCK))
+ lock_flags |= XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+
+ /*
+ * Short circuit the truncate case for zero length files.
+ */
+ if (iattr->ia_size == 0 &&
+ ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+ if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+ goto out_unlock;
+
+ /*
+ * Use the regular setattr path to update the timestamps.
+ */
+ xfs_iunlock(ip, lock_flags);
+ iattr->ia_valid &= ~ATTR_SIZE;
+ return xfs_setattr_nonsize(ip, iattr, 0);
+ }
+
+ /*
+ * Make sure that the dquots are attached to the inode.
+ */
+ error = xfs_qm_dqattach_locked(ip, 0);
+ if (error)
+ goto out_unlock;
+
+ /*
+ * Now we can make the changes. Before we join the inode to the
+ * transaction, take care of the part of the truncation that must be
+ * done without the inode lock. This needs to be done before joining
+ * the inode to the transaction, because the inode cannot be unlocked
+ * once it is a part of the transaction.
+ */
+ if (iattr->ia_size > ip->i_size) {
+ /*
+ * Do the first part of growing a file: zero any data in the
+ * last block that is beyond the old EOF. We need to do this
+ * before the inode is joined to the transaction to modify
+ * i_size.
+ */
+ error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+ if (error)
+ goto out_unlock;
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ lock_flags &= ~XFS_ILOCK_EXCL;
+
+ /*
+ * We are going to log the inode size change in this transaction, so
+ * any previous writes between the on-disk EOF and the new EOF that
+ * have not been written out yet need to be written here. If we
+ * do not write the data out, we expose ourselves to the null files
+ * problem.
+ *
+ * Only flush from the on disk size to the smaller of the in memory
+ * file size or the new size as that's the range we really care about
+ * here and prevents waiting for other data not within the range we
+ * care about here.
+ */
+ if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+ error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+ XBF_ASYNC, FI_NONE);
+ if (error)
+ goto out_unlock;
+ }
+
+ /*
+ * Wait for all I/O to complete.
+ */
+ xfs_ioend_wait(ip);
+
+ error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+ xfs_get_blocks);
+ if (error)
+ goto out_unlock;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+ error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT);
+ if (error)
+ goto out_trans_cancel;
+
+ truncate_setsize(inode, iattr->ia_size);
+
+ commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+ lock_flags |= XFS_ILOCK_EXCL;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ xfs_trans_ijoin(tp, ip);
+
+ /*
+ * Only change the c/mtime if we are changing the size or we are
+ * explicitly asked to change it. This handles the semantic difference
+ * between truncate() and ftruncate() as implemented in the VFS.
+ *
+ * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+ * special case where we need to update the times despite not having
+ * these flags set. For all other operations the VFS sets these flags
+ * explicitly if it wants a timestamp update.
+ */
+ if (iattr->ia_size != ip->i_size &&
+ (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ iattr->ia_ctime = iattr->ia_mtime =
+ current_fs_time(inode->i_sb);
+ mask |= ATTR_CTIME | ATTR_MTIME;
+ }
+
+ if (iattr->ia_size > ip->i_size) {
+ ip->i_d.di_size = iattr->ia_size;
+ ip->i_size = iattr->ia_size;
+ } else if (iattr->ia_size <= ip->i_size ||
+ (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+ error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+ if (error)
+ goto out_trans_abort;
+
+ /*
+ * Truncated "down", so we're removing references to old data
+ * here - if we delay flushing for a long time, we expose
+ * ourselves unduly to the notorious NULL files problem. So,
+ * we mark this inode and flush it when the file is closed,
+ * and do not wait the usual (long) time for writeout.
+ */
+ xfs_iflags_set(ip, XFS_ITRUNCATED);
+ }
+
+ if (mask & ATTR_CTIME) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+ return error;
+
+out_trans_abort:
+ commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+ xfs_trans_cancel(tp, commit_flags);
+ goto out_unlock;
+}
+
STATIC int
xfs_vn_setattr(
struct dentry *dentry,
struct iattr *iattr)
{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
+ if (iattr->ia_valid & ATTR_SIZE)
+ return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+ return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521..d42f814 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -33,7 +33,6 @@
#endif
#include <xfs_types.h>
-#include <xfs_arch.h>
#include <kmem.h>
#include <mrlock.h>
@@ -88,6 +87,12 @@
#include <xfs_buf.h>
#include <xfs_message.h>
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
/*
* Feature macros (disable/enable)
*/
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e..9a72dda 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -33,7 +33,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
@@ -1025,11 +1024,6 @@
{
struct xfs_mount *mp = XFS_M(sb);
- /*
- * Unregister the memory shrinker before we tear down the mount
- * structure so we don't have memory reclaim racing with us here.
- */
- xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
/*
@@ -1412,36 +1406,31 @@
sb->s_time_gran = 1;
set_posix_acl_flag(sb);
- error = xfs_syncd_init(mp);
+ error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
- xfs_inode_shrinker_register(mp);
-
- error = xfs_mountfs(mp);
+ error = xfs_syncd_init(mp);
if (error)
- goto out_syncd_stop;
+ goto out_unmount;
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = ENOENT;
- goto fail_unmount;
+ goto out_syncd_stop;
}
if (is_bad_inode(root)) {
error = EINVAL;
- goto fail_vnrele;
+ goto out_syncd_stop;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
error = ENOMEM;
- goto fail_vnrele;
+ goto out_iput;
}
return 0;
- out_syncd_stop:
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
out_filestream_unmount:
xfs_filestream_unmount(mp);
out_free_sb:
@@ -1456,18 +1445,11 @@
out:
return -error;
- fail_vnrele:
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- } else {
- iput(root);
- }
-
- fail_unmount:
- xfs_inode_shrinker_unregister(mp);
+ out_iput:
+ iput(root);
+ out_syncd_stop:
xfs_syncd_stop(mp);
-
+ out_unmount:
/*
* Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive
@@ -1491,6 +1473,21 @@
return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
+static int
+xfs_fs_nr_cached_objects(
+ struct super_block *sb)
+{
+ return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+ struct super_block *sb,
+ int nr_to_scan)
+{
+ xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
@@ -1504,6 +1501,8 @@
.statfs = xfs_fs_statfs,
.remount_fs = xfs_fs_remount,
.show_options = xfs_fs_show_options,
+ .nr_cached_objects = xfs_fs_nr_cached_objects,
+ .free_cached_objects = xfs_fs_free_cached_objects,
};
static struct file_system_type xfs_fs_type = {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 8ecad5f..e4c938a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -179,6 +179,8 @@
if (error == EFSCORRUPTED)
break;
+ cond_resched();
+
} while (nr_found && !done);
if (skipped) {
@@ -359,14 +361,12 @@
{
int error, error2 = 0;
- /* push non-blocking */
- xfs_sync_data(mp, 0);
xfs_qm_sync(mp, SYNC_TRYLOCK);
-
- /* push and block till complete */
- xfs_sync_data(mp, SYNC_WAIT);
xfs_qm_sync(mp, SYNC_WAIT);
+ /* force out the newly dirtied log buffers */
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp);
@@ -436,7 +436,7 @@
WARN_ON(atomic_read(&mp->m_active_trans) != 0);
/* Push the superblock and write an unmount record */
- error = xfs_log_sbcount(mp, 1);
+ error = xfs_log_sbcount(mp);
if (error)
xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
"Frozen image may not be consistent.");
@@ -986,6 +986,8 @@
*nr_to_scan -= XFS_LOOKUP_BATCH;
+ cond_resched();
+
} while (nr_found && !done && *nr_to_scan > 0);
if (trylock && !done)
@@ -1003,7 +1005,7 @@
* ensure that when we get more reclaimers than AGs we block rather
* than spin trying to execute reclaim.
*/
- if (trylock && skipped && *nr_to_scan > 0) {
+ if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
trylock = 0;
goto restart;
}
@@ -1021,44 +1023,38 @@
}
/*
- * Inode cache shrinker.
+ * Scan a certain number of inodes for reclaim.
*
* When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * progress, while we will throttle the speed of reclaim via doing synchronous
* reclaim of inodes. That means if we come across dirty inodes, we wait for
* them to be cleaned, which we hope will not be very long due to the
* background walker having already kicked the IO off on those dirty inodes.
*/
-static int
-xfs_reclaim_inode_shrink(
- struct shrinker *shrink,
- struct shrink_control *sc)
+void
+xfs_reclaim_inodes_nr(
+ struct xfs_mount *mp,
+ int nr_to_scan)
{
- struct xfs_mount *mp;
- struct xfs_perag *pag;
- xfs_agnumber_t ag;
- int reclaimable;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
+ /* kick background reclaimer and push the AIL */
+ xfs_syncd_queue_reclaim(mp);
+ xfs_ail_push_all(mp->m_ail);
- mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
- if (nr_to_scan) {
- /* kick background reclaimer and push the AIL */
- xfs_syncd_queue_reclaim(mp);
- xfs_ail_push_all(mp->m_ail);
+ xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
- if (!(gfp_mask & __GFP_FS))
- return -1;
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t ag = 0;
+ int reclaimable = 0;
- xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
- &nr_to_scan);
- /* terminate if we don't exhaust the scan */
- if (nr_to_scan > 0)
- return -1;
- }
-
- reclaimable = 0;
- ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable;
@@ -1067,18 +1063,3 @@
return reclaimable;
}
-void
-xfs_inode_shrinker_register(
- struct xfs_mount *mp)
-{
- mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
- mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
- register_shrinker(&mp->m_inode_shrink);
-}
-
-void
-xfs_inode_shrinker_unregister(
- struct xfs_mount *mp)
-{
- unregister_shrinker(&mp->m_inode_shrink);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad2..941202e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,14 +21,6 @@
struct xfs_mount;
struct xfs_perag;
-typedef struct xfs_sync_work {
- struct list_head w_list;
- struct xfs_mount *w_mount;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct xfs_mount *, void *);
- struct completion *w_completion;
-} xfs_sync_work_t;
-
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
@@ -43,6 +35,8 @@
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
@@ -54,7 +48,4 @@
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags);
-void xfs_inode_shrinker_register(struct xfs_mount *mp);
-void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
-
#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d48b7a5..fda0708 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -293,7 +293,7 @@
__entry->buffer_length = bp->b_buffer_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
__entry->caller_ip = caller_ip;
),
@@ -323,7 +323,7 @@
DEFINE_BUF_EVENT(xfs_buf_bdwrite);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_cond_lock);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
@@ -366,7 +366,7 @@
__entry->flags = flags;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
@@ -409,7 +409,7 @@
__entry->buffer_length = bp->b_buffer_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->error = error;
__entry->flags = bp->b_flags;
__entry->caller_ip = caller_ip;
@@ -454,7 +454,7 @@
__entry->buf_flags = bip->bli_buf->b_flags;
__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
- __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
+ __entry->buf_lockval = bip->bli_buf->b_sema.count;
__entry->li_desc = bip->bli_item.li_desc;
__entry->li_flags = bip->bli_item.li_flags;
),
@@ -998,7 +998,8 @@
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
- __field(loff_t, size)
+ __field(loff_t, isize)
+ __field(loff_t, disize)
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
@@ -1006,16 +1007,18 @@
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
+ __entry->isize = ip->i_size;
+ __entry->disize = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
"offset 0x%llx count %zd",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->size,
+ __entry->isize,
+ __entry->disize,
__entry->new_size,
__entry->offset,
__entry->count)
@@ -1028,40 +1031,7 @@
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-
-
-TRACE_EVENT(xfs_itruncate_start,
- TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
- xfs_off_t toss_start, xfs_off_t toss_finish),
- TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
- __field(xfs_off_t, toss_start)
- __field(xfs_off_t, toss_finish)
- __field(int, flag)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = new_size;
- __entry->toss_start = toss_start;
- __entry->toss_finish = toss_finish;
- __entry->flag = flag;
- ),
- TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
- "toss start 0x%llx toss finish 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
- __entry->size,
- __entry->new_size,
- __entry->toss_start,
- __entry->toss_finish)
-);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
@@ -1089,8 +1059,8 @@
DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
TRACE_EVENT(xfs_pagecache_inval,
TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 6fa2146..837f311 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -220,7 +220,7 @@
{
ASSERT(d->d_id);
-#ifdef QUOTADEBUG
+#ifdef DEBUG
if (d->d_blk_hardlimit)
ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
be64_to_cpu(d->d_blk_hardlimit));
@@ -231,6 +231,7 @@
ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
be64_to_cpu(d->d_rtb_hardlimit));
#endif
+
if (!d->d_btimer) {
if ((d->d_blk_softlimit &&
(be64_to_cpu(d->d_bcount) >=
@@ -318,7 +319,7 @@
ASSERT(tp);
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
@@ -534,7 +535,7 @@
}
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
/*
* calculate the location of the dquot inside the buffer.
@@ -622,7 +623,7 @@
* brelse it because we have the changes incore.
*/
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
xfs_trans_brelse(tp, bp);
return (error);
@@ -1423,45 +1424,6 @@
}
-#ifdef QUOTADEBUG
-void
-xfs_qm_dqprint(xfs_dquot_t *dqp)
-{
- struct xfs_mount *mp = dqp->q_mount;
-
- xfs_debug(mp, "-----------KERNEL DQUOT----------------");
- xfs_debug(mp, "---- dquotID = %d",
- (int)be32_to_cpu(dqp->q_core.d_id));
- xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
- xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
- xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
- xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
- xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
- xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
- xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
- xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
- xfs_debug(mp, "---- bcount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_bcount),
- (int)be64_to_cpu(dqp->q_core.d_bcount));
- xfs_debug(mp, "---- icount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_icount),
- (int)be64_to_cpu(dqp->q_core.d_icount));
- xfs_debug(mp, "---- btimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_btimer));
- xfs_debug(mp, "---- itimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_itimer));
- xfs_debug(mp, "---------------------------");
-}
-#endif
-
/*
* Give the buffer a little push if it is incore and
* wait on the flush lock.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5da3a23..34b7e94 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -116,12 +116,6 @@
(XFS_IS_UQUOTA_ON((d)->q_mount)) : \
(XFS_IS_OQUOTA_ON((d)->q_mount))))
-#ifdef QUOTADEBUG
-extern void xfs_qm_dqprint(xfs_dquot_t *);
-#else
-#define xfs_qm_dqprint(a)
-#endif
-
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
extern int xfs_qm_dqpurge(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index b94dace..46e54ad 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -67,32 +67,6 @@
.seeks = DEFAULT_SEEKS,
};
-#ifdef DEBUG
-extern struct mutex qcheck_lock;
-#endif
-
-#ifdef QUOTADEBUG
-static void
-xfs_qm_dquot_list_print(
- struct xfs_mount *mp)
-{
- xfs_dquot_t *dqp;
- int i = 0;
-
- list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
- xfs_debug(mp, " %d. \"%d (%s)\" "
- "bcnt = %lld, icnt = %lld, refs = %d",
- i++, be32_to_cpu(dqp->q_core.d_id),
- DQFLAGTO_TYPESTR(dqp),
- (long long)be64_to_cpu(dqp->q_core.d_bcount),
- (long long)be64_to_cpu(dqp->q_core.d_icount),
- dqp->q_nrefs);
- }
-}
-#else
-static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
-#endif
-
/*
* Initialize the XQM structure.
* Note that there is not one quota manager per file system.
@@ -165,9 +139,6 @@
atomic_set(&xqm->qm_totaldquots, 0);
xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
xqm->qm_nrefs = 0;
-#ifdef DEBUG
- mutex_init(&qcheck_lock);
-#endif
return xqm;
out_free_udqhash:
@@ -204,9 +175,6 @@
mutex_lock(&xqm->qm_dqfrlist_lock);
list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
xfs_dqlock(dqp);
-#ifdef QUOTADEBUG
- xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
-#endif
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
xfs_dqunlock(dqp);
@@ -214,9 +182,6 @@
}
mutex_unlock(&xqm->qm_dqfrlist_lock);
mutex_destroy(&xqm->qm_dqfrlist_lock);
-#ifdef DEBUG
- mutex_destroy(&qcheck_lock);
-#endif
kmem_free(xqm);
}
@@ -409,11 +374,6 @@
xfs_warn(mp, "Failed to initialize disk quotas.");
return;
}
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp))
- xfs_qm_internalqcheck(mp);
-#endif
}
/*
@@ -866,8 +826,8 @@
}
done:
-#ifdef QUOTADEBUG
- if (! error) {
+#ifdef DEBUG
+ if (!error) {
if (XFS_IS_UQUOTA_ON(mp))
ASSERT(ip->i_udquot);
if (XFS_IS_OQUOTA_ON(mp))
@@ -1733,8 +1693,6 @@
mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
mp->m_qflags |= flags;
- xfs_qm_dquot_list_print(mp);
-
error_return:
if (error) {
xfs_warn(mp,
@@ -2096,9 +2054,6 @@
xfs_trans_t *tp;
int error;
-#ifdef QUOTADEBUG
- xfs_notice(mp, "Writing superblock quota changes");
-#endif
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
if ((error = xfs_trans_reserve(tp, 0,
mp->m_sb.sb_sectsize + 128, 0,
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 567b29b..43b9abe 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -163,10 +163,4 @@
extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-#ifdef DEBUG
-extern int xfs_qm_internalqcheck(xfs_mount_t *);
-#else
-#define xfs_qm_internalqcheck(mp) (0)
-#endif
-
#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2dadb15..609246f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -263,7 +263,7 @@
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip);
- error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
+ error = xfs_itruncate_data(&tp, ip, 0);
if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT);
@@ -622,7 +622,6 @@
xfs_trans_log_dquot(tp, dqp);
error = xfs_trans_commit(tp, 0);
- xfs_qm_dqprint(dqp);
xfs_qm_dqrele(dqp);
out_unlock:
@@ -657,7 +656,6 @@
xfs_qm_dqput(dqp);
return XFS_ERROR(ENOENT);
}
- /* xfs_qm_dqprint(dqp); */
/*
* Convert the disk dquot to the exportable format
*/
@@ -906,354 +904,3 @@
ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
}
-
-/*------------------------------------------------------------------------*/
-#ifdef DEBUG
-/*
- * This contains all the test functions for XFS disk quotas.
- * Currently it does a quota accounting check. ie. it walks through
- * all inodes in the file system, calculating the dquot accounting fields,
- * and prints out any inconsistencies.
- */
-xfs_dqhash_t *qmtest_udqtab;
-xfs_dqhash_t *qmtest_gdqtab;
-int qmtest_hashmask;
-int qmtest_nfails;
-struct mutex qcheck_lock;
-
-#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
- (__psunsigned_t)(id)) & \
- (qmtest_hashmask - 1))
-
-#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
- (qmtest_udqtab + \
- DQTEST_HASHVAL(mp, id)) : \
- (qmtest_gdqtab + \
- DQTEST_HASHVAL(mp, id)))
-
-#define DQTEST_LIST_PRINT(l, NXT, title) \
-{ \
- xfs_dqtest_t *dqp; int i = 0;\
- xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
- for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
- dqp = (xfs_dqtest_t *)dqp->NXT) { \
- xfs_debug(dqp->q_mount, \
- " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
- ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
- dqp->d_bcount, dqp->d_icount); } \
-}
-
-typedef struct dqtest {
- uint dq_flags; /* various flags (XFS_DQ_*) */
- struct list_head q_hashlist;
- xfs_dqhash_t *q_hash; /* the hashchain header */
- xfs_mount_t *q_mount; /* filesystem this relates to */
- xfs_dqid_t d_id; /* user id or group id */
- xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
- xfs_qcnt_t d_icount; /* # inodes owned by the user */
-} xfs_dqtest_t;
-
-STATIC void
-xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
-{
- list_add(&dqp->q_hashlist, &h->qh_list);
- h->qh_version++;
- h->qh_nelems++;
-}
-STATIC void
-xfs_qm_dqtest_print(
- struct xfs_mount *mp,
- struct dqtest *d)
-{
- xfs_debug(mp, "-----------DQTEST DQUOT----------------");
- xfs_debug(mp, "---- dquot ID = %d", d->d_id);
- xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
- xfs_debug(mp, "---- bcount = %Lu (0x%x)",
- d->d_bcount, (int)d->d_bcount);
- xfs_debug(mp, "---- icount = %Lu (0x%x)",
- d->d_icount, (int)d->d_icount);
- xfs_debug(mp, "---------------------------");
-}
-
-STATIC void
-xfs_qm_dqtest_failed(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp,
- char *reason,
- xfs_qcnt_t a,
- xfs_qcnt_t b,
- int error)
-{
- qmtest_nfails++;
- if (error)
- xfs_debug(dqp->q_mount,
- "quotacheck failed id=%d, err=%d\nreason: %s",
- d->d_id, error, reason);
- else
- xfs_debug(dqp->q_mount,
- "quotacheck failed id=%d (%s) [%d != %d]",
- d->d_id, reason, (int)a, (int)b);
- xfs_qm_dqtest_print(dqp->q_mount, d);
- if (dqp)
- xfs_qm_dqprint(dqp);
-}
-
-STATIC int
-xfs_dqtest_cmp2(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp)
-{
- int err = 0;
- if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
- xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
- be64_to_cpu(dqp->q_core.d_icount),
- d->d_icount, 0);
- err++;
- }
- if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
- xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
- be64_to_cpu(dqp->q_core.d_bcount),
- d->d_bcount, 0);
- err++;
- }
- if (dqp->q_core.d_blk_softlimit &&
- be64_to_cpu(dqp->q_core.d_bcount) >=
- be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
- if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
- xfs_debug(dqp->q_mount,
- "%d [%s] BLK TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d));
- err++;
- }
- }
- if (dqp->q_core.d_ino_softlimit &&
- be64_to_cpu(dqp->q_core.d_icount) >=
- be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
- if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
- xfs_debug(dqp->q_mount,
- "%d [%s] INO TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d));
- err++;
- }
- }
-#ifdef QUOTADEBUG
- if (!err) {
- xfs_debug(dqp->q_mount, "%d [%s] qchecked",
- d->d_id, DQFLAGTO_TYPESTR(d));
- }
-#endif
- return (err);
-}
-
-STATIC void
-xfs_dqtest_cmp(
- xfs_dqtest_t *d)
-{
- xfs_dquot_t *dqp;
- int error;
-
- /* xfs_qm_dqtest_print(d); */
- if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
- &dqp))) {
- xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
- return;
- }
- xfs_dqtest_cmp2(d, dqp);
- xfs_qm_dqput(dqp);
-}
-
-STATIC int
-xfs_qm_internalqcheck_dqget(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_dqtest_t **O_dq)
-{
- xfs_dqtest_t *d;
- xfs_dqhash_t *h;
-
- h = DQTEST_HASH(mp, id, type);
- list_for_each_entry(d, &h->qh_list, q_hashlist) {
- if (d->d_id == id && mp == d->q_mount) {
- *O_dq = d;
- return (0);
- }
- }
- d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
- d->dq_flags = type;
- d->d_id = id;
- d->q_mount = mp;
- d->q_hash = h;
- INIT_LIST_HEAD(&d->q_hashlist);
- xfs_qm_hashinsert(h, d);
- *O_dq = d;
- return (0);
-}
-
-STATIC void
-xfs_qm_internalqcheck_get_dquots(
- xfs_mount_t *mp,
- xfs_dqid_t uid,
- xfs_dqid_t projid,
- xfs_dqid_t gid,
- xfs_dqtest_t **ud,
- xfs_dqtest_t **gd)
-{
- if (XFS_IS_UQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
- if (XFS_IS_GQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
- else if (XFS_IS_PQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
-}
-
-
-STATIC void
-xfs_qm_internalqcheck_dqadjust(
- xfs_inode_t *ip,
- xfs_dqtest_t *d)
-{
- d->d_icount++;
- d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
-}
-
-STATIC int
-xfs_qm_internalqcheck_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- int *ubused, /* not used */
- int *res) /* bulkstat result code */
-{
- xfs_inode_t *ip;
- xfs_dqtest_t *ud, *gd;
- uint lock_flags;
- boolean_t ipreleased;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
- *res = BULKSTAT_RV_NOTHING;
- xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
- __func__, (unsigned long long) ino,
- (unsigned long long) mp->m_sb.sb_uquotino,
- (unsigned long long) mp->m_sb.sb_gquotino);
- return XFS_ERROR(EINVAL);
- }
- ipreleased = B_FALSE;
- again:
- lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
- *res = BULKSTAT_RV_NOTHING;
- return (error);
- }
-
- /*
- * This inode can have blocks after eof which can get released
- * when we send it to inactive. Since we don't check the dquot
- * until the after all our calculations are done, we must get rid
- * of those now.
- */
- if (! ipreleased) {
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
- ipreleased = B_TRUE;
- goto again;
- }
- xfs_qm_internalqcheck_get_dquots(mp,
- (xfs_dqid_t) ip->i_d.di_uid,
- (xfs_dqid_t) xfs_get_projid(ip),
- (xfs_dqid_t) ip->i_d.di_gid,
- &ud, &gd);
- if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(ud);
- xfs_qm_internalqcheck_dqadjust(ip, ud);
- }
- if (XFS_IS_OQUOTA_ON(mp)) {
- ASSERT(gd);
- xfs_qm_internalqcheck_dqadjust(ip, gd);
- }
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
- *res = BULKSTAT_RV_DIDONE;
- return (0);
-}
-
-
-/* PRIVATE, debugging */
-int
-xfs_qm_internalqcheck(
- xfs_mount_t *mp)
-{
- xfs_ino_t lastino;
- int done, count;
- int i;
- int error;
-
- lastino = 0;
- qmtest_hashmask = 32;
- count = 5;
- done = 0;
- qmtest_nfails = 0;
-
- if (! XFS_IS_QUOTA_ON(mp))
- return XFS_ERROR(ESRCH);
-
- xfs_log_force(mp, XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
- xfs_log_force(mp, XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
-
- mutex_lock(&qcheck_lock);
- /* There should be absolutely no quota activity while this
- is going on. */
- qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters
- */
- error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust,
- 0, NULL, &done);
- if (error) {
- xfs_debug(mp, "Bulkstat returned error 0x%x", error);
- break;
- }
- } while (!done);
-
- xfs_debug(mp, "Checking results against system dquots");
- for (i = 0; i < qmtest_hashmask; i++) {
- xfs_dqtest_t *d, *n;
- xfs_dqhash_t *h;
-
- h = &qmtest_udqtab[i];
- list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
- xfs_dqtest_cmp(d);
- kmem_free(d);
- }
- h = &qmtest_gdqtab[i];
- list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
- xfs_dqtest_cmp(d);
- kmem_free(d);
- }
- }
-
- if (qmtest_nfails) {
- xfs_debug(mp, "******** quotacheck failed ********");
- xfs_debug(mp, "failures = %d", qmtest_nfails);
- } else {
- xfs_debug(mp, "******** quotacheck successful! ********");
- }
- kmem_free(qmtest_udqtab);
- kmem_free(qmtest_gdqtab);
- mutex_unlock(&qcheck_lock);
- return (qmtest_nfails);
-}
-
-#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 2a36487..4d00ee6 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,7 +59,7 @@
xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
/*
- * Initialize i_transp so we can later determine if this dquot is
+ * Initialize q_transp so we can later determine if this dquot is
* associated with this transaction.
*/
dqp->q_transp = tp;
@@ -387,18 +387,18 @@
qtrx->qt_delbcnt_delta;
totalrtbdelta = qtrx->qt_rtbcount_delta +
qtrx->qt_delrtb_delta;
-#ifdef QUOTADEBUG
+#ifdef DEBUG
if (totalbdelta < 0)
ASSERT(be64_to_cpu(d->d_bcount) >=
- (xfs_qcnt_t) -totalbdelta);
+ -totalbdelta);
if (totalrtbdelta < 0)
ASSERT(be64_to_cpu(d->d_rtbcount) >=
- (xfs_qcnt_t) -totalrtbdelta);
+ -totalrtbdelta);
if (qtrx->qt_icount_delta < 0)
ASSERT(be64_to_cpu(d->d_icount) >=
- (xfs_qcnt_t) -qtrx->qt_icount_delta);
+ -qtrx->qt_icount_delta);
#endif
if (totalbdelta)
be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
@@ -642,11 +642,6 @@
((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
(XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
(XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-#ifdef QUOTADEBUG
- xfs_debug(mp,
- "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
- nblks, *resbcountp, hardlimit);
-#endif
if (nblks > 0) {
/*
* dquot is locked already. See if we'd go over the
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 5ad8ad3..53ec3ea 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,6 @@
#define STATIC
#define DEBUG 1
#define XFS_BUF_LOCK_TRACKING 1
-/* #define QUOTADEBUG 1 */
#endif
#include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 11dd720..0135e2a 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
+extern int xfs_check_acl(struct inode *inode, int mask);
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 95862bb..1e00b3e 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -570,9 +570,7 @@
xfs_agblock_t tbno; /* start block of trimmed extent */
xfs_extlen_t tlen; /* length of trimmed extent */
xfs_agblock_t tend; /* end block of trimmed extent */
- xfs_agblock_t end; /* end of allocated extent */
int i; /* success/failure of operation */
- xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
@@ -625,18 +623,16 @@
*
* Fix the length according to mod and prod if given.
*/
- end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
- args->len = end - args->agbno;
+ args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+ - args->agbno;
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args))
goto not_found;
- rlen = args->len;
- ASSERT(args->agbno + rlen <= tend);
- end = args->agbno + rlen;
+ ASSERT(args->agbno + args->len <= tend);
/*
- * We are allocating agbno for rlen [agbno .. end]
+ * We are allocating agbno for args->len
* Allocate/initialize a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -2127,7 +2123,7 @@
* Validate the magic number of the agf block.
*/
agf_ok =
- be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
+ agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 2b35188..ffb3386 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -31,7 +31,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -311,72 +310,6 @@
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_allocbt_trace_buf;
-
-STATIC void
-xfs_allocbt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
- (void *)func, (void *)s, NULL, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_allocbt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *s0 = cur->bc_private.a.agno;
- *l0 = cur->bc_rec.a.ar_startblock;
- *l1 = cur->bc_rec.a.ar_blockcount;
-}
-
-STATIC void
-xfs_allocbt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be32_to_cpu(key->alloc.ar_startblock);
- *l1 = be32_to_cpu(key->alloc.ar_blockcount);
-}
-
-STATIC void
-xfs_allocbt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- *l0 = be32_to_cpu(rec->alloc.ar_startblock);
- *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
- *l2 = 0;
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_allocbt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -393,18 +326,10 @@
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
.key_diff = xfs_allocbt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_allocbt_keys_inorder,
.recs_inorder = xfs_allocbt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_allocbt_trace_enter,
- .trace_cursor = xfs_allocbt_trace_cursor,
- .trace_key = xfs_allocbt_trace_key,
- .trace_record = xfs_allocbt_trace_record,
-#endif
};
/*
@@ -427,13 +352,16 @@
cur->bc_tp = tp;
cur->bc_mp = mp;
- cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
-
cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_CNT)
+
+ if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+ } else {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+ }
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
deleted file mode 100644
index 0902249..0000000
--- a/fs/xfs/xfs_arch.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_ARCH_H__
-#define __XFS_ARCH_H__
-
-#ifndef XFS_BIG_INUMS
-# error XFS_BIG_INUMS must be defined true or false
-#endif
-
-#ifdef __KERNEL__
-
-#include <asm/byteorder.h>
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-#else /* __KERNEL__ */
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-#ifdef XFS_NATIVE_HOST
-#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
-#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
-#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
-#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
-#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
-#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
-#else
-#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
-#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
-#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
-#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
-#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
-#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
-#endif
-
-static inline void be16_add_cpu(__be16 *a, __s16 b)
-{
- *a = cpu_to_be16(be16_to_cpu(*a) + b);
-}
-
-static inline void be32_add_cpu(__be32 *a, __s32 b)
-{
- *a = cpu_to_be32(be32_to_cpu(*a) + b);
-}
-
-static inline void be64_add_cpu(__be64 *a, __s64 b)
-{
- *a = cpu_to_be64(be64_to_cpu(*a) + b);
-}
-
-#endif /* __KERNEL__ */
-
-/*
- * get and set integers from potentially unaligned locations
- */
-
-#define INT_GET_UNALIGNED_16_BE(pointer) \
- ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
-#define INT_SET_UNALIGNED_16_BE(pointer,value) \
- { \
- ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
- ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
- }
-
-/*
- * In directories inode numbers are stored as unaligned arrays of unsigned
- * 8bit integers on disk.
- *
- * For v1 directories or v2 directories that contain inode numbers that
- * do not fit into 32bit the array has eight members, but the first member
- * is always zero:
- *
- * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7|
- *
- * For v2 directories that only contain entries with inode numbers that fit
- * into 32bits a four-member array is used:
- *
- * |24-31|16-23| 8-15| 0- 7|
- */
-
-#define XFS_GET_DIR_INO4(di) \
- (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
-
-#define XFS_PUT_DIR_INO4(from, di) \
-do { \
- (di).i[0] = (((from) & 0xff000000ULL) >> 24); \
- (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \
- (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \
- (di).i[3] = ((from) & 0x000000ffULL); \
-} while (0)
-
-#define XFS_DI_HI(di) \
- (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
-#define XFS_DI_LO(di) \
- (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
-
-#define XFS_GET_DIR_INO8(di) \
- (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
- ((xfs_ino_t)XFS_DI_HI(di) << 32))
-
-#define XFS_PUT_DIR_INO8(from, di) \
-do { \
- (di).i[0] = 0; \
- (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \
- (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \
- (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \
- (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \
- (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \
- (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \
- (di).i[7] = ((from) & 0x00000000000000ffULL); \
-} while (0)
-
-#endif /* __XFS_ARCH_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01d2072..cbae424 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -822,17 +822,21 @@
error = xfs_attr_root_inactive(&trans, dp);
if (error)
goto out;
+
/*
- * signal synchronous inactive transactions unless this
- * is a synchronous mount filesystem in which case we
- * know that we're here because we've been called out of
- * xfs_inactive which means that the last reference is gone
- * and the unlink transaction has already hit the disk so
- * async inactive transactions are safe.
+ * Signal synchronous inactive transactions unless this is a
+ * synchronous mount (XFS_MOUNT_WSYNC). In that case we know we are
+ * here because we were called out of xfs_inactive, which means that
+ * the last reference is gone and the unlink transaction has already
+ * hit the disk, so async inactive transactions are safe.
*/
- if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
- (!(mp->m_flags & XFS_MOUNT_WSYNC)
- ? 1 : 0))))
+ if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
+ if (dp->i_d.di_anextents > 0)
+ xfs_trans_set_sync(trans);
+ }
+
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+ if (error)
goto out;
/*
@@ -1199,7 +1203,7 @@
return XFS_ERROR(error);
ASSERT(bp != NULL);
leaf = bp->data;
- if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
+ if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
xfs_da_brelse(NULL, bp);
@@ -1606,9 +1610,8 @@
XFS_ATTR_FORK);
if (error)
goto out;
- ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
- bp->data)->hdr.info.magic)
- == XFS_ATTR_LEAF_MAGIC);
+ ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_bmap_init(args->flist, args->firstblock);
@@ -1873,11 +1876,11 @@
return(XFS_ERROR(EFSCORRUPTED));
}
node = bp->data;
- if (be16_to_cpu(node->hdr.info.magic)
- == XFS_ATTR_LEAF_MAGIC)
+ if (node->hdr.info.magic ==
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
break;
- if (unlikely(be16_to_cpu(node->hdr.info.magic)
- != XFS_DA_NODE_MAGIC)) {
+ if (unlikely(node->hdr.info.magic !=
+ cpu_to_be16(XFS_DA_NODE_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
@@ -1912,8 +1915,8 @@
*/
for (;;) {
leaf = bp->data;
- if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
- != XFS_ATTR_LEAF_MAGIC)) {
+ if (unlikely(leaf->hdr.info.magic !=
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 71e90dc2..8fad960 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -731,7 +731,7 @@
int bytes, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
entry = &leaf->entries[0];
bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -777,7 +777,7 @@
ASSERT(bp != NULL);
memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
/*
@@ -872,7 +872,7 @@
goto out;
node = bp1->data;
leaf = bp2->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/* both on-disk, don't endian-flip twice */
node->btree[0].hashval =
leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
@@ -997,7 +997,7 @@
int tablesize, entsize, sum, tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT((args->index >= 0)
&& (args->index <= be16_to_cpu(leaf->hdr.count)));
hdr = &leaf->hdr;
@@ -1070,7 +1070,7 @@
int tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -1256,8 +1256,8 @@
ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
leaf1 = blk1->bp->data;
leaf2 = blk2->bp->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
args = state->args;
/*
@@ -1533,7 +1533,7 @@
*/
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
leaf = (xfs_attr_leafblock_t *)info;
count = be16_to_cpu(leaf->hdr.count);
bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1596,7 +1596,7 @@
bytes = state->blocksize - (state->blocksize>>2);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
count += be16_to_cpu(leaf->hdr.count);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1650,7 +1650,7 @@
xfs_mount_t *mp;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
mp = args->trans->t_mountp;
ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1813,8 +1813,8 @@
ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
drop_leaf = drop_blk->bp->data;
save_leaf = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
drop_hdr = &drop_leaf->hdr;
save_hdr = &save_leaf->hdr;
@@ -1915,7 +1915,7 @@
xfs_dahash_t hashval;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2019,7 +2019,7 @@
xfs_attr_leaf_name_remote_t *name_rmt;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -2087,8 +2087,8 @@
/*
* Set up environment.
*/
- ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr_s = &leaf_s->hdr;
hdr_d = &leaf_d->hdr;
ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2222,8 +2222,8 @@
leaf1 = leaf1_bp->data;
leaf2 = leaf2_bp->data;
- ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
- (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
+ ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
+ (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
(be16_to_cpu(leaf2->hdr.count) > 0) &&
((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2246,7 +2246,7 @@
xfs_attr_leafblock_t *leaf;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
if (!leaf->hdr.count)
@@ -2265,7 +2265,7 @@
xfs_attr_leaf_name_remote_t *name_rmt;
int size;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr_leaf_name_local(leaf, index);
size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2451,7 +2451,7 @@
ASSERT(bp != NULL);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
@@ -2515,7 +2515,7 @@
ASSERT(bp != NULL);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
@@ -2585,13 +2585,13 @@
}
leaf1 = bp1->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
ASSERT(args->index >= 0);
entry1 = &leaf1->entries[ args->index ];
leaf2 = bp2->data;
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
ASSERT(args->index2 >= 0);
entry2 = &leaf2->entries[ args->index2 ];
@@ -2689,9 +2689,9 @@
* This is a depth-first traversal!
*/
info = bp->data;
- if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
+ if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp, bp, 1);
- } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
+ } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
error = xfs_attr_leaf_inactive(trans, dp, bp);
} else {
error = XFS_ERROR(EIO);
@@ -2739,7 +2739,7 @@
}
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
parent_blkno = xfs_da_blkno(bp); /* save for re-read later */
count = be16_to_cpu(node->hdr.count);
if (!count) {
@@ -2773,10 +2773,10 @@
* Invalidate the subtree, however we have to.
*/
info = child_bp->data;
- if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
+ if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp,
child_bp, level+1);
- } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
+ } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
error = xfs_attr_leaf_inactive(trans, dp,
child_bp);
} else {
@@ -2836,7 +2836,7 @@
int error, count, size, tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/*
* Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e546a33..c51a3f9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -29,15 +29,11 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_mount.h"
#include "xfs_itable.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
@@ -94,6 +90,7 @@
*/
STATIC int /* error */
xfs_bmap_add_extent_delay_real(
+ struct xfs_trans *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -439,6 +436,7 @@
*/
STATIC int /* error */
xfs_bmap_add_extent(
+ struct xfs_trans *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -524,7 +522,7 @@
if (cur)
ASSERT(cur->bc_private.b.flags &
XFS_BTCUR_BPRV_WASDEL);
- error = xfs_bmap_add_extent_delay_real(ip,
+ error = xfs_bmap_add_extent_delay_real(tp, ip,
idx, &cur, new, &da_new,
first, flist, &logflags);
} else {
@@ -561,7 +559,7 @@
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
+ error = xfs_bmap_extents_to_btree(tp, ip, first,
flist, &cur, da_old > 0, &tmp_logflags, whichfork);
logflags |= tmp_logflags;
if (error)
@@ -604,6 +602,7 @@
*/
STATIC int /* error */
xfs_bmap_add_extent_delay_real(
+ struct xfs_trans *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -901,7 +900,7 @@
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+ error = xfs_bmap_extents_to_btree(tp, ip,
first, flist, &cur, 1, &tmp_rval,
XFS_DATA_FORK);
rval |= tmp_rval;
@@ -984,7 +983,7 @@
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+ error = xfs_bmap_extents_to_btree(tp, ip,
first, flist, &cur, 1, &tmp_rval,
XFS_DATA_FORK);
rval |= tmp_rval;
@@ -1052,7 +1051,7 @@
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+ error = xfs_bmap_extents_to_btree(tp, ip,
first, flist, &cur, 1, &tmp_rval,
XFS_DATA_FORK);
rval |= tmp_rval;
@@ -2871,8 +2870,8 @@
len = del->br_blockcount;
do_div(bno, mp->m_sb.sb_rextsize);
do_div(len, mp->m_sb.sb_rextsize);
- if ((error = xfs_rtfree_extent(ip->i_transp, bno,
- (xfs_extlen_t)len)))
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
goto done;
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
@@ -4080,7 +4079,7 @@
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+ if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
be16_to_cpu(block->bb_level) != level ||
be16_to_cpu(block->bb_numrecs) == 0 ||
be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -4662,7 +4661,7 @@
if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
got.br_state = XFS_EXT_UNWRITTEN;
}
- error = xfs_bmap_add_extent(ip, &lastx, &cur, &got,
+ error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got,
firstblock, flist, &tmp_logflags,
whichfork);
logflags |= tmp_logflags;
@@ -4763,7 +4762,7 @@
mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
? XFS_EXT_NORM
: XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur, mval,
+ error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval,
firstblock, flist, &tmp_logflags,
whichfork);
logflags |= tmp_logflags;
@@ -5117,7 +5116,7 @@
del.br_blockcount = mod;
}
del.br_state = XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur, &del,
+ error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del,
firstblock, flist, &logflags,
XFS_DATA_FORK);
if (error)
@@ -5175,18 +5174,18 @@
}
prev.br_state = XFS_EXT_UNWRITTEN;
lastx--;
- error = xfs_bmap_add_extent(ip, &lastx, &cur,
- &prev, firstblock, flist, &logflags,
- XFS_DATA_FORK);
+ error = xfs_bmap_add_extent(tp, ip, &lastx,
+ &cur, &prev, firstblock, flist,
+ &logflags, XFS_DATA_FORK);
if (error)
goto error0;
goto nodelete;
} else {
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur,
- &del, firstblock, flist, &logflags,
- XFS_DATA_FORK);
+ error = xfs_bmap_add_extent(tp, ip, &lastx,
+ &cur, &del, firstblock, flist,
+ &logflags, XFS_DATA_FORK);
if (error)
goto error0;
goto nodelete;
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 87d3c10..e2f5d59 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -33,7 +33,6 @@
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
@@ -425,10 +424,10 @@
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
- ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
- ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+ ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
+ ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_level != 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -732,95 +731,6 @@
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_bmbt_trace_buf;
-
-STATIC void
-xfs_bmbt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- struct xfs_inode *ip = cur->bc_private.b.ip;
- int whichfork = cur->bc_private.b.whichfork;
-
- ktrace_enter(xfs_bmbt_trace_buf,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_bmbt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- struct xfs_bmbt_rec_host r;
-
- xfs_bmbt_set_all(&r, &cur->bc_rec.b);
-
- *s0 = (cur->bc_nlevels << 24) |
- (cur->bc_private.b.flags << 16) |
- cur->bc_private.b.allocated;
- *l0 = r.l0;
- *l1 = r.l1;
-}
-
-STATIC void
-xfs_bmbt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be64_to_cpu(key->bmbt.br_startoff);
- *l1 = 0;
-}
-
-/* Endian flipping versions of the bmbt extraction functions */
-STATIC void
-xfs_bmbt_disk_get_all(
- xfs_bmbt_rec_t *r,
- xfs_bmbt_irec_t *s)
-{
- __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
- get_unaligned_be64(&r->l1), s);
-}
-
-STATIC void
-xfs_bmbt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- struct xfs_bmbt_irec irec;
-
- xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
- *l0 = irec.br_startoff;
- *l1 = irec.br_startblock;
- *l2 = irec.br_blockcount;
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
.key_len = sizeof(xfs_bmbt_key_t),
@@ -837,18 +747,10 @@
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_bmbt_trace_enter,
- .trace_cursor = xfs_bmbt_trace_cursor,
- .trace_key = xfs_bmbt_trace_key,
- .trace_record = xfs_bmbt_trace_record,
-#endif
};
/*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 2f9e97c..cabf4b5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -32,7 +32,6 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -66,11 +65,11 @@
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
- (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+ (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
- (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+ (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_rightsib)));
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -105,10 +104,10 @@
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
- (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+ (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
block->bb_u.s.bb_leftsib &&
- (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+ (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -511,9 +510,9 @@
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
else
- return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
+ return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
/*
@@ -777,14 +776,14 @@
b = XFS_BUF_TO_BLOCK(bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
+ if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
+ if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
} else {
- if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
+ if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
+ if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
}
}
@@ -795,9 +794,9 @@
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return be64_to_cpu(ptr->l) == NULLDFSBNO;
+ return ptr->l == cpu_to_be64(NULLDFSBNO);
else
- return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+ return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
STATIC void
@@ -923,12 +922,12 @@
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
+ ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
} else {
ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
- ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
+ ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
be32_to_cpu(ptr->s));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 82fafc6..8d05a6a 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -199,25 +199,6 @@
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
#endif
-
- /* btree tracing */
-#ifdef XFS_BTREE_TRACE
- void (*trace_enter)(struct xfs_btree_cur *, const char *,
- char *, int, int, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t);
- void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
- __uint64_t *, __uint64_t *);
- void (*trace_key)(struct xfs_btree_cur *,
- union xfs_btree_key *, __uint64_t *,
- __uint64_t *);
- void (*trace_record)(struct xfs_btree_cur *,
- union xfs_btree_rec *, __uint64_t *,
- __uint64_t *, __uint64_t *);
-#endif
};
/*
@@ -452,4 +433,23 @@
(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
+/*
+ * Trace hooks. Currently not implemented as they need to be ported
+ * over to the generic tracing functionality, which is some effort.
+ *
+ * i,j = integer (32 bit)
+ * b = btree block buffer (xfs_buf_t)
+ * p = btree ptr
+ * r = btree record
+ * k = btree key
+ */
+#define XFS_BTREE_TRACE_ARGBI(c, b, i)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
+#define XFS_BTREE_TRACE_ARGI(c, i)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k)
+#define XFS_BTREE_TRACE_ARGR(c, r)
+#define XFS_BTREE_TRACE_CURSOR(c, t)
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
deleted file mode 100644
index 44ff942..0000000
--- a/fs/xfs/xfs_btree_trace.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
-
-STATIC void
-xfs_btree_trace_ptr(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr ptr,
- __psunsigned_t *high,
- __psunsigned_t *low)
-{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- __u64 val = be64_to_cpu(ptr.l);
- *high = val >> 32;
- *low = (int)val;
- } else {
- *high = 0;
- *low = be32_to_cpu(ptr.s);
- }
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
- */
-void
-xfs_btree_trace_argbi(
- const char *func,
- struct xfs_btree_cur *cur,
- struct xfs_buf *b,
- int i,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
- line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
- */
-void
-xfs_btree_trace_argbii(
- const char *func,
- struct xfs_btree_cur *cur,
- struct xfs_buf *b,
- int i0,
- int i1,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
- line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for 3 block-length args
- * and an integer arg.
- */
-void
-xfs_btree_trace_argfffi(
- const char *func,
- struct xfs_btree_cur *cur,
- xfs_dfiloff_t o,
- xfs_dfsbno_t b,
- xfs_dfilblks_t i,
- int j,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
- line,
- o >> 32, (int)o,
- b >> 32, (int)b,
- i >> 32, (int)i,
- (int)j, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for one integer arg.
- */
-void
-xfs_btree_trace_argi(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
- line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, key.
- */
-void
-xfs_btree_trace_argipk(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_ptr ptr,
- union xfs_btree_key *key,
- int line)
-{
- __psunsigned_t high, low;
- __uint64_t l0, l1;
-
- xfs_btree_trace_ptr(cur, ptr, &high, &low);
- cur->bc_ops->trace_key(cur, key, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
- line, i, high, low,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, rec.
- */
-void
-xfs_btree_trace_argipr(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_ptr ptr,
- union xfs_btree_rec *rec,
- int line)
-{
- __psunsigned_t high, low;
- __uint64_t l0, l1, l2;
-
- xfs_btree_trace_ptr(cur, ptr, &high, &low);
- cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
- line, i,
- high, low,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- l2 >> 32, (int)l2,
- 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, key.
- */
-void
-xfs_btree_trace_argik(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_key *key,
- int line)
-{
- __uint64_t l0, l1;
-
- cur->bc_ops->trace_key(cur, key, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
- line, i,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- 0, 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for record.
- */
-void
-xfs_btree_trace_argr(
- const char *func,
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- int line)
-{
- __uint64_t l0, l1, l2;
-
- cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
- line,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- l2 >> 32, (int)l2,
- 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for the cursor/operation.
- */
-void
-xfs_btree_trace_cursor(
- const char *func,
- struct xfs_btree_cur *cur,
- int type,
- int line)
-{
- __uint32_t s0;
- __uint64_t l0, l1;
- char *s;
-
- switch (type) {
- case XBT_ARGS:
- s = "args";
- break;
- case XBT_ENTRY:
- s = "entry";
- break;
- case XBT_ERROR:
- s = "error";
- break;
- case XBT_EXIT:
- s = "exit";
- break;
- default:
- s = "unknown";
- break;
- }
-
- cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
- s0,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- (__psunsigned_t)cur->bc_bufs[0],
- (__psunsigned_t)cur->bc_bufs[1],
- (__psunsigned_t)cur->bc_bufs[2],
- (__psunsigned_t)cur->bc_bufs[3],
- (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
- (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
-}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
deleted file mode 100644
index 2d8a309..0000000
--- a/fs/xfs/xfs_btree_trace.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_BTREE_TRACE_H__
-#define __XFS_BTREE_TRACE_H__
-
-struct xfs_btree_cur;
-struct xfs_buf;
-
-
-/*
- * Trace hooks.
- * i,j = integer (32 bit)
- * b = btree block buffer (xfs_buf_t)
- * p = btree ptr
- * r = btree record
- * k = btree key
- */
-
-#ifdef XFS_BTREE_TRACE
-
-/*
- * Trace buffer entry types.
- */
-#define XFS_BTREE_KTRACE_ARGBI 1
-#define XFS_BTREE_KTRACE_ARGBII 2
-#define XFS_BTREE_KTRACE_ARGFFFI 3
-#define XFS_BTREE_KTRACE_ARGI 4
-#define XFS_BTREE_KTRACE_ARGIPK 5
-#define XFS_BTREE_KTRACE_ARGIPR 6
-#define XFS_BTREE_KTRACE_ARGIK 7
-#define XFS_BTREE_KTRACE_ARGR 8
-#define XFS_BTREE_KTRACE_CUR 9
-
-/*
- * Sub-types for cursor traces.
- */
-#define XBT_ARGS 0
-#define XBT_ENTRY 1
-#define XBT_ERROR 2
-#define XBT_EXIT 3
-
-void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
- struct xfs_buf *, int, int);
-void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
- struct xfs_buf *, int, int, int);
-void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
-void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_ptr, union xfs_btree_key *, int);
-void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_ptr, union xfs_btree_rec *, int);
-void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_key *, int);
-void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
- union xfs_btree_rec *, int);
-void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
-
-#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
- xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
-#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
- xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
-#define XFS_BTREE_TRACE_ARGI(c, i) \
- xfs_btree_trace_argi(__func__, c, i, __LINE__)
-#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
- xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
-#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
- xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
-#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
- xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
-#define XFS_BTREE_TRACE_ARGR(c, r) \
- xfs_btree_trace_argr(__func__, c, r, __LINE__)
-#define XFS_BTREE_TRACE_CURSOR(c, t) \
- xfs_btree_trace_cursor(__func__, c, t, __LINE__)
-#else
-#define XFS_BTREE_TRACE_ARGBI(c, b, i)
-#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
-#define XFS_BTREE_TRACE_ARGI(c, i)
-#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
-#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
-#define XFS_BTREE_TRACE_ARGIK(c, i, k)
-#define XFS_BTREE_TRACE_ARGR(c, r)
-#define XFS_BTREE_TRACE_CURSOR(c, t)
-#endif /* XFS_BTREE_TRACE */
-
-#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7b7e005..8849291 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -90,13 +90,11 @@
uint first,
uint last)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
uint nbytes;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
- if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {
+ if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
return;
- }
ASSERT(bip->bli_logged != NULL);
nbytes = last - first + 1;
@@ -408,7 +406,7 @@
int stale = bip->bli_flags & XFS_BLI_STALE;
int freed;
- ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
+ ASSERT(bp->b_fspriv == bip);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_buf_item_unpin(bip);
@@ -420,7 +418,7 @@
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -443,7 +441,7 @@
* Since the transaction no longer refers to the buffer,
* the buffer should no longer refer to the transaction.
*/
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
}
/*
@@ -454,13 +452,13 @@
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_fspriv = NULL;
+ bp->b_iodone = NULL;
} else {
spin_lock(&ailp->xa_lock);
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
xfs_buf_item_relse(bp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
+ ASSERT(bp->b_fspriv == NULL);
}
xfs_buf_relse(bp);
}
@@ -483,7 +481,7 @@
if (XFS_BUF_ISPINNED(bp))
return XFS_ITEM_PINNED;
- if (!XFS_BUF_CPSEMA(bp))
+ if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
/* take a reference to the buffer. */
@@ -525,7 +523,7 @@
uint hold;
/* Clear the buffer's association with this transaction. */
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
/*
* If this is a transaction abort, don't return early. Instead, allow
@@ -684,7 +682,7 @@
xfs_buf_t *bp,
xfs_mount_t *mp)
{
- xfs_log_item_t *lip;
+ xfs_log_item_t *lip = bp->b_fspriv;
xfs_buf_log_item_t *bip;
int chunks;
int map_size;
@@ -696,12 +694,8 @@
* nothing to do here so return.
*/
ASSERT(bp->b_target->bt_mount == mp);
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- if (lip->li_type == XFS_LI_BUF) {
- return;
- }
- }
+ if (lip != NULL && lip->li_type == XFS_LI_BUF)
+ return;
/*
* chunks is the number of XFS_BLF_CHUNK size pieces
@@ -740,11 +734,9 @@
* Put the buf item into the list of items attached to the
* buffer at the front.
*/
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- bip->bli_item.li_bio_list =
- XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- }
- XFS_BUF_SET_FSPRIVATE(bp, bip);
+ if (bp->b_fspriv)
+ bip->bli_item.li_bio_list = bp->b_fspriv;
+ bp->b_fspriv = bip;
}
@@ -876,12 +868,11 @@
trace_xfs_buf_item_relse(bp, _RET_IP_);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
- XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
- if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
- (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
- XFS_BUF_CLR_IODONE_FUNC(bp);
- }
+ bip = bp->b_fspriv;
+ bp->b_fspriv = bip->bli_item.li_bio_list;
+ if (bp->b_fspriv == NULL)
+ bp->b_iodone = NULL;
+
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
@@ -905,20 +896,20 @@
xfs_log_item_t *head_lip;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
lip->li_cb = cb;
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ head_lip = bp->b_fspriv;
+ if (head_lip) {
lip->li_bio_list = head_lip->li_bio_list;
head_lip->li_bio_list = lip;
} else {
- XFS_BUF_SET_FSPRIVATE(bp, lip);
+ bp->b_fspriv = lip;
}
- ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) ||
- (XFS_BUF_IODONE_FUNC(bp) == NULL));
- XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
+ ASSERT(bp->b_iodone == NULL ||
+ bp->b_iodone == xfs_buf_iodone_callbacks);
+ bp->b_iodone = xfs_buf_iodone_callbacks;
}
/*
@@ -939,8 +930,8 @@
{
struct xfs_log_item *lip;
- while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
- XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
+ while ((lip = bp->b_fspriv) != NULL) {
+ bp->b_fspriv = lip->li_bio_list;
ASSERT(lip->li_cb != NULL);
/*
* Clear the next pointer so we don't have any
@@ -1007,7 +998,7 @@
XFS_BUF_DONE(bp);
XFS_BUF_SET_START(bp);
}
- ASSERT(XFS_BUF_IODONE_FUNC(bp));
+ ASSERT(bp->b_iodone != NULL);
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
xfs_buf_relse(bp);
return;
@@ -1026,8 +1017,8 @@
do_callbacks:
xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_fspriv = NULL;
+ bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0);
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6102ac6..2925726 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,11 +24,12 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -36,10 +37,6 @@
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -89,7 +86,7 @@
*/
STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
+STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
xfs_da_state_blk_t *drop_blk,
xfs_da_state_blk_t *save_blk);
@@ -321,11 +318,11 @@
ASSERT(bp != NULL);
node = bp->data;
oldroot = blk1->bp->data;
- if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
+ if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
(char *)oldroot);
} else {
- ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
leaf = (xfs_dir2_leaf_t *)oldroot;
size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
(char *)leaf);
@@ -352,7 +349,7 @@
node->hdr.count = cpu_to_be16(2);
#ifdef DEBUG
- if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
+ if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
ASSERT(blk1->blkno >= mp->m_dirleafblk &&
blk1->blkno < mp->m_dirfreeblk);
ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -384,7 +381,7 @@
int useextra;
node = oldblk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
/*
* With V2 dirs the extra block is data or freespace.
@@ -483,8 +480,8 @@
node1 = node2;
node2 = tmpnode;
}
- ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
- ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
if (count == 0)
return;
@@ -578,7 +575,7 @@
int tmp;
node = oldblk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
ASSERT(newblk->blkno != 0);
if (state->args->whichfork == XFS_DATA_FORK)
@@ -714,7 +711,7 @@
ASSERT(args != NULL);
ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
oldroot = root_blk->bp->data;
- ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT(!oldroot->hdr.info.forw);
ASSERT(!oldroot->hdr.info.back);
@@ -737,10 +734,10 @@
ASSERT(bp != NULL);
blkinfo = bp->data;
if (be16_to_cpu(oldroot->hdr.level) == 1) {
- ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
- be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+ blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
} else {
- ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
}
ASSERT(!blkinfo->forw);
ASSERT(!blkinfo->back);
@@ -776,7 +773,7 @@
*/
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
node = (xfs_da_intnode_t *)info;
count = be16_to_cpu(node->hdr.count);
if (count > (state->node_ents >> 1)) {
@@ -836,7 +833,7 @@
count -= state->node_ents >> 2;
count -= be16_to_cpu(node->hdr.count);
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
count -= be16_to_cpu(node->hdr.count);
xfs_da_brelse(state->args->trans, bp);
if (count >= 0)
@@ -911,7 +908,7 @@
}
for (blk--, level--; level >= 0; blk--, level--) {
node = blk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
btree = &node->btree[ blk->index ];
if (be32_to_cpu(btree->hashval) == lasthash)
break;
@@ -979,8 +976,8 @@
drop_node = drop_blk->bp->data;
save_node = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
- ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
tp = state->args->trans;
/*
@@ -1278,8 +1275,8 @@
node1 = node1_bp->data;
node2 = node2_bp->data;
- ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
- (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
+ ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
+ node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
((be32_to_cpu(node2->btree[0].hashval) <
be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1299,7 +1296,7 @@
xfs_da_intnode_t *node;
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (count)
*count = be16_to_cpu(node->hdr.count);
if (!node->hdr.count)
@@ -1412,7 +1409,7 @@
for (blk = &path->blk[level]; level >= 0; blk--, level--) {
ASSERT(blk->bp != NULL);
node = blk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
blk->index++;
blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1451,9 +1448,9 @@
return(error);
ASSERT(blk->bp != NULL);
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
- be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
- be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+ info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+ info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
blk->magic = be16_to_cpu(info->magic);
if (blk->magic == XFS_DA_NODE_MAGIC) {
node = (xfs_da_intnode_t *)info;
@@ -1546,79 +1543,62 @@
.compname = xfs_da_compname
};
-/*
- * Add a block to the btree ahead of the file.
- * Return the new block number to the caller.
- */
int
-xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
+xfs_da_grow_inode_int(
+ struct xfs_da_args *args,
+ xfs_fileoff_t *bno,
+ int count)
{
- xfs_fileoff_t bno, b;
- xfs_bmbt_irec_t map;
- xfs_bmbt_irec_t *mapp;
- xfs_inode_t *dp;
- int nmap, error, w, count, c, got, i, mapi;
- xfs_trans_t *tp;
- xfs_mount_t *mp;
- xfs_drfsbno_t nblks;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ int w = args->whichfork;
+ xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
+ struct xfs_bmbt_irec map, *mapp;
+ int nmap, error, got, i, mapi;
- dp = args->dp;
- mp = dp->i_mount;
- w = args->whichfork;
- tp = args->trans;
- nblks = dp->i_d.di_nblocks;
-
- /*
- * For new directories adjust the file offset and block count.
- */
- if (w == XFS_DATA_FORK) {
- bno = mp->m_dirleafblk;
- count = mp->m_dirblkfsbs;
- } else {
- bno = 0;
- count = 1;
- }
/*
* Find a spot in the file space to put the new block.
*/
- if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
+ error = xfs_bmap_first_unused(tp, dp, count, bno, w);
+ if (error)
return error;
- if (w == XFS_DATA_FORK)
- ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
+
/*
* Try mapping it in one filesystem block.
*/
nmap = 1;
ASSERT(args->firstblock != NULL);
- if ((error = xfs_bmapi(tp, dp, bno, count,
+ error = xfs_bmapi(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
- args->flist))) {
+ args->flist);
+ if (error)
return error;
- }
+
ASSERT(nmap <= 1);
if (nmap == 1) {
mapp = &map;
mapi = 1;
- }
- /*
- * If we didn't get it and the block might work if fragmented,
- * try without the CONTIG flag. Loop until we get it all.
- */
- else if (nmap == 0 && count > 1) {
+ } else if (nmap == 0 && count > 1) {
+ xfs_fileoff_t b;
+ int c;
+
+ /*
+ * If we didn't get it and the block might work if fragmented,
+ * try without the CONTIG flag. Loop until we get it all.
+ */
mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
- for (b = bno, mapi = 0; b < bno + count; ) {
+ for (b = *bno, mapi = 0; b < *bno + count; ) {
nmap = MIN(XFS_BMAP_MAX_NMAP, count);
- c = (int)(bno + count - b);
- if ((error = xfs_bmapi(tp, dp, b, c,
+ c = (int)(*bno + count - b);
+ error = xfs_bmapi(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
XFS_BMAPI_METADATA,
args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist))) {
- kmem_free(mapp);
- return error;
- }
+ &mapp[mapi], &nmap, args->flist);
+ if (error)
+ goto out_free_map;
if (nmap < 1)
break;
mapi += nmap;
@@ -1629,24 +1609,53 @@
mapi = 0;
mapp = NULL;
}
+
/*
* Count the blocks we got, make sure it matches the total.
*/
for (i = 0, got = 0; i < mapi; i++)
got += mapp[i].br_blockcount;
- if (got != count || mapp[0].br_startoff != bno ||
+ if (got != count || mapp[0].br_startoff != *bno ||
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
- bno + count) {
- if (mapp != &map)
- kmem_free(mapp);
- return XFS_ERROR(ENOSPC);
+ *bno + count) {
+ error = XFS_ERROR(ENOSPC);
+ goto out_free_map;
}
- if (mapp != &map)
- kmem_free(mapp);
+
/* account for newly allocated blocks in reserved blocks total */
args->total -= dp->i_d.di_nblocks - nblks;
- *new_blkno = (xfs_dablk_t)bno;
- return 0;
+
+out_free_map:
+ if (mapp != &map)
+ kmem_free(mapp);
+ return error;
+}
+
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ */
+int
+xfs_da_grow_inode(
+ struct xfs_da_args *args,
+ xfs_dablk_t *new_blkno)
+{
+ xfs_fileoff_t bno;
+ int count;
+ int error;
+
+ if (args->whichfork == XFS_DATA_FORK) {
+ bno = args->dp->i_mount->m_dirleafblk;
+ count = args->dp->i_mount->m_dirblkfsbs;
+ } else {
+ bno = 0;
+ count = 1;
+ }
+
+ error = xfs_da_grow_inode_int(args, &bno, count);
+ if (!error)
+ *new_blkno = (xfs_dablk_t)bno;
+ return error;
}
/*
@@ -1704,12 +1713,12 @@
/*
* Get values from the moved block.
*/
- if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
+ if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
dead_level = 0;
dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
} else {
- ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
dead_node = (xfs_da_intnode_t *)dead_info;
dead_level = be16_to_cpu(dead_node->hdr.level);
dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1768,8 +1777,8 @@
if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
goto done;
par_node = par_buf->data;
- if (unlikely(
- be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
+ if (unlikely(par_node->hdr.info.magic !=
+ cpu_to_be16(XFS_DA_NODE_MAGIC) ||
(level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
XFS_ERRLEVEL_LOW, mp);
@@ -1820,7 +1829,7 @@
par_node = par_buf->data;
if (unlikely(
be16_to_cpu(par_node->hdr.level) != level ||
- be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
+ par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
XFS_ERRLEVEL_LOW, mp);
error = XFS_ERROR(EFSCORRUPTED);
@@ -1930,8 +1939,7 @@
xfs_daddr_t *mappedbnop,
xfs_dabuf_t **bpp,
int whichfork,
- int caller,
- inst_t *ra)
+ int caller)
{
xfs_buf_t *bp = NULL;
xfs_buf_t **bplist;
@@ -2070,25 +2078,22 @@
* Build a dabuf structure.
*/
if (bplist) {
- rbp = xfs_da_buf_make(nbplist, bplist, ra);
+ rbp = xfs_da_buf_make(nbplist, bplist);
} else if (bp)
- rbp = xfs_da_buf_make(1, &bp, ra);
+ rbp = xfs_da_buf_make(1, &bp);
else
rbp = NULL;
/*
* For read_buf, check the magic number.
*/
if (caller == 1) {
- xfs_dir2_data_t *data;
- xfs_dir2_free_t *free;
- xfs_da_blkinfo_t *info;
+ xfs_dir2_data_hdr_t *hdr = rbp->data;
+ xfs_dir2_free_t *free = rbp->data;
+ xfs_da_blkinfo_t *info = rbp->data;
uint magic, magic1;
- info = rbp->data;
- data = rbp->data;
- free = rbp->data;
magic = be16_to_cpu(info->magic);
- magic1 = be32_to_cpu(data->hdr.magic);
+ magic1 = be32_to_cpu(hdr->magic);
if (unlikely(
XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
(magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2096,7 +2101,7 @@
(magic != XFS_DIR2_LEAFN_MAGIC) &&
(magic1 != XFS_DIR2_BLOCK_MAGIC) &&
(magic1 != XFS_DIR2_DATA_MAGIC) &&
- (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
+ (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
mp, XFS_ERRTAG_DA_READ_BUF,
XFS_RANDOM_DA_READ_BUF))) {
trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
@@ -2143,8 +2148,7 @@
xfs_dabuf_t **bpp,
int whichfork)
{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
- (inst_t *)__return_address);
+ return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
}
/*
@@ -2159,8 +2163,7 @@
xfs_dabuf_t **bpp,
int whichfork)
{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
- (inst_t *)__return_address);
+ return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
}
/*
@@ -2176,8 +2179,7 @@
xfs_daddr_t rval;
rval = -1;
- if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
- (inst_t *)__return_address))
+ if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
return -1;
else
return rval;
@@ -2235,17 +2237,12 @@
kmem_zone_free(xfs_da_state_zone, state);
}
-#ifdef XFS_DABUF_DEBUG
-xfs_dabuf_t *xfs_dabuf_global_list;
-static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
-#endif
-
/*
* Create a dabuf.
*/
/* ARGSUSED */
STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
{
xfs_buf_t *bp;
xfs_dabuf_t *dabuf;
@@ -2257,11 +2254,6 @@
else
dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
dabuf->dirty = 0;
-#ifdef XFS_DABUF_DEBUG
- dabuf->ra = ra;
- dabuf->target = XFS_BUF_TARGET(bps[0]);
- dabuf->blkno = XFS_BUF_ADDR(bps[0]);
-#endif
if (nbuf == 1) {
dabuf->nbuf = 1;
bp = bps[0];
@@ -2281,23 +2273,6 @@
XFS_BUF_COUNT(bp));
}
}
-#ifdef XFS_DABUF_DEBUG
- {
- xfs_dabuf_t *p;
-
- spin_lock(&xfs_dabuf_global_lock);
- for (p = xfs_dabuf_global_list; p; p = p->next) {
- ASSERT(p->blkno != dabuf->blkno ||
- p->target != dabuf->target);
- }
- dabuf->prev = NULL;
- if (xfs_dabuf_global_list)
- xfs_dabuf_global_list->prev = dabuf;
- dabuf->next = xfs_dabuf_global_list;
- xfs_dabuf_global_list = dabuf;
- spin_unlock(&xfs_dabuf_global_lock);
- }
-#endif
return dabuf;
}
@@ -2333,25 +2308,12 @@
ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
if (dabuf->dirty)
xfs_da_buf_clean(dabuf);
- if (dabuf->nbuf > 1)
+ if (dabuf->nbuf > 1) {
kmem_free(dabuf->data);
-#ifdef XFS_DABUF_DEBUG
- {
- spin_lock(&xfs_dabuf_global_lock);
- if (dabuf->prev)
- dabuf->prev->next = dabuf->next;
- else
- xfs_dabuf_global_list = dabuf->next;
- if (dabuf->next)
- dabuf->next->prev = dabuf->prev;
- spin_unlock(&xfs_dabuf_global_lock);
- }
- memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
-#endif
- if (dabuf->nbuf == 1)
- kmem_zone_free(xfs_dabuf_zone, dabuf);
- else
kmem_free(dabuf);
+ } else {
+ kmem_zone_free(xfs_dabuf_zone, dabuf);
+ }
}
/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index fe9f5a8..dbf7c07 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -145,22 +145,11 @@
short dirty; /* data needs to be copied back */
short bbcount; /* how large is data in bbs */
void *data; /* pointer for buffers' data */
-#ifdef XFS_DABUF_DEBUG
- inst_t *ra; /* return address of caller to make */
- struct xfs_dabuf *next; /* next in global chain */
- struct xfs_dabuf *prev; /* previous in global chain */
- struct xfs_buftarg *target; /* device for buffer */
- xfs_daddr_t blkno; /* daddr first in bps[0] */
-#endif
struct xfs_buf *bps[1]; /* actually nbuf of these */
} xfs_dabuf_t;
#define XFS_DA_BUF_SIZE(n) \
(sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
-#ifdef XFS_DABUF_DEBUG
-extern xfs_dabuf_t *xfs_dabuf_global_list;
-#endif
-
/*
* Storage for holding state during Btree searches and split/join ops.
*
@@ -248,6 +237,8 @@
* Utility routines.
*/
int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
+int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
+ int count);
int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
xfs_dabuf_t **bp, int whichfork);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index dba7a71..4580ce0 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,20 +24,17 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
@@ -122,15 +119,15 @@
xfs_dir_isempty(
xfs_inode_t *dp)
{
- xfs_dir2_sf_t *sfp;
+ xfs_dir2_sf_hdr_t *sfp;
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
return 0;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- return !sfp->hdr.count;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ return !sfp->count;
}
/*
@@ -500,129 +497,34 @@
/*
* Add a block to the directory.
- * This routine is for data and free blocks, not leaf/node blocks
- * which are handled by xfs_da_grow_inode.
+ *
+ * This routine is for data and free blocks, not leaf/node blocks which are
+ * handled by xfs_da_grow_inode.
*/
int
xfs_dir2_grow_inode(
- xfs_da_args_t *args,
- int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
- xfs_dir2_db_t *dbp) /* out: block number added */
+ struct xfs_da_args *args,
+ int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
+ xfs_dir2_db_t *dbp) /* out: block number added */
{
- xfs_fileoff_t bno; /* directory offset of new block */
- int count; /* count of filesystem blocks */
- xfs_inode_t *dp; /* incore directory inode */
- int error;
- int got; /* blocks actually mapped */
- int i;
- xfs_bmbt_irec_t map; /* single structure for bmap */
- int mapi; /* mapping index */
- xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */
- xfs_mount_t *mp;
- int nmap; /* number of bmap entries */
- xfs_trans_t *tp;
- xfs_drfsbno_t nblks;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ xfs_fileoff_t bno; /* directory offset of new block */
+ int count; /* count of filesystem blocks */
+ int error;
trace_xfs_dir2_grow_inode(args, space);
- dp = args->dp;
- tp = args->trans;
- mp = dp->i_mount;
- nblks = dp->i_d.di_nblocks;
/*
* Set lowest possible block in the space requested.
*/
bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
count = mp->m_dirblkfsbs;
- /*
- * Find the first hole for our block.
- */
- if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
+
+ error = xfs_da_grow_inode_int(args, &bno, count);
+ if (error)
return error;
- nmap = 1;
- ASSERT(args->firstblock != NULL);
- /*
- * Try mapping the new block contiguously (one extent).
- */
- if ((error = xfs_bmapi(tp, dp, bno, count,
- XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
- args->firstblock, args->total, &map, &nmap,
- args->flist)))
- return error;
- ASSERT(nmap <= 1);
- if (nmap == 1) {
- mapp = &map;
- mapi = 1;
- }
- /*
- * Didn't work and this is a multiple-fsb directory block.
- * Try again with contiguous flag turned on.
- */
- else if (nmap == 0 && count > 1) {
- xfs_fileoff_t b; /* current file offset */
- /*
- * Space for maximum number of mappings.
- */
- mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
- /*
- * Iterate until we get to the end of our block.
- */
- for (b = bno, mapi = 0; b < bno + count; ) {
- int c; /* current fsb count */
-
- /*
- * Can't map more than MAX_NMAP at once.
- */
- nmap = MIN(XFS_BMAP_MAX_NMAP, count);
- c = (int)(bno + count - b);
- if ((error = xfs_bmapi(tp, dp, b, c,
- XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
- args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist))) {
- kmem_free(mapp);
- return error;
- }
- if (nmap < 1)
- break;
- /*
- * Add this bunch into our table, go to the next offset.
- */
- mapi += nmap;
- b = mapp[mapi - 1].br_startoff +
- mapp[mapi - 1].br_blockcount;
- }
- }
- /*
- * Didn't work.
- */
- else {
- mapi = 0;
- mapp = NULL;
- }
- /*
- * See how many fsb's we got.
- */
- for (i = 0, got = 0; i < mapi; i++)
- got += mapp[i].br_blockcount;
- /*
- * Didn't get enough fsb's, or the first/last block's are wrong.
- */
- if (got != count || mapp[0].br_startoff != bno ||
- mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
- bno + count) {
- if (mapp != &map)
- kmem_free(mapp);
- return XFS_ERROR(ENOSPC);
- }
- /*
- * Done with the temporary mapping table.
- */
- if (mapp != &map)
- kmem_free(mapp);
-
- /* account for newly allocated blocks in reserved blocks total */
- args->total -= dp->i_d.di_nblocks - nblks;
*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
/*
@@ -634,7 +536,7 @@
size = XFS_FSB_TO_B(mp, bno + count);
if (size > dp->i_d.di_size) {
dp->i_d.di_size = size;
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
}
}
return 0;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 74a3b10..e937d99 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -16,49 +16,14 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DIR2_H__
-#define __XFS_DIR2_H__
+#define __XFS_DIR2_H__
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_put_args;
struct xfs_bmap_free;
+struct xfs_da_args;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
-/*
- * Directory version 2.
- * There are 4 possible formats:
- * shortform
- * single block - data with embedded leaf at the end
- * multiple data blocks, single leaf+freeindex block
- * data blocks, node&leaf blocks (btree), freeindex blocks
- *
- * The shortform format is in xfs_dir2_sf.h.
- * The single block format is in xfs_dir2_block.h.
- * The data block format is in xfs_dir2_data.h.
- * The leaf and freeindex block formats are in xfs_dir2_leaf.h.
- * Node blocks are the same as the other version, in xfs_da_btree.h.
- */
-
-/*
- * Byte offset in data block and shortform entry.
- */
-typedef __uint16_t xfs_dir2_data_off_t;
-#define NULLDATAOFF 0xffffU
-typedef uint xfs_dir2_data_aoff_t; /* argument form */
-
-/*
- * Directory block number (logical dirblk in file)
- */
-typedef __uint32_t xfs_dir2_db_t;
-
-/*
- * Byte offset in a directory.
- */
-typedef xfs_off_t xfs_dir2_off_t;
-
extern struct xfs_name xfs_name_dotdot;
/*
@@ -86,21 +51,10 @@
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, uint resblks);
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
/*
- * Utility routines for v2 directories.
+ * Direct call from the bmap code, bypassing the generic directory layer.
*/
-extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
- xfs_dir2_db_t *dbp);
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
- int *vp);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
- int *vp);
-extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
- struct xfs_dabuf *bp);
-
-extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
- const unsigned char *name, int len);
+extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 580d99c..9245e02 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -23,17 +23,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -67,7 +64,7 @@
xfs_da_args_t *args) /* directory op arguments */
{
xfs_dir2_data_free_t *bf; /* bestfree table in block */
- xfs_dir2_block_t *block; /* directory block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -105,13 +102,13 @@
return error;
}
ASSERT(bp != NULL);
- block = bp->data;
+ hdr = bp->data;
/*
* Check the magic number, corrupted if wrong.
*/
- if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
+ if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
- XFS_ERRLEVEL_LOW, mp, block);
+ XFS_ERRLEVEL_LOW, mp, hdr);
xfs_da_brelse(tp, bp);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -119,8 +116,8 @@
/*
* Set up pointers to parts of the block.
*/
- bf = block->hdr.bestfree;
- btp = xfs_dir2_block_tail_p(mp, block);
+ bf = hdr->bestfree;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* No stale entries? Need space for entry and new leaf.
@@ -133,7 +130,7 @@
/*
* Data object just before the first leaf entry.
*/
- enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then can't do this add without cleaning up:
* the space before the first leaf entry needs to be free so it
@@ -146,7 +143,7 @@
*/
else {
dup = (xfs_dir2_data_unused_t *)
- ((char *)block + be16_to_cpu(bf[0].offset));
+ ((char *)hdr + be16_to_cpu(bf[0].offset));
if (dup == enddup) {
/*
* It is the biggest freespace, is it too small
@@ -159,7 +156,7 @@
*/
if (be16_to_cpu(bf[1].length) >= len)
dup = (xfs_dir2_data_unused_t *)
- ((char *)block +
+ ((char *)hdr +
be16_to_cpu(bf[1].offset));
else
dup = NULL;
@@ -182,7 +179,7 @@
*/
else if (be16_to_cpu(bf[0].length) >= len) {
dup = (xfs_dir2_data_unused_t *)
- ((char *)block + be16_to_cpu(bf[0].offset));
+ ((char *)hdr + be16_to_cpu(bf[0].offset));
compact = 0;
}
/*
@@ -196,7 +193,7 @@
/*
* Data object just before the first leaf entry.
*/
- dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then the data will go where the
* leaf data starts now, if it works at all.
@@ -255,7 +252,8 @@
highstale = lfloghigh = -1;
fromidx >= 0;
fromidx--) {
- if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
+ if (blp[fromidx].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
if (highstale == -1)
highstale = toidx;
else {
@@ -272,7 +270,7 @@
lfloghigh -= be32_to_cpu(btp->stale) - 1;
be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
xfs_dir2_data_make_free(tp, bp,
- (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
&needlog, &needscan);
blp += be32_to_cpu(btp->stale) - 1;
@@ -282,7 +280,7 @@
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
needscan = 0;
}
}
@@ -318,7 +316,7 @@
*/
xfs_dir2_data_use_free(tp, bp, enddup,
(xfs_dir2_data_aoff_t)
- ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
+ ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
sizeof(*blp)),
(xfs_dir2_data_aoff_t)sizeof(*blp),
&needlog, &needscan);
@@ -331,8 +329,7 @@
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
- &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
needscan = 0;
}
/*
@@ -353,12 +350,14 @@
else {
for (lowstale = mid;
lowstale >= 0 &&
- be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
+ blp[lowstale].address !=
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
lowstale--)
continue;
for (highstale = mid + 1;
highstale < be32_to_cpu(btp->count) &&
- be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
+ blp[highstale].address !=
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
(lowstale < 0 || mid - lowstale > highstale - mid);
highstale++)
continue;
@@ -397,13 +396,13 @@
*/
blp[mid].hashval = cpu_to_be32(args->hashval);
blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
/*
* Mark space for the data entry used.
*/
xfs_dir2_data_use_free(tp, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
/*
* Create the new data entry.
@@ -412,12 +411,12 @@
dep->namelen = args->namelen;
memcpy(dep->name, args->name, args->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Clean up the bestfree array and log the header, tail, and entry.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, bp);
xfs_dir2_block_log_tail(tp, bp);
@@ -437,7 +436,7 @@
xfs_off_t *offset,
filldir_t filldir)
{
- xfs_dir2_block_t *block; /* directory block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dabuf_t *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
@@ -470,13 +469,13 @@
* We'll skip entries before this.
*/
wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
/*
* Set up values for the loop.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- ptr = (char *)block->u;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ ptr = (char *)(hdr + 1);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
/*
@@ -502,11 +501,11 @@
/*
* The entry is before the desired starting point, skip it.
*/
- if ((char *)dep - (char *)block < wantoff)
+ if ((char *)dep - (char *)hdr < wantoff)
continue;
cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
- (char *)dep - (char *)block);
+ (char *)dep - (char *)hdr);
/*
* If it didn't fit, set the final offset to here & return.
@@ -540,17 +539,14 @@
int first, /* index of first logged leaf */
int last) /* index of last logged leaf */
{
- xfs_dir2_block_t *block; /* directory block structure */
- xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dir2_block_tail_t *btp; /* block tail */
- xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_leaf_entry_t *blp;
+ xfs_dir2_block_tail_t *btp;
- mp = tp->t_mountp;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
- xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
- (uint)((char *)&blp[last + 1] - (char *)block - 1));
+ xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+ (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
}
/*
@@ -561,15 +557,12 @@
xfs_trans_t *tp, /* transaction structure */
xfs_dabuf_t *bp) /* block buffer */
{
- xfs_dir2_block_t *block; /* directory block structure */
- xfs_dir2_block_tail_t *btp; /* block tail */
- xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_block_tail_t *btp;
- mp = tp->t_mountp;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
- xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
- (uint)((char *)(btp + 1) - (char *)block - 1));
+ btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+ xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+ (uint)((char *)(btp + 1) - (char *)hdr - 1));
}
/*
@@ -580,7 +573,7 @@
xfs_dir2_block_lookup(
xfs_da_args_t *args) /* dir lookup arguments */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -600,14 +593,14 @@
return error;
dp = args->dp;
mp = dp->i_mount;
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Get the offset from the leaf entry, to point to the data.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)block +
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
* Fill in inode number, CI name if appropriate, release the block.
@@ -628,7 +621,7 @@
int *entno) /* returned entry number */
{
xfs_dir2_dataptr_t addr; /* data entry address */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -654,9 +647,9 @@
return error;
}
ASSERT(bp != NULL);
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Loop doing a binary search for our hash value.
@@ -694,7 +687,7 @@
* Get pointer to the entry from the leaf.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
/*
* Compare name and if it's an exact match, return the index
* and buffer. If it's the first case-insensitive match, store
@@ -733,7 +726,7 @@
xfs_dir2_block_removename(
xfs_da_args_t *args) /* directory operation args */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -760,20 +753,20 @@
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ hdr = bp->data;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry using the leaf entry.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
* Mark the data entry's space free.
*/
needlog = needscan = 0;
xfs_dir2_data_make_free(tp, bp,
- (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
/*
* Fix up the block tail.
@@ -789,15 +782,15 @@
* Fix up bestfree, log the header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, bp);
xfs_dir2_data_check(dp, bp);
/*
* See if the size as a shortform is good enough.
*/
- if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
- XFS_IFORK_DSIZE(dp)) {
+ size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+ if (size > XFS_IFORK_DSIZE(dp)) {
xfs_da_buf_done(bp);
return 0;
}
@@ -815,7 +808,7 @@
xfs_dir2_block_replace(
xfs_da_args_t *args) /* directory operation args */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -836,14 +829,14 @@
}
dp = args->dp;
mp = dp->i_mount;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ hdr = bp->data;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry we need to change.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
/*
* Change the inode number to the new value.
@@ -882,7 +875,7 @@
xfs_dabuf_t *dbp) /* data buffer */
{
__be16 *bestsp; /* leaf bests table */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused data entry */
@@ -906,7 +899,7 @@
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
* If there are data blocks other than the first one, take this
@@ -917,7 +910,7 @@
while (dp->i_d.di_size > mp->m_dirblksize) {
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
- mp->m_dirblksize - (uint)sizeof(block->hdr)) {
+ mp->m_dirblksize - (uint)sizeof(*hdr)) {
if ((error =
xfs_dir2_leaf_trim_data(args, lbp,
(xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -935,18 +928,18 @@
XFS_DATA_FORK))) {
goto out;
}
- block = dbp->data;
- ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+ hdr = dbp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
/*
* Size of the "leaf" area in the block.
*/
- size = (uint)sizeof(block->tail) +
+ size = (uint)sizeof(xfs_dir2_block_tail_t) +
(uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
/*
* Look at the last data entry.
*/
- tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1;
- dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free or is too short we can't do it.
*/
@@ -958,7 +951,7 @@
/*
* Start converting it to block form.
*/
- block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+ hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
needlog = 1;
needscan = 0;
/*
@@ -969,7 +962,7 @@
/*
* Initialize the block tail.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
btp->stale = 0;
xfs_dir2_block_log_tail(tp, dbp);
@@ -978,7 +971,8 @@
*/
lep = xfs_dir2_block_leaf_p(btp);
for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
- if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
lep[to++] = leaf->ents[from];
}
@@ -988,7 +982,7 @@
* Scan the bestfree if we need it and log the data block header.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
/*
@@ -1002,8 +996,8 @@
/*
* Now see if the resulting block can be shrunken to shortform.
*/
- if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
- XFS_IFORK_DSIZE(dp)) {
+ size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+ if (size > XFS_IFORK_DSIZE(dp)) {
error = 0;
goto out;
}
@@ -1024,12 +1018,10 @@
xfs_da_args_t *args) /* operation arguments */
{
xfs_dir2_db_t blkno; /* dir-relative block # (0) */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
- char *buf; /* sf buffer */
- int buf_len;
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
int dummy; /* trash */
@@ -1043,7 +1035,8 @@
int newoffset; /* offset from current entry */
int offset; /* target block offset */
xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform header */
__be16 *tagp; /* end of data entry */
xfs_trans_t *tp; /* transaction pointer */
struct xfs_name name;
@@ -1061,32 +1054,30 @@
ASSERT(XFS_FORCED_SHUTDOWN(mp));
return XFS_ERROR(EIO);
}
+
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
+
/*
- * Copy the directory into the stack buffer.
+ * Copy the directory into a temporary buffer.
* Then pitch the incore inode data so we can make extents.
*/
+ sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
+ memcpy(sfp, oldsfp, dp->i_df.if_bytes);
- buf_len = dp->i_df.if_bytes;
- buf = kmem_alloc(buf_len, KM_SLEEP);
-
- memcpy(buf, sfp, buf_len);
- xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
+ xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
dp->i_d.di_size = 0;
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- /*
- * Reset pointer - old sfp is gone.
- */
- sfp = (xfs_dir2_sf_t *)buf;
+
/*
* Add block 0 to the inode.
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
if (error) {
- kmem_free(buf);
+ kmem_free(sfp);
return error;
}
/*
@@ -1094,21 +1085,21 @@
*/
error = xfs_dir2_data_init(args, blkno, &bp);
if (error) {
- kmem_free(buf);
+ kmem_free(sfp);
return error;
}
- block = bp->data;
- block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+ hdr = bp->data;
+ hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
/*
* Compute size of block "tail" area.
*/
i = (uint)sizeof(*btp) +
- (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
/*
* The whole thing is initialized to free by the init routine.
* Say we're using the leaf and tail area.
*/
- dup = (xfs_dir2_data_unused_t *)block->u;
+ dup = (xfs_dir2_data_unused_t *)(hdr + 1);
needlog = needscan = 0;
xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
&needscan);
@@ -1116,50 +1107,51 @@
/*
* Fill in the tail.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */
btp->stale = 0;
blp = xfs_dir2_block_leaf_p(btp);
- endoffset = (uint)((char *)blp - (char *)block);
+ endoffset = (uint)((char *)blp - (char *)hdr);
/*
* Remove the freespace, we'll manage it.
*/
xfs_dir2_data_use_free(tp, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
/*
* Create entry for .
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
+ ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
dep->inumber = cpu_to_be64(dp->i_ino);
dep->namelen = 1;
dep->name[0] = '.';
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
/*
* Create entry for ..
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
- dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+ ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
+ dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
dep->namelen = 2;
dep->name[0] = dep->name[1] = '.';
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
offset = XFS_DIR2_DATA_FIRST_OFFSET;
/*
* Loop over existing entries, stuff them in.
*/
- if ((i = 0) == sfp->hdr.count)
+ i = 0;
+ if (!sfp->count)
sfep = NULL;
else
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1179,43 +1171,40 @@
* There should be a hole here, make one.
*/
if (offset < newoffset) {
- dup = (xfs_dir2_data_unused_t *)
- ((char *)block + offset);
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
dup->length = cpu_to_be16(newoffset - offset);
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
- ((char *)dup - (char *)block));
+ ((char *)dup - (char *)hdr));
xfs_dir2_data_log_unused(tp, bp, dup);
- (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
- dup, &dummy);
+ xfs_dir2_data_freeinsert(hdr, dup, &dummy);
offset += be16_to_cpu(dup->length);
continue;
}
/*
* Copy a real entry.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
- dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
+ dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
dep->namelen = sfep->namelen;
memcpy(dep->name, sfep->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
name.name = sfep->name;
name.len = sfep->namelen;
blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
hashname(&name));
blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
- offset = (int)((char *)(tagp + 1) - (char *)block);
- if (++i == sfp->hdr.count)
+ (char *)dep - (char *)hdr));
+ offset = (int)((char *)(tagp + 1) - (char *)hdr);
+ if (++i == sfp->count)
sfep = NULL;
else
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
/* Done with the temporary buffer */
- kmem_free(buf);
+ kmem_free(sfp);
/*
* Sort the leaf entries by hash value.
*/
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
deleted file mode 100644
index 10e6896..0000000
--- a/fs/xfs/xfs_dir2_block.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_BLOCK_H__
-#define __XFS_DIR2_BLOCK_H__
-
-/*
- * xfs_dir2_block.h
- * Directory version 2, single block format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_data_hdr;
-struct xfs_dir2_leaf_entry;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The single block format is as follows:
- * xfs_dir2_data_hdr_t structure
- * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
- * xfs_dir2_leaf_entry_t structures
- * xfs_dir2_block_tail_t structure
- */
-
-#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */
-
-typedef struct xfs_dir2_block_tail {
- __be32 count; /* count of leaf entries */
- __be32 stale; /* count of stale lf entries */
-} xfs_dir2_block_tail_t;
-
-/*
- * Generic single-block structure, for xfs_db.
- */
-typedef struct xfs_dir2_block {
- xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */
- xfs_dir2_data_union_t u[1];
- xfs_dir2_leaf_entry_t leaf[1];
- xfs_dir2_block_tail_t tail;
-} xfs_dir2_block_t;
-
-/*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline xfs_dir2_block_tail_t *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
-{
- return (((xfs_dir2_block_tail_t *)
- ((char *)(block) + (mp)->m_dirblksize)) - 1);
-}
-
-/*
- * Pointer to the leaf entries embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_leaf_entry *
-xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
-{
- return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
- xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_block_removename(struct xfs_da_args *args);
-extern int xfs_dir2_block_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
-extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_BLOCK_H__ */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 921595b..5bbe2a8 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -23,18 +23,18 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
+STATIC xfs_dir2_data_free_t *
+xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
+
#ifdef DEBUG
/*
* Check the consistency of the data block.
@@ -50,7 +50,7 @@
xfs_dir2_data_free_t *bf; /* bestfree table */
xfs_dir2_block_tail_t *btp=NULL; /* block tail */
int count; /* count of entries found */
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_free_t *dfp; /* bestfree entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
@@ -66,17 +66,19 @@
struct xfs_name name;
mp = dp->i_mount;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- bf = d->hdr.bestfree;
- p = (char *)d->u;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ hdr = bp->data;
+ bf = hdr->bestfree;
+ p = (char *)(hdr + 1);
+
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+ btp = xfs_dir2_block_tail_p(mp, hdr);
lep = xfs_dir2_block_leaf_p(btp);
endp = (char *)lep;
- } else
- endp = (char *)d + mp->m_dirblksize;
+ } else {
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+ endp = (char *)hdr + mp->m_dirblksize;
+ }
+
count = lastfree = freeseen = 0;
/*
* Account for zero bestfree entries.
@@ -108,8 +110,8 @@
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
ASSERT(lastfree == 0);
ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
- (char *)dup - (char *)d);
- dfp = xfs_dir2_data_freefind(d, dup);
+ (char *)dup - (char *)hdr);
+ dfp = xfs_dir2_data_freefind(hdr, dup);
if (dfp) {
i = (int)(dfp - bf);
ASSERT((freeseen & (1 << i)) == 0);
@@ -132,13 +134,13 @@
ASSERT(dep->namelen != 0);
ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
- (char *)dep - (char *)d);
+ (char *)dep - (char *)hdr);
count++;
lastfree = 0;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
(xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)d));
+ ((char *)dep - (char *)hdr));
name.name = dep->name;
name.len = dep->namelen;
hash = mp->m_dirnameops->hashname(&name);
@@ -155,9 +157,10 @@
* Need to have seen all the entries and all the bestfree slots.
*/
ASSERT(freeseen == 7);
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
- if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (lep[i].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
if (i > 0)
ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
@@ -172,9 +175,9 @@
* Given a data block and an unused entry from that block,
* return the bestfree entry if any that corresponds to it.
*/
-xfs_dir2_data_free_t *
+STATIC xfs_dir2_data_free_t *
xfs_dir2_data_freefind(
- xfs_dir2_data_t *d, /* data block */
+ xfs_dir2_data_hdr_t *hdr, /* data block */
xfs_dir2_data_unused_t *dup) /* data unused entry */
{
xfs_dir2_data_free_t *dfp; /* bestfree entry */
@@ -184,17 +187,17 @@
int seenzero; /* saw a 0 bestfree entry */
#endif
- off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
+ off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
#if defined(DEBUG) && defined(__KERNEL__)
/*
* Validate some consistency in the bestfree table.
* Check order, non-overlapping entries, and if we find the
* one we're looking for it has to be exact.
*/
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
- dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+ for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
+ dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
dfp++) {
if (!dfp->offset) {
ASSERT(!dfp->length);
@@ -210,7 +213,7 @@
else
ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
- if (dfp > &d->hdr.bestfree[0])
+ if (dfp > &hdr->bestfree[0])
ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
}
#endif
@@ -219,13 +222,13 @@
* it can't be there since they're sorted.
*/
if (be16_to_cpu(dup->length) <
- be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
+ be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
return NULL;
/*
* Look at the three bestfree entries for our guy.
*/
- for (dfp = &d->hdr.bestfree[0];
- dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+ for (dfp = &hdr->bestfree[0];
+ dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
dfp++) {
if (!dfp->offset)
return NULL;
@@ -243,7 +246,7 @@
*/
xfs_dir2_data_free_t * /* entry inserted */
xfs_dir2_data_freeinsert(
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
xfs_dir2_data_unused_t *dup, /* unused space */
int *loghead) /* log the data header (out) */
{
@@ -251,12 +254,13 @@
xfs_dir2_data_free_t new; /* new bestfree entry */
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
- dfp = d->hdr.bestfree;
+ dfp = hdr->bestfree;
new.length = dup->length;
- new.offset = cpu_to_be16((char *)dup - (char *)d);
+ new.offset = cpu_to_be16((char *)dup - (char *)hdr);
+
/*
* Insert at position 0, 1, or 2; or not at all.
*/
@@ -286,36 +290,36 @@
*/
STATIC void
xfs_dir2_data_freeremove(
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
int *loghead) /* out: log data header */
{
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
/*
* It's the first entry, slide the next 2 up.
*/
- if (dfp == &d->hdr.bestfree[0]) {
- d->hdr.bestfree[0] = d->hdr.bestfree[1];
- d->hdr.bestfree[1] = d->hdr.bestfree[2];
+ if (dfp == &hdr->bestfree[0]) {
+ hdr->bestfree[0] = hdr->bestfree[1];
+ hdr->bestfree[1] = hdr->bestfree[2];
}
/*
* It's the second entry, slide the 3rd entry up.
*/
- else if (dfp == &d->hdr.bestfree[1])
- d->hdr.bestfree[1] = d->hdr.bestfree[2];
+ else if (dfp == &hdr->bestfree[1])
+ hdr->bestfree[1] = hdr->bestfree[2];
/*
* Must be the last entry.
*/
else
- ASSERT(dfp == &d->hdr.bestfree[2]);
+ ASSERT(dfp == &hdr->bestfree[2]);
/*
* Clear the 3rd entry, must be zero now.
*/
- d->hdr.bestfree[2].length = 0;
- d->hdr.bestfree[2].offset = 0;
+ hdr->bestfree[2].length = 0;
+ hdr->bestfree[2].offset = 0;
*loghead = 1;
}
@@ -325,7 +329,7 @@
void
xfs_dir2_data_freescan(
xfs_mount_t *mp, /* filesystem mount point */
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
int *loghead) /* out: log data header */
{
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -335,23 +339,23 @@
char *p; /* current entry pointer */
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
/*
* Start by clearing the table.
*/
- memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
+ memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
*loghead = 1;
/*
* Set up pointers.
*/
- p = (char *)d->u;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ p = (char *)(hdr + 1);
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+ btp = xfs_dir2_block_tail_p(mp, hdr);
endp = (char *)xfs_dir2_block_leaf_p(btp);
} else
- endp = (char *)d + mp->m_dirblksize;
+ endp = (char *)hdr + mp->m_dirblksize;
/*
* Loop over the block's entries.
*/
@@ -361,9 +365,9 @@
* If it's a free entry, insert it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- ASSERT((char *)dup - (char *)d ==
+ ASSERT((char *)dup - (char *)hdr ==
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
- xfs_dir2_data_freeinsert(d, dup, loghead);
+ xfs_dir2_data_freeinsert(hdr, dup, loghead);
p += be16_to_cpu(dup->length);
}
/*
@@ -371,7 +375,7 @@
*/
else {
dep = (xfs_dir2_data_entry_t *)p;
- ASSERT((char *)dep - (char *)d ==
+ ASSERT((char *)dep - (char *)hdr ==
be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
p += xfs_dir2_data_entsize(dep->namelen);
}
@@ -389,7 +393,7 @@
xfs_dabuf_t **bpp) /* output block buffer */
{
xfs_dabuf_t *bp; /* block buffer */
- xfs_dir2_data_t *d; /* pointer to block */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused entry pointer */
int error; /* error return value */
@@ -410,26 +414,28 @@
return error;
}
ASSERT(bp != NULL);
+
/*
* Initialize the header.
*/
- d = bp->data;
- d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
- d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr));
+ hdr = bp->data;
+ hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+ hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
- d->hdr.bestfree[i].length = 0;
- d->hdr.bestfree[i].offset = 0;
+ hdr->bestfree[i].length = 0;
+ hdr->bestfree[i].offset = 0;
}
+
/*
* Set up an unused entry for the block's body.
*/
- dup = &d->u[0].unused;
+ dup = (xfs_dir2_data_unused_t *)(hdr + 1);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
- t=mp->m_dirblksize - (uint)sizeof(d->hdr);
- d->hdr.bestfree[0].length = cpu_to_be16(t);
+ t = mp->m_dirblksize - (uint)sizeof(*hdr);
+ hdr->bestfree[0].length = cpu_to_be16(t);
dup->length = cpu_to_be16(t);
- *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
+ *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
/*
* Log it and return it.
*/
@@ -448,14 +454,14 @@
xfs_dabuf_t *bp, /* block buffer */
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+
+ xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
- (char *)d - 1));
+ (char *)hdr - 1));
}
/*
@@ -466,13 +472,12 @@
xfs_trans_t *tp, /* transaction pointer */
xfs_dabuf_t *bp) /* block buffer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d),
- (uint)(sizeof(d->hdr) - 1));
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+
+ xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
}
/*
@@ -484,23 +489,23 @@
xfs_dabuf_t *bp, /* block buffer */
xfs_dir2_data_unused_t *dup) /* data unused pointer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+
/*
* Log the first part of the unused entry.
*/
- xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d),
+ xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
(uint)((char *)&dup->length + sizeof(dup->length) -
- 1 - (char *)d));
+ 1 - (char *)hdr));
/*
* Log the end (tag) of the unused entry.
*/
xfs_da_log_buf(tp, bp,
- (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
- (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
+ (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
+ (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
sizeof(xfs_dir2_data_off_t) - 1));
}
@@ -517,7 +522,7 @@
int *needlogp, /* out: log header */
int *needscanp) /* out: regen bestfree */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
char *endptr; /* end of data area */
xfs_mount_t *mp; /* filesystem mount point */
@@ -527,28 +532,29 @@
xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
mp = tp->t_mountp;
- d = bp->data;
+ hdr = bp->data;
+
/*
* Figure out where the end of the data area is.
*/
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC)
- endptr = (char *)d + mp->m_dirblksize;
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
+ endptr = (char *)hdr + mp->m_dirblksize;
else {
xfs_dir2_block_tail_t *btp; /* block tail */
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+ btp = xfs_dir2_block_tail_p(mp, hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
}
/*
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
*/
- if (offset > sizeof(d->hdr)) {
+ if (offset > sizeof(*hdr)) {
__be16 *tagp; /* tag just before us */
- tagp = (__be16 *)((char *)d + offset) - 1;
- prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp));
+ tagp = (__be16 *)((char *)hdr + offset) - 1;
+ prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
prevdup = NULL;
} else
@@ -557,9 +563,9 @@
* If this isn't the end of the block, see if the entry after
* us is free.
*/
- if ((char *)d + offset + len < endptr) {
+ if ((char *)hdr + offset + len < endptr) {
postdup =
- (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
postdup = NULL;
} else
@@ -576,21 +582,21 @@
/*
* See if prevdup and/or postdup are in bestfree table.
*/
- dfp = xfs_dir2_data_freefind(d, prevdup);
- dfp2 = xfs_dir2_data_freefind(d, postdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
+ dfp2 = xfs_dir2_data_freefind(hdr, postdup);
/*
* We need a rescan unless there are exactly 2 free entries
* namely our two. Then we know what's happening, otherwise
* since the third bestfree is there, there might be more
* entries.
*/
- needscan = (d->hdr.bestfree[2].length != 0);
+ needscan = (hdr->bestfree[2].length != 0);
/*
* Fix up the new big freespace.
*/
be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(prevdup) =
- cpu_to_be16((char *)prevdup - (char *)d);
+ cpu_to_be16((char *)prevdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, prevdup);
if (!needscan) {
/*
@@ -600,18 +606,18 @@
* Remove entry 1 first then entry 0.
*/
ASSERT(dfp && dfp2);
- if (dfp == &d->hdr.bestfree[1]) {
- dfp = &d->hdr.bestfree[0];
+ if (dfp == &hdr->bestfree[1]) {
+ dfp = &hdr->bestfree[0];
ASSERT(dfp2 == dfp);
- dfp2 = &d->hdr.bestfree[1];
+ dfp2 = &hdr->bestfree[1];
}
- xfs_dir2_data_freeremove(d, dfp2, needlogp);
- xfs_dir2_data_freeremove(d, dfp, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
/*
* Now insert the new entry.
*/
- dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
- ASSERT(dfp == &d->hdr.bestfree[0]);
+ dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
+ ASSERT(dfp == &hdr->bestfree[0]);
ASSERT(dfp->length == prevdup->length);
ASSERT(!dfp[1].length);
ASSERT(!dfp[2].length);
@@ -621,10 +627,10 @@
* The entry before us is free, merge with it.
*/
else if (prevdup) {
- dfp = xfs_dir2_data_freefind(d, prevdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
be16_add_cpu(&prevdup->length, len);
*xfs_dir2_data_unused_tag_p(prevdup) =
- cpu_to_be16((char *)prevdup - (char *)d);
+ cpu_to_be16((char *)prevdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, prevdup);
/*
* If the previous entry was in the table, the new entry
@@ -632,27 +638,27 @@
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
*/
else {
needscan = be16_to_cpu(prevdup->length) >
- be16_to_cpu(d->hdr.bestfree[2].length);
+ be16_to_cpu(hdr->bestfree[2].length);
}
}
/*
* The following entry is free, merge with it.
*/
else if (postdup) {
- dfp = xfs_dir2_data_freefind(d, postdup);
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+ dfp = xfs_dir2_data_freefind(hdr, postdup);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If the following entry was in the table, the new entry
@@ -660,28 +666,28 @@
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
*/
else {
needscan = be16_to_cpu(newdup->length) >
- be16_to_cpu(d->hdr.bestfree[2].length);
+ be16_to_cpu(hdr->bestfree[2].length);
}
}
/*
* Neither neighbor is free. Make a new entry.
*/
else {
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(len);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
- (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
*needscanp = needscan;
}
@@ -699,7 +705,7 @@
int *needlogp, /* out: need to log header */
int *needscanp) /* out: need regen bestfree */
{
- xfs_dir2_data_t *d; /* data block */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
int matchback; /* matches end of freespace */
int matchfront; /* matches start of freespace */
@@ -708,24 +714,24 @@
xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
int oldlen; /* old unused entry's length */
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ hdr = bp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
- ASSERT(offset >= (char *)dup - (char *)d);
- ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
- ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+ ASSERT(offset >= (char *)dup - (char *)hdr);
+ ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
+ ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
/*
* Look up the entry in the bestfree table.
*/
- dfp = xfs_dir2_data_freefind(d, dup);
+ dfp = xfs_dir2_data_freefind(hdr, dup);
oldlen = be16_to_cpu(dup->length);
- ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
+ ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
/*
* Check for alignment with front and back of the entry.
*/
- matchfront = (char *)dup - (char *)d == offset;
- matchback = (char *)dup + oldlen - (char *)d == offset + len;
+ matchfront = (char *)dup - (char *)hdr == offset;
+ matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
ASSERT(*needscanp == 0);
needscan = 0;
/*
@@ -734,9 +740,9 @@
*/
if (matchfront && matchback) {
if (dfp) {
- needscan = (d->hdr.bestfree[2].offset != 0);
+ needscan = (hdr->bestfree[2].offset != 0);
if (!needscan)
- xfs_dir2_data_freeremove(d, dfp, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
}
}
/*
@@ -744,27 +750,27 @@
* Make a new entry with the remaining freespace.
*/
else if (matchfront) {
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(oldlen - len);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+ ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
* choice for the last slot, or not. Rescan.
*/
- needscan = dfp == &d->hdr.bestfree[2];
+ needscan = dfp == &hdr->bestfree[2];
}
}
/*
@@ -773,25 +779,25 @@
*/
else if (matchback) {
newdup = dup;
- newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+ newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+ ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
* choice for the last slot, or not. Rescan.
*/
- needscan = dfp == &d->hdr.bestfree[2];
+ needscan = dfp == &hdr->bestfree[2];
}
}
/*
@@ -800,15 +806,15 @@
*/
else {
newdup = dup;
- newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+ newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
- newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
*xfs_dir2_data_unused_tag_p(newdup2) =
- cpu_to_be16((char *)newdup2 - (char *)d);
+ cpu_to_be16((char *)newdup2 - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup2);
/*
* If the old entry was in the table, we need to scan
@@ -819,13 +825,12 @@
* the 2 new will work.
*/
if (dfp) {
- needscan = (d->hdr.bestfree[2].length != 0);
+ needscan = (hdr->bestfree[2].length != 0);
if (!needscan) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup,
- needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup2,
- needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup2,
+ needlogp);
}
}
}
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
deleted file mode 100644
index efbc290..0000000
--- a/fs/xfs/xfs_dir2_data.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_DATA_H__
-#define __XFS_DIR2_DATA_H__
-
-/*
- * Directory format 2, data block structures.
- */
-
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Constants.
- */
-#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */
-#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
-#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
-#define XFS_DIR2_DATA_FREE_TAG 0xffff
-#define XFS_DIR2_DATA_FD_COUNT 3
-
-/*
- * Directory address space divided into sections,
- * spaces separated by 32GB.
- */
-#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
-#define XFS_DIR2_DATA_SPACE 0
-#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_DATA_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
-
-/*
- * Offsets of . and .. in data space (always block 0)
- */
-#define XFS_DIR2_DATA_DOT_OFFSET \
- ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
-#define XFS_DIR2_DATA_DOTDOT_OFFSET \
- (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
-#define XFS_DIR2_DATA_FIRST_OFFSET \
- (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
-
-/*
- * Structures.
- */
-
-/*
- * Describe a free area in the data block.
- * The freespace will be formatted as a xfs_dir2_data_unused_t.
- */
-typedef struct xfs_dir2_data_free {
- __be16 offset; /* start of freespace */
- __be16 length; /* length of freespace */
-} xfs_dir2_data_free_t;
-
-/*
- * Header for the data blocks.
- * Always at the beginning of a directory-sized block.
- * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
- */
-typedef struct xfs_dir2_data_hdr {
- __be32 magic; /* XFS_DIR2_DATA_MAGIC */
- /* or XFS_DIR2_BLOCK_MAGIC */
- xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
-} xfs_dir2_data_hdr_t;
-
-/*
- * Active entry in a data block. Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_entry {
- __be64 inumber; /* inode number */
- __u8 namelen; /* name length */
- __u8 name[1]; /* name bytes, no null */
- /* variable offset */
- __be16 tag; /* starting offset of us */
-} xfs_dir2_data_entry_t;
-
-/*
- * Unused entry in a data block. Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_unused {
- __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
- __be16 length; /* total free length */
- /* variable offset */
- __be16 tag; /* starting offset of us */
-} xfs_dir2_data_unused_t;
-
-typedef union {
- xfs_dir2_data_entry_t entry;
- xfs_dir2_data_unused_t unused;
-} xfs_dir2_data_union_t;
-
-/*
- * Generic data block structure, for xfs_db.
- */
-typedef struct xfs_dir2_data {
- xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */
- xfs_dir2_data_union_t u[1];
-} xfs_dir2_data_t;
-
-/*
- * Macros.
- */
-
-/*
- * Size of a data entry.
- */
-static inline int xfs_dir2_data_entsize(int n)
-{
- return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
- (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * Pointer to a freespace's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
-{
- return (__be16 *)((char *)dup +
- be16_to_cpu(dup->length) - sizeof(__be16));
-}
-
-/*
- * Function declarations.
- */
-#ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
-#else
-#define xfs_dir2_data_check(dp,bp)
-#endif
-extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
- xfs_dir2_data_unused_t *dup);
-extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
- xfs_dir2_data_unused_t *dup, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
- int *loghead);
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
- struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_entry_t *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_unused_t *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_aoff_t offset,
- xfs_dir2_data_aoff_t len, int *needlogp,
- int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_unused_t *dup,
- xfs_dir2_data_aoff_t offset,
- xfs_dir2_data_aoff_t len, int *needlogp,
- int *needscanp);
-
-#endif /* __XFS_DIR2_DATA_H__ */
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
new file mode 100644
index 0000000..0727098
--- /dev/null
+++ b/fs/xfs/xfs_dir2_format.h
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_DIR2_FORMAT_H__
+#define __XFS_DIR2_FORMAT_H__
+
+/*
+ * Directory version 2.
+ *
+ * There are 4 possible formats:
+ * - shortform - embedded into the inode
+ * - single block - data with embedded leaf at the end
+ * - multiple data blocks, single leaf+freeindex block
+ * - data blocks, node and leaf blocks (btree), freeindex blocks
+ *
+ * Note: many node block structures and constants are shared with the attr
+ * code and defined in xfs_da_btree.h.
+ */
+
+#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */
+#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */
+#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
+
+/*
+ * Byte offset in data block and shortform entry.
+ */
+typedef __uint16_t xfs_dir2_data_off_t;
+#define NULLDATAOFF 0xffffU
+typedef uint xfs_dir2_data_aoff_t; /* argument form */
+
+/*
+ * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
+ * Only need 16 bits, this is the byte offset into the single block form.
+ */
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
+
+/*
+ * Offset in data space of a data entry.
+ */
+typedef __uint32_t xfs_dir2_dataptr_t;
+#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
+#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
+
+/*
+ * Byte offset in a directory.
+ */
+typedef xfs_off_t xfs_dir2_off_t;
+
+/*
+ * Directory block number (logical dirblk in file)
+ */
+typedef __uint32_t xfs_dir2_db_t;
+
+/*
+ * Inode number stored as 8 8-bit values.
+ */
+typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
+
+/*
+ * Inode number stored as 4 8-bit values.
+ * Works a lot of the time, when all the inode numbers in a directory
+ * fit in 32 bits.
+ */
+typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
+
+typedef union {
+ xfs_dir2_ino8_t i8;
+ xfs_dir2_ino4_t i4;
+} xfs_dir2_inou_t;
+#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to fit into the
+ * literal area of the inode. These "shortform" directories consist of a
+ * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
+ * structures. Due to the different inode number storage size and the variable
+ * length name field in the xfs_dir2_sf_entry all these structures are
+ * variable length, and the accessors in this file should be used to iterate
+ * over them.
+ */
+typedef struct xfs_dir2_sf_hdr {
+ __uint8_t count; /* count of entries */
+ __uint8_t i8count; /* count of 8-byte inode #s */
+ xfs_dir2_inou_t parent; /* parent dir inode number */
+} __arch_pack xfs_dir2_sf_hdr_t;
+
+typedef struct xfs_dir2_sf_entry {
+ __u8 namelen; /* actual name length */
+ xfs_dir2_sf_off_t offset; /* saved offset */
+ __u8 name[]; /* name, variable size */
+ /*
+ * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
+ * variable offset after the name.
+ */
+} __arch_pack xfs_dir2_sf_entry_t;
+
+static inline int xfs_dir2_sf_hdr_size(int i8count)
+{
+ return sizeof(struct xfs_dir2_sf_hdr) -
+ (i8count == 0) *
+ (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
+}
+
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
+{
+ return get_unaligned_be16(&sfep->offset.i);
+}
+
+static inline void
+xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
+{
+ put_unaligned_be16(off, &sfep->offset.i);
+}
+
+static inline int
+xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
+{
+ return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */
+ len + /* name */
+ (hdr->i8count ? /* ino */
+ sizeof(xfs_dir2_ino8_t) :
+ sizeof(xfs_dir2_ino4_t));
+}
+
+static inline struct xfs_dir2_sf_entry *
+xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
+{
+ return (struct xfs_dir2_sf_entry *)
+ ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
+}
+
+static inline struct xfs_dir2_sf_entry *
+xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (struct xfs_dir2_sf_entry *)
+ ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
+}
+
+
+/*
+ * Data block structures.
+ *
+ * A pure data block looks like the following drawing on disk:
+ *
+ * +-------------------------------------------------+
+ * | xfs_dir2_data_hdr_t |
+ * +-------------------------------------------------+
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | ... |
+ * +-------------------------------------------------+
+ * | unused space |
+ * +-------------------------------------------------+
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ *
+ * In addition to the pure data blocks for the data and node formats,
+ * most structures are also used for the combined data/freespace "block"
+ * format below.
+ */
+
+#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
+#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
+#define XFS_DIR2_DATA_FREE_TAG 0xffff
+#define XFS_DIR2_DATA_FD_COUNT 3
+
+/*
+ * Directory address space divided into sections,
+ * spaces separated by 32GB.
+ */
+#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
+#define XFS_DIR2_DATA_SPACE 0
+#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
+#define XFS_DIR2_DATA_FIRSTDB(mp) \
+ xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
+
+/*
+ * Offsets of . and .. in data space (always block 0)
+ */
+#define XFS_DIR2_DATA_DOT_OFFSET \
+ ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
+#define XFS_DIR2_DATA_DOTDOT_OFFSET \
+ (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
+#define XFS_DIR2_DATA_FIRST_OFFSET \
+ (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
+
+/*
+ * Describe a free area in the data block.
+ *
+ * The freespace will be formatted as a xfs_dir2_data_unused_t.
+ */
+typedef struct xfs_dir2_data_free {
+ __be16 offset; /* start of freespace */
+ __be16 length; /* length of freespace */
+} xfs_dir2_data_free_t;
+
+/*
+ * Header for the data blocks.
+ *
+ * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
+ */
+typedef struct xfs_dir2_data_hdr {
+ __be32 magic; /* XFS_DIR2_DATA_MAGIC or */
+ /* XFS_DIR2_BLOCK_MAGIC */
+ xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
+} xfs_dir2_data_hdr_t;
+
+/*
+ * Active entry in a data block.
+ *
+ * Aligned to 8 bytes. After the variable length name field there is a
+ * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
+ */
+typedef struct xfs_dir2_data_entry {
+ __be64 inumber; /* inode number */
+ __u8 namelen; /* name length */
+ __u8 name[]; /* name bytes, no null */
+ /* __be16 tag; */ /* starting offset of us */
+} xfs_dir2_data_entry_t;
+
+/*
+ * Unused entry in a data block.
+ *
+ * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed
+ * using xfs_dir2_data_unused_tag_p.
+ */
+typedef struct xfs_dir2_data_unused {
+ __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
+ __be16 length; /* total free length */
+ /* variable offset */
+ __be16 tag; /* starting offset of us */
+} xfs_dir2_data_unused_t;
+
+/*
+ * Size of a data entry.
+ */
+static inline int xfs_dir2_data_entsize(int n)
+{
+ return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
+ (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
+{
+ return (__be16 *)((char *)dep +
+ xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
+}
+
+/*
+ * Pointer to a freespace's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
+{
+ return (__be16 *)((char *)dup +
+ be16_to_cpu(dup->length) - sizeof(__be16));
+}
+
+/*
+ * Leaf block structures.
+ *
+ * A pure leaf block looks like the following drawing on disk:
+ *
+ * +---------------------------+
+ * | xfs_dir2_leaf_hdr_t |
+ * +---------------------------+
+ * | xfs_dir2_leaf_entry_t |
+ * | xfs_dir2_leaf_entry_t |
+ * | xfs_dir2_leaf_entry_t |
+ * | xfs_dir2_leaf_entry_t |
+ * | ... |
+ * +---------------------------+
+ * | xfs_dir2_data_off_t |
+ * | xfs_dir2_data_off_t |
+ * | xfs_dir2_data_off_t |
+ * | ... |
+ * +---------------------------+
+ * | xfs_dir2_leaf_tail_t |
+ * +---------------------------+
+ *
+ * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
+ * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
+ * for directories with separate leaf nodes and free space blocks
+ * (magic = XFS_DIR2_LEAFN_MAGIC).
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ */
+
+/*
+ * Offset of the leaf/node space. First block in this space
+ * is the btree root.
+ */
+#define XFS_DIR2_LEAF_SPACE 1
+#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
+#define XFS_DIR2_LEAF_FIRSTDB(mp) \
+ xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
+
+/*
+ * Leaf block header.
+ */
+typedef struct xfs_dir2_leaf_hdr {
+ xfs_da_blkinfo_t info; /* header for da routines */
+ __be16 count; /* count of entries */
+ __be16 stale; /* count of stale entries */
+} xfs_dir2_leaf_hdr_t;
+
+/*
+ * Leaf block entry.
+ */
+typedef struct xfs_dir2_leaf_entry {
+ __be32 hashval; /* hash value of name */
+ __be32 address; /* address of data entry */
+} xfs_dir2_leaf_entry_t;
+
+/*
+ * Leaf block tail.
+ */
+typedef struct xfs_dir2_leaf_tail {
+ __be32 bestcount;
+} xfs_dir2_leaf_tail_t;
+
+/*
+ * Leaf block.
+ */
+typedef struct xfs_dir2_leaf {
+ xfs_dir2_leaf_hdr_t hdr; /* leaf header */
+ xfs_dir2_leaf_entry_t ents[]; /* entries */
+} xfs_dir2_leaf_t;
+
+/*
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
+
+static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+{
+ return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
+ (uint)sizeof(struct xfs_dir2_leaf_entry);
+}
+
+/*
+ * Get address of the bestcount field in the single-leaf block.
+ */
+static inline struct xfs_dir2_leaf_tail *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
+{
+ return (struct xfs_dir2_leaf_tail *)
+ ((char *)lp + mp->m_dirblksize -
+ sizeof(struct xfs_dir2_leaf_tail));
+}
+
+/*
+ * Get address of the bests array in the single-leaf block.
+ */
+static inline __be16 *
+xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
+{
+ return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
+}
+
+/*
+ * Convert dataptr to byte in file space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+ return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/*
+ * Convert byte in file space to dataptr. It had better be aligned.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+ return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to (DB) block
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+ return (xfs_dir2_db_t)
+ (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
+}
+
+/*
+ * Convert dataptr to a block number
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+ return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/*
+ * Convert byte in space to offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+ return (xfs_dir2_data_aoff_t)(by &
+ ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
+}
+
+/*
+ * Convert dataptr to a byte offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+ return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/*
+ * Convert block and offset to byte in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+ xfs_dir2_data_aoff_t o)
+{
+ return ((xfs_dir2_off_t)db <<
+ (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
+}
+
+/*
+ * Convert block (DB) to block (dablk)
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+ return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
+}
+
+/*
+ * Convert byte in space to (DA) block
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+ return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
+}
+
+/*
+ * Convert block and offset to dataptr
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+ xfs_dir2_data_aoff_t o)
+{
+ return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
+}
+
+/*
+ * Convert block (dablk) to block (DB)
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
+{
+ return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
+}
+
+/*
+ * Convert block (dablk) to byte offset in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
+{
+ return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
+}
+
+/*
+ * Free space block definitions for the node format.
+ */
+
+/*
+ * Offset of the freespace index.
+ */
+#define XFS_DIR2_FREE_SPACE 2
+#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
+#define XFS_DIR2_FREE_FIRSTDB(mp) \
+ xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
+
+typedef struct xfs_dir2_free_hdr {
+ __be32 magic; /* XFS_DIR2_FREE_MAGIC */
+ __be32 firstdb; /* db of first entry */
+ __be32 nvalid; /* count of valid entries */
+ __be32 nused; /* count of used entries */
+} xfs_dir2_free_hdr_t;
+
+typedef struct xfs_dir2_free {
+ xfs_dir2_free_hdr_t hdr; /* block header */
+ __be16 bests[]; /* best free counts */
+ /* unused entries are -1 */
+} xfs_dir2_free_t;
+
+static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
+{
+ return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
+ sizeof(xfs_dir2_data_off_t);
+}
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+ return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static inline int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+ return db % xfs_dir2_free_max_bests(mp);
+}
+
+/*
+ * Single block format.
+ *
+ * The single block format looks like the following drawing on disk:
+ *
+ * +-------------------------------------------------+
+ * | xfs_dir2_data_hdr_t |
+ * +-------------------------------------------------+
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ * | ... |
+ * +-------------------------------------------------+
+ * | unused space |
+ * +-------------------------------------------------+
+ * | ... |
+ * | xfs_dir2_leaf_entry_t |
+ * | xfs_dir2_leaf_entry_t |
+ * +-------------------------------------------------+
+ * | xfs_dir2_block_tail_t |
+ * +-------------------------------------------------+
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ */
+
+typedef struct xfs_dir2_block_tail {
+ __be32 count; /* count of leaf entries */
+ __be32 stale; /* count of stale lf entries */
+} xfs_dir2_block_tail_t;
+
+/*
+ * Pointer to the block tail embedded in a data block (1-block format)
+ */
+static inline struct xfs_dir2_block_tail *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
+{
+ return ((struct xfs_dir2_block_tail *)
+ ((char *)hdr + mp->m_dirblksize)) - 1;
+}
+
+/*
+ * Pointer to the leaf entries embedded in a data block (1-block format)
+ */
+static inline struct xfs_dir2_leaf_entry *
+xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
+{
+ return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
+}
+
+#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae89122..ca2386d 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,18 +24,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -64,7 +60,7 @@
{
__be16 *bestsp; /* leaf's bestsp entries */
xfs_dablk_t blkno; /* leaf block's bno */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */
xfs_dir2_block_tail_t *btp; /* block's tail */
xfs_inode_t *dp; /* incore directory inode */
@@ -101,9 +97,9 @@
}
ASSERT(lbp != NULL);
leaf = lbp->data;
- block = dbp->data;
+ hdr = dbp->data;
xfs_dir2_data_check(dp, dbp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Set the counts in the leaf header.
@@ -123,23 +119,23 @@
* tail be free.
*/
xfs_dir2_data_make_free(tp, dbp,
- (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
- (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+ (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+ (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
(char *)blp),
&needlog, &needscan);
/*
* Fix up the block header, make it a data block.
*/
- block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+ hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Set up leaf tail and bests table.
*/
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
ltp->bestcount = cpu_to_be32(1);
bestsp = xfs_dir2_leaf_bests_p(ltp);
- bestsp[0] = block->hdr.bestfree[0].length;
+ bestsp[0] = hdr->bestfree[0].length;
/*
* Log the data header and leaf bests table.
*/
@@ -152,6 +148,131 @@
return 0;
}
+STATIC void
+xfs_dir2_leaf_find_stale(
+ struct xfs_dir2_leaf *leaf,
+ int index,
+ int *lowstale,
+ int *highstale)
+{
+ /*
+ * Find the first stale entry before our index, if any.
+ */
+ for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
+ if (leaf->ents[*lowstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ break;
+ }
+
+ /*
+ * Find the first stale entry at or after our index, if any.
+ * Stop if the result would require moving more entries than using
+ * lowstale.
+ */
+ for (*highstale = index;
+ *highstale < be16_to_cpu(leaf->hdr.count);
+ ++*highstale) {
+ if (leaf->ents[*highstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ break;
+ if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
+ break;
+ }
+}
+
+struct xfs_dir2_leaf_entry *
+xfs_dir2_leaf_find_entry(
+ xfs_dir2_leaf_t *leaf, /* leaf structure */
+ int index, /* leaf table position */
+ int compact, /* need to compact leaves */
+ int lowstale, /* index of prev stale leaf */
+ int highstale, /* index of next stale leaf */
+ int *lfloglow, /* low leaf logging index */
+ int *lfloghigh) /* high leaf logging index */
+{
+ if (!leaf->hdr.stale) {
+ xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
+
+ /*
+ * Now we need to make room to insert the leaf entry.
+ *
+ * If there are no stale entries, just insert a hole at index.
+ */
+ lep = &leaf->ents[index];
+ if (index < be16_to_cpu(leaf->hdr.count))
+ memmove(lep + 1, lep,
+ (be16_to_cpu(leaf->hdr.count) - index) *
+ sizeof(*lep));
+
+ /*
+ * Record low and high logging indices for the leaf.
+ */
+ *lfloglow = index;
+ *lfloghigh = be16_to_cpu(leaf->hdr.count);
+ be16_add_cpu(&leaf->hdr.count, 1);
+ return lep;
+ }
+
+ /*
+ * There are stale entries.
+ *
+ * We will use one of them for the new entry. It's probably not at
+ * the right location, so we'll have to shift some up or down first.
+ *
+ * If we didn't compact before, we need to find the nearest stale
+ * entries before and after our insertion point.
+ */
+ if (compact == 0)
+ xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
+ /*
+ * If the low one is better, use it.
+ */
+ if (lowstale >= 0 &&
+ (highstale == be16_to_cpu(leaf->hdr.count) ||
+ index - lowstale - 1 < highstale - index)) {
+ ASSERT(index - lowstale - 1 >= 0);
+ ASSERT(leaf->ents[lowstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+ /*
+ * Copy entries up to cover the stale entry and make room
+ * for the new entry.
+ */
+ if (index - lowstale - 1 > 0) {
+ memmove(&leaf->ents[lowstale],
+ &leaf->ents[lowstale + 1],
+ (index - lowstale - 1) *
+ sizeof(xfs_dir2_leaf_entry_t));
+ }
+ *lfloglow = MIN(lowstale, *lfloglow);
+ *lfloghigh = MAX(index - 1, *lfloghigh);
+ be16_add_cpu(&leaf->hdr.stale, -1);
+ return &leaf->ents[index - 1];
+ }
+
+ /*
+ * The high one is better, so use that one.
+ */
+ ASSERT(highstale - index >= 0);
+ ASSERT(leaf->ents[highstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+ /*
+ * Copy entries down to cover the stale entry and make room for the
+ * new entry.
+ */
+ if (highstale - index > 0) {
+ memmove(&leaf->ents[index + 1],
+ &leaf->ents[index],
+ (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
+ }
+ *lfloglow = MIN(index, *lfloglow);
+ *lfloghigh = MAX(highstale, *lfloghigh);
+ be16_add_cpu(&leaf->hdr.stale, -1);
+ return &leaf->ents[index];
+}
+
/*
* Add an entry to a leaf form directory.
*/
@@ -161,7 +282,7 @@
{
__be16 *bestsp; /* freespace table in leaf */
int compact; /* need to compact leaves */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
@@ -225,7 +346,7 @@
continue;
i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
ASSERT(i < be32_to_cpu(ltp->bestcount));
- ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
+ ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
if (be16_to_cpu(bestsp[i]) >= length) {
use_block = i;
break;
@@ -239,7 +360,8 @@
/*
* Remember a block we see that's missing.
*/
- if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
+ if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
+ use_block == -1)
use_block = i;
else if (be16_to_cpu(bestsp[i]) >= length) {
use_block = i;
@@ -250,14 +372,17 @@
/*
* How many bytes do we need in the leaf block?
*/
- needbytes =
- (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) +
- (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
+ needbytes = 0;
+ if (!leaf->hdr.stale)
+ needbytes += sizeof(xfs_dir2_leaf_entry_t);
+ if (use_block == -1)
+ needbytes += sizeof(xfs_dir2_data_off_t);
+
/*
* Now kill use_block if it refers to a missing block, so we
* can use it as an indication of allocation needed.
*/
- if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
+ if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
use_block = -1;
/*
* If we don't have enough free bytes but we can make enough
@@ -369,8 +494,8 @@
*/
else
xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
- data = dbp->data;
- bestsp[use_block] = data->hdr.bestfree[0].length;
+ hdr = dbp->data;
+ bestsp[use_block] = hdr->bestfree[0].length;
grown = 1;
}
/*
@@ -384,7 +509,7 @@
xfs_da_brelse(tp, lbp);
return error;
}
- data = dbp->data;
+ hdr = dbp->data;
grown = 0;
}
xfs_dir2_data_check(dp, dbp);
@@ -392,14 +517,14 @@
* Point to the biggest freespace in our data block.
*/
dup = (xfs_dir2_data_unused_t *)
- ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+ ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
ASSERT(be16_to_cpu(dup->length) >= length);
needscan = needlog = 0;
/*
* Mark the initial part of our freespace in use for the new entry.
*/
xfs_dir2_data_use_free(tp, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
* Initialize our new entry (at last).
@@ -409,12 +534,12 @@
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)data);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Need to scan fix up the bestfree table.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Need to log the data block's header.
*/
@@ -425,107 +550,15 @@
* If the bests table needs to be changed, do it.
* Log the change unless we've already done that.
*/
- if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
- bestsp[use_block] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
+ bestsp[use_block] = hdr->bestfree[0].length;
if (!grown)
xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
}
- /*
- * Now we need to make room to insert the leaf entry.
- * If there are no stale entries, we just insert a hole at index.
- */
- if (!leaf->hdr.stale) {
- /*
- * lep is still good as the index leaf entry.
- */
- if (index < be16_to_cpu(leaf->hdr.count))
- memmove(lep + 1, lep,
- (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
- /*
- * Record low and high logging indices for the leaf.
- */
- lfloglow = index;
- lfloghigh = be16_to_cpu(leaf->hdr.count);
- be16_add_cpu(&leaf->hdr.count, 1);
- }
- /*
- * There are stale entries.
- * We will use one of them for the new entry.
- * It's probably not at the right location, so we'll have to
- * shift some up or down first.
- */
- else {
- /*
- * If we didn't compact before, we need to find the nearest
- * stale entries before and after our insertion point.
- */
- if (compact == 0) {
- /*
- * Find the first stale entry before the insertion
- * point, if any.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) !=
- XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find the next stale entry at or after the insertion
- * point, if any. Stop if we go so far that the
- * lowstale entry would be better.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) !=
- XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 ||
- index - lowstale - 1 >= highstale - index);
- highstale++)
- continue;
- }
- /*
- * If the low one is better, use it.
- */
- if (lowstale >= 0 &&
- (highstale == be16_to_cpu(leaf->hdr.count) ||
- index - lowstale - 1 < highstale - index)) {
- ASSERT(index - lowstale - 1 >= 0);
- ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- /*
- * Copy entries up to cover the stale entry
- * and make room for the new entry.
- */
- if (index - lowstale - 1 > 0)
- memmove(&leaf->ents[lowstale],
- &leaf->ents[lowstale + 1],
- (index - lowstale - 1) * sizeof(*lep));
- lep = &leaf->ents[index - 1];
- lfloglow = MIN(lowstale, lfloglow);
- lfloghigh = MAX(index - 1, lfloghigh);
- }
- /*
- * The high one is better, so use that one.
- */
- else {
- ASSERT(highstale - index >= 0);
- ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- /*
- * Copy entries down to cover the stale entry
- * and make room for the new entry.
- */
- if (highstale - index > 0)
- memmove(&leaf->ents[index + 1],
- &leaf->ents[index],
- (highstale - index) * sizeof(*lep));
- lep = &leaf->ents[index];
- lfloglow = MIN(index, lfloglow);
- lfloghigh = MAX(highstale, lfloghigh);
- }
- be16_add_cpu(&leaf->hdr.stale, -1);
- }
+
+ lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+ highstale, &lfloglow, &lfloghigh);
+
/*
* Fill in the new leaf entry.
*/
@@ -562,7 +595,7 @@
leaf = bp->data;
mp = dp->i_mount;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
/*
* This value is not restrictive enough.
* Should factor in the size of the bests table as well.
@@ -582,7 +615,7 @@
if (i + 1 < be16_to_cpu(leaf->hdr.count))
ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
be32_to_cpu(leaf->ents[i + 1].hashval));
- if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -611,7 +644,8 @@
* Compress out the stale entries in place.
*/
for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
- if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
/*
* Only actually copy the entries that are different.
@@ -663,24 +697,9 @@
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
index = *indexp;
- /*
- * Find the first stale entry before our index, if any.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find the first stale entry at or after our index, if any.
- * Stop if the answer would be worse than lowstale.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 || index - lowstale > highstale - index);
- highstale++)
- continue;
+
+ xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
/*
* Pick the better of lowstale and highstale.
*/
@@ -701,7 +720,8 @@
if (index == from)
newindex = to;
if (from != keepstale &&
- be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
+ leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
if (from == to)
*lowlogp = to;
continue;
@@ -760,7 +780,7 @@
int byteoff; /* offset in current block */
xfs_dir2_db_t curdb; /* db for current block */
xfs_dir2_off_t curoff; /* current overall offset */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
int error = 0; /* error return value */
@@ -1018,23 +1038,23 @@
else if (curoff > newoff)
ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
curdb);
- data = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
/*
* Find our position in the block.
*/
- ptr = (char *)&data->u;
+ ptr = (char *)(hdr + 1);
byteoff = xfs_dir2_byte_to_off(mp, curoff);
/*
* Skip past the header.
*/
if (byteoff == 0)
- curoff += (uint)sizeof(data->hdr);
+ curoff += (uint)sizeof(*hdr);
/*
* Skip past entries until we reach our offset.
*/
else {
- while ((char *)ptr - (char *)data < byteoff) {
+ while ((char *)ptr - (char *)hdr < byteoff) {
dup = (xfs_dir2_data_unused_t *)ptr;
if (be16_to_cpu(dup->freetag)
@@ -1055,8 +1075,8 @@
curoff =
xfs_dir2_db_off_to_byte(mp,
xfs_dir2_byte_to_db(mp, curoff),
- (char *)ptr - (char *)data);
- if (ptr >= (char *)data + mp->m_dirblksize) {
+ (char *)ptr - (char *)hdr);
+ if (ptr >= (char *)hdr + mp->m_dirblksize) {
continue;
}
}
@@ -1179,7 +1199,7 @@
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
firstb = xfs_dir2_leaf_bests_p(ltp) + first;
lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1202,8 +1222,8 @@
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
- be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+ leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
firstlep = &leaf->ents[first];
lastlep = &leaf->ents[last];
xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1221,8 +1241,8 @@
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
- be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+ leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
(uint)(sizeof(leaf->hdr) - 1));
}
@@ -1241,7 +1261,7 @@
mp = tp->t_mountp;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
(uint)(mp->m_dirblksize - 1));
@@ -1437,7 +1457,7 @@
xfs_da_args_t *args) /* operation arguments */
{
__be16 *bestsp; /* leaf block best freespace */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry structure */
@@ -1467,7 +1487,7 @@
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->data;
- data = dbp->data;
+ hdr = dbp->data;
xfs_dir2_data_check(dp, dbp);
/*
* Point to the leaf entry, use that to point to the data entry.
@@ -1475,9 +1495,9 @@
lep = &leaf->ents[index];
db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
dep = (xfs_dir2_data_entry_t *)
- ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
needscan = needlog = 0;
- oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
+ oldbest = be16_to_cpu(hdr->bestfree[0].length);
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1485,7 +1505,7 @@
* Mark the former data entry unused.
*/
xfs_dir2_data_make_free(tp, dbp,
- (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
+ (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
/*
* We just mark the leaf entry stale by putting a null in it.
@@ -1499,23 +1519,23 @@
* log the data block header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
/*
* If the longest freespace in the data block has changed,
* put the new value in the bests table and log that.
*/
- if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
- bestsp[db] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
+ bestsp[db] = hdr->bestfree[0].length;
xfs_dir2_leaf_log_bests(tp, lbp, db, db);
}
xfs_dir2_data_check(dp, dbp);
/*
* If the data block is now empty then get rid of the data block.
*/
- if (be16_to_cpu(data->hdr.bestfree[0].length) ==
- mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+ if (be16_to_cpu(hdr->bestfree[0].length) ==
+ mp->m_dirblksize - (uint)sizeof(*hdr)) {
ASSERT(db != mp->m_dirdatablk);
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
/*
@@ -1542,7 +1562,7 @@
* Look for the last active entry (i).
*/
for (i = db - 1; i > 0; i--) {
- if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
+ if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
break;
}
/*
@@ -1686,9 +1706,6 @@
xfs_dir2_db_t db) /* data block number */
{
__be16 *bestsp; /* leaf bests table */
-#ifdef DEBUG
- xfs_dir2_data_t *data; /* data block structure */
-#endif
xfs_dabuf_t *dbp; /* data block buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
@@ -1707,20 +1724,21 @@
XFS_DATA_FORK))) {
return error;
}
-#ifdef DEBUG
- data = dbp->data;
- ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
-#endif
- /* this seems to be an error
- * data is only valid if DEBUG is defined?
- * RMC 09/08/1999
- */
leaf = lbp->data;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
- ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
- mp->m_dirblksize - (uint)sizeof(data->hdr));
+
+#ifdef DEBUG
+{
+ struct xfs_dir2_data_hdr *hdr = dbp->data;
+
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+ ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
+ mp->m_dirblksize - (uint)sizeof(*hdr));
ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
+}
+#endif
+
/*
* Get rid of the data block.
*/
@@ -1740,6 +1758,20 @@
return 0;
}
+static inline size_t
+xfs_dir2_leaf_size(
+	struct xfs_dir2_leaf_hdr	*hdr,
+	int				counts)
+{
+	/* Bytes needed: header + live (non-stale) entries + bests + tail. */
+	int	live = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
+	size_t	bytes = sizeof(xfs_dir2_leaf_hdr_t);
+
+	bytes += live * sizeof(xfs_dir2_leaf_entry_t);
+	bytes += counts * sizeof(xfs_dir2_data_off_t);
+	return bytes + sizeof(xfs_dir2_leaf_tail_t);
+}
+
/*
* Convert node form directory to leaf form directory.
* The root of the node form dir needs to already be a LEAFN block.
@@ -1810,7 +1842,7 @@
return 0;
lbp = state->path.blk[0].bp;
leaf = lbp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Read the freespace block.
*/
@@ -1819,20 +1851,19 @@
return error;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(!free->hdr.firstdb);
+
/*
* Now see if the leafn and free data will fit in a leaf1.
* If not, release the buffer and give up.
*/
- if ((uint)sizeof(leaf->hdr) +
- (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
- be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
- (uint)sizeof(leaf->tail) >
- mp->m_dirblksize) {
+ if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
+ mp->m_dirblksize) {
xfs_da_brelse(tp, fbp);
return 0;
}
+
/*
* If the leaf has any stale entries in it, compress them out.
* The compact routine will log the header.
@@ -1851,7 +1882,7 @@
* Set up the leaf bests table.
*/
memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
- be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
+ be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
xfs_dir2_leaf_log_tail(tp, lbp);
xfs_dir2_leaf_check(dp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
deleted file mode 100644
index 6c9539f..0000000
--- a/fs/xfs/xfs_dir2_leaf.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_LEAF_H__
-#define __XFS_DIR2_LEAF_H__
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Offset of the leaf/node space. First block in this space
- * is the btree root.
- */
-#define XFS_DIR2_LEAF_SPACE 1
-#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_LEAF_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
-
-/*
- * Offset in data space of a data entry.
- */
-typedef __uint32_t xfs_dir2_dataptr_t;
-#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
-#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
-
-/*
- * Leaf block header.
- */
-typedef struct xfs_dir2_leaf_hdr {
- xfs_da_blkinfo_t info; /* header for da routines */
- __be16 count; /* count of entries */
- __be16 stale; /* count of stale entries */
-} xfs_dir2_leaf_hdr_t;
-
-/*
- * Leaf block entry.
- */
-typedef struct xfs_dir2_leaf_entry {
- __be32 hashval; /* hash value of name */
- __be32 address; /* address of data entry */
-} xfs_dir2_leaf_entry_t;
-
-/*
- * Leaf block tail.
- */
-typedef struct xfs_dir2_leaf_tail {
- __be32 bestcount;
-} xfs_dir2_leaf_tail_t;
-
-/*
- * Leaf block.
- * bests and tail are at the end of the block for single-leaf only
- * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
- */
-typedef struct xfs_dir2_leaf {
- xfs_dir2_leaf_hdr_t hdr; /* leaf header */
- xfs_dir2_leaf_entry_t ents[1]; /* entries */
- /* ... */
- xfs_dir2_data_off_t bests[1]; /* best free counts */
- xfs_dir2_leaf_tail_t tail; /* leaf tail */
-} xfs_dir2_leaf_t;
-
-/*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
-{
- return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
- (uint)sizeof(xfs_dir2_leaf_entry_t));
-}
-
-/*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline xfs_dir2_leaf_tail_t *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
-{
- return (xfs_dir2_leaf_tail_t *)
- ((char *)(lp) + (mp)->m_dirblksize -
- (uint)sizeof(xfs_dir2_leaf_tail_t));
-}
-
-/*
- * Get address of the bests array in the single-leaf block.
- */
-static inline __be16 *
-xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
-{
- return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
-}
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr. It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_db_t)((by) >> \
- ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_data_aoff_t)((by) & \
- ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return ((xfs_dir2_off_t)(db) << \
- ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
- struct xfs_dabuf *dbp);
-extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
-extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
- int *lowstalep, int *highstalep,
- int *lowlogp, int *highlogp);
-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
- size_t bufsize, xfs_off_t *offset,
- filldir_t filldir);
-extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
- struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
- int first, int last);
-extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
-extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
-extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, xfs_dir2_db_t db);
-extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-
-#endif /* __XFS_DIR2_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index a0aab7d..084b324 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -23,18 +23,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -73,7 +69,7 @@
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
xfs_da_log_buf(tp, bp,
(uint)((char *)&free->bests[first] - (char *)free),
(uint)((char *)&free->bests[last] - (char *)free +
@@ -91,7 +87,7 @@
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
}
@@ -244,89 +240,13 @@
lfloglow = be16_to_cpu(leaf->hdr.count);
lfloghigh = -1;
}
- /*
- * No stale entries, just insert a space for the new entry.
- */
- if (!leaf->hdr.stale) {
- lep = &leaf->ents[index];
- if (index < be16_to_cpu(leaf->hdr.count))
- memmove(lep + 1, lep,
- (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
- lfloglow = index;
- lfloghigh = be16_to_cpu(leaf->hdr.count);
- be16_add_cpu(&leaf->hdr.count, 1);
- }
- /*
- * There are stale entries. We'll use one for the new entry.
- */
- else {
- /*
- * If we didn't do a compact then we need to figure out
- * which stale entry will be used.
- */
- if (compact == 0) {
- /*
- * Find first stale entry before our insertion point.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) !=
- XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find next stale entry after insertion point.
- * Stop looking if the answer would be worse than
- * lowstale already found.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) !=
- XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 ||
- index - lowstale - 1 >= highstale - index);
- highstale++)
- continue;
- }
- /*
- * Using the low stale entry.
- * Shift entries up toward the stale slot.
- */
- if (lowstale >= 0 &&
- (highstale == be16_to_cpu(leaf->hdr.count) ||
- index - lowstale - 1 < highstale - index)) {
- ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- ASSERT(index - lowstale - 1 >= 0);
- if (index - lowstale - 1 > 0)
- memmove(&leaf->ents[lowstale],
- &leaf->ents[lowstale + 1],
- (index - lowstale - 1) * sizeof(*lep));
- lep = &leaf->ents[index - 1];
- lfloglow = MIN(lowstale, lfloglow);
- lfloghigh = MAX(index - 1, lfloghigh);
- }
- /*
- * Using the high stale entry.
- * Shift entries down toward the stale slot.
- */
- else {
- ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- ASSERT(highstale - index >= 0);
- if (highstale - index > 0)
- memmove(&leaf->ents[index + 1],
- &leaf->ents[index],
- (highstale - index) * sizeof(*lep));
- lep = &leaf->ents[index];
- lfloglow = MIN(index, lfloglow);
- lfloghigh = MAX(highstale, lfloghigh);
- }
- be16_add_cpu(&leaf->hdr.stale, -1);
- }
+
/*
* Insert the new entry, log everything.
*/
+ lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+ highstale, &lfloglow, &lfloghigh);
+
lep->hashval = cpu_to_be32(args->hashval);
lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
args->blkno, args->index));
@@ -352,14 +272,14 @@
leaf = bp->data;
mp = dp->i_mount;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
be32_to_cpu(leaf->ents[i + 1].hashval));
}
- if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -378,7 +298,7 @@
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
if (!leaf->hdr.count)
@@ -417,7 +337,7 @@
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
#endif
@@ -434,7 +354,7 @@
curbp = state->extrablk.bp;
curfdb = state->extrablk.blkno;
free = curbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
length = xfs_dir2_data_entsize(args->namelen);
/*
@@ -488,7 +408,7 @@
ASSERT(be32_to_cpu(free->hdr.magic) ==
XFS_DIR2_FREE_MAGIC);
ASSERT((be32_to_cpu(free->hdr.firstdb) %
- XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+ xfs_dir2_free_max_bests(mp)) == 0);
ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
be32_to_cpu(free->hdr.nvalid));
@@ -500,7 +420,8 @@
/*
* If it has room, return it.
*/
- if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+ if (unlikely(free->bests[fi] ==
+ cpu_to_be16(NULLDATAOFF))) {
XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
@@ -561,7 +482,7 @@
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
#endif
@@ -742,7 +663,8 @@
int i; /* temp leaf index */
for (i = start_s, stale = 0; i < start_s + count; i++) {
- if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf_s->ents[i].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
} else
@@ -789,8 +711,8 @@
leaf1 = leaf1_bp->data;
leaf2 = leaf2_bp->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (be16_to_cpu(leaf1->hdr.count) > 0 &&
be16_to_cpu(leaf2->hdr.count) > 0 &&
(be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -918,7 +840,7 @@
xfs_da_state_blk_t *dblk, /* data block */
int *rval) /* resulting block needs join */
{
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
@@ -938,7 +860,7 @@
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Point to the entry we're removing.
*/
@@ -963,9 +885,9 @@
* in the data block in case it changes.
*/
dbp = dblk->bp;
- data = dbp->data;
- dep = (xfs_dir2_data_entry_t *)((char *)data + off);
- longest = be16_to_cpu(data->hdr.bestfree[0].length);
+ hdr = dbp->data;
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
+ longest = be16_to_cpu(hdr->bestfree[0].length);
needlog = needscan = 0;
xfs_dir2_data_make_free(tp, dbp, off,
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -974,7 +896,7 @@
* Log the data block header if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
xfs_dir2_data_check(dp, dbp);
@@ -982,7 +904,7 @@
* If the longest data block freespace changes, need to update
* the corresponding freeblock entry.
*/
- if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
+ if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
int error; /* error return value */
xfs_dabuf_t *fbp; /* freeblock buffer */
xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1000,27 +922,27 @@
return error;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(be32_to_cpu(free->hdr.firstdb) ==
- XFS_DIR2_MAX_FREE_BESTS(mp) *
+ xfs_dir2_free_max_bests(mp) *
(fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
/*
* Calculate which entry we need to fix.
*/
findex = xfs_dir2_db_to_fdindex(mp, db);
- longest = be16_to_cpu(data->hdr.bestfree[0].length);
+ longest = be16_to_cpu(hdr->bestfree[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
- if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+ if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
/*
* Try to punch out the data block.
*/
error = xfs_dir2_shrink_inode(args, db, dbp);
if (error == 0) {
dblk->bp = NULL;
- data = NULL;
+ hdr = NULL;
}
/*
* We can get ENOSPC if there's no space reservation.
@@ -1036,7 +958,7 @@
* If we got rid of the data block, we can eliminate that entry
* in the free block.
*/
- if (data == NULL) {
+ if (hdr == NULL) {
/*
* One less used entry in the free table.
*/
@@ -1052,7 +974,8 @@
int i; /* free entry index */
for (i = findex - 1;
- i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
+ i >= 0 &&
+ free->bests[i] == cpu_to_be16(NULLDATAOFF);
i--)
continue;
free->hdr.nvalid = cpu_to_be32(i + 1);
@@ -1209,7 +1132,7 @@
*/
blk = &state->path.blk[state->path.active - 1];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1268,7 +1191,7 @@
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = state->blocksize - (state->blocksize >> 2);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes -= count * (uint)sizeof(leaf->ents[0]);
/*
@@ -1327,8 +1250,8 @@
ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
drop_leaf = drop_blk->bp->data;
save_leaf = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
- ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+ ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* If there are any stale leaf entries, take this opportunity
* to purge them.
@@ -1432,7 +1355,7 @@
xfs_da_args_t *args, /* operation arguments */
xfs_da_state_blk_t *fblk) /* optional freespace block */
{
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t dbno; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
@@ -1469,7 +1392,7 @@
*/
ifbno = fblk->blkno;
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = fblk->index;
/*
* This means the free entry showed that the data block had
@@ -1553,7 +1476,7 @@
continue;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = 0;
}
/*
@@ -1680,12 +1603,12 @@
free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
free->hdr.firstdb = cpu_to_be32(
(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
- XFS_DIR2_MAX_FREE_BESTS(mp));
+ xfs_dir2_free_max_bests(mp));
free->hdr.nvalid = 0;
free->hdr.nused = 0;
} else {
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
/*
@@ -1697,7 +1620,7 @@
* freespace block, extend that table.
*/
if (findex >= be32_to_cpu(free->hdr.nvalid)) {
- ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
+ ASSERT(findex < xfs_dir2_free_max_bests(mp));
free->hdr.nvalid = cpu_to_be32(findex + 1);
/*
* Tag new entry so nused will go up.
@@ -1708,7 +1631,7 @@
* If this entry was for an empty data block
* (this should always be true) then update the header.
*/
- if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
+ if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
be32_add_cpu(&free->hdr.nused, 1);
xfs_dir2_free_log_header(tp, fbp);
}
@@ -1717,8 +1640,8 @@
* We haven't allocated the data entry yet so this will
* change again.
*/
- data = dbp->data;
- free->bests[findex] = data->hdr.bestfree[0].length;
+ hdr = dbp->data;
+ free->bests[findex] = hdr->bestfree[0].length;
logfree = 1;
}
/*
@@ -1743,21 +1666,21 @@
xfs_da_buf_done(fbp);
return error;
}
- data = dbp->data;
+ hdr = dbp->data;
logfree = 0;
}
- ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
+ ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
/*
* Point to the existing unused space.
*/
dup = (xfs_dir2_data_unused_t *)
- ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+ ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
needscan = needlog = 0;
/*
* Mark the first part of the unused space, inuse for us.
*/
xfs_dir2_data_use_free(tp, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
* Fill in the new entry and log it.
@@ -1767,13 +1690,13 @@
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)data);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, dbp, dep);
/*
* Rescan the block for bestfree if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Log the data block header if needed.
*/
@@ -1782,8 +1705,8 @@
/*
* If the freespace entry is now wrong, update it.
*/
- if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
- free->bests[findex] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
+ free->bests[findex] = hdr->bestfree[0].length;
logfree = 1;
}
/*
@@ -1933,7 +1856,7 @@
xfs_da_args_t *args) /* operation arguments */
{
xfs_da_state_blk_t *blk; /* leaf block */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry changed */
int error; /* error return value */
int i; /* btree level */
@@ -1977,10 +1900,10 @@
/*
* Point to the data entry.
*/
- data = state->extrablk.bp->data;
- ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+ hdr = state->extrablk.bp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
dep = (xfs_dir2_data_entry_t *)
- ((char *)data +
+ ((char *)hdr +
xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
ASSERT(inum != be64_to_cpu(dep->inumber));
/*
@@ -2044,7 +1967,7 @@
return 0;
}
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
/*
* If there are used entries, there's nothing to do.
*/
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
deleted file mode 100644
index 82dfe71..0000000
--- a/fs/xfs/xfs_dir2_node.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_NODE_H__
-#define __XFS_DIR2_NODE_H__
-
-/*
- * Directory version 2, btree node format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_da_state;
-struct xfs_da_state_blk;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Offset of the freespace index.
- */
-#define XFS_DIR2_FREE_SPACE 2
-#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_FREE_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
-
-#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
-
-typedef struct xfs_dir2_free_hdr {
- __be32 magic; /* XFS_DIR2_FREE_MAGIC */
- __be32 firstdb; /* db of first entry */
- __be32 nvalid; /* count of valid entries */
- __be32 nused; /* count of used entries */
-} xfs_dir2_free_hdr_t;
-
-typedef struct xfs_dir2_free {
- xfs_dir2_free_hdr_t hdr; /* block header */
- __be16 bests[1]; /* best free counts */
- /* unused entries are -1 */
-} xfs_dir2_free_t;
-
-#define XFS_DIR2_MAX_FREE_BESTS(mp) \
- (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
- (uint)sizeof(xfs_dir2_data_off_t))
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static inline xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static inline int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
- struct xfs_da_args *args, int *indexp,
- struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
- struct xfs_dabuf *leaf2_bp);
-extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
- struct xfs_da_state_blk *oldblk,
- struct xfs_da_state_blk *newblk);
-extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
- struct xfs_da_state_blk *drop_blk,
- struct xfs_da_state_blk *save_blk);
-extern int xfs_dir2_node_addname(struct xfs_da_args *args);
-extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_node_removename(struct xfs_da_args *args);
-extern int xfs_dir2_node_replace(struct xfs_da_args *args);
-extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
- int *rvalp);
-
-#endif /* __XFS_DIR2_NODE_H__ */
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
new file mode 100644
index 0000000..067f403
--- /dev/null
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_DIR2_PRIV_H__
+#define __XFS_DIR2_PRIV_H__
+
+/* xfs_dir2.c */
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+ xfs_dir2_db_t *dbp);
+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+ struct xfs_dabuf *bp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
+ const unsigned char *name, int len);
+
+/* xfs_dir2_block.c */
+extern int xfs_dir2_block_addname(struct xfs_da_args *args);
+extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
+ xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_block_removename(struct xfs_da_args *args);
+extern int xfs_dir2_block_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
+ struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
+
+/* xfs_dir2_data.c */
+#ifdef DEBUG
+extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+#else
+#define xfs_dir2_data_check(dp,bp)
+#endif
+extern struct xfs_dir2_data_free *
+xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
+ struct xfs_dir2_data_unused *dup, int *loghead);
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr, int *loghead);
+extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+ struct xfs_dabuf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_dir2_data_entry *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+ struct xfs_dabuf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
+ int *needlogp, int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
+ xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
+
+/* xfs_dir2_leaf.c */
+extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
+ struct xfs_dabuf *dbp);
+extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
+extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
+ struct xfs_dabuf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+ int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
+extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
+ size_t bufsize, xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+ struct xfs_dabuf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ int first, int last);
+extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
+ struct xfs_dabuf *bp);
+extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+ struct xfs_dabuf *lbp);
+extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
+ struct xfs_dabuf *lbp, xfs_dir2_db_t db);
+extern struct xfs_dir2_leaf_entry *
+xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
+ int lowstale, int highstale,
+ int *lfloglow, int *lfloghigh);
+extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+/* xfs_dir2_node.c */
+extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
+ struct xfs_dabuf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+ struct xfs_da_args *args, int *indexp,
+ struct xfs_da_state *state);
+extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
+ struct xfs_dabuf *leaf2_bp);
+extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
+ struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
+extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+ struct xfs_da_state_blk *drop_blk,
+ struct xfs_da_state_blk *save_blk);
+extern int xfs_dir2_node_addname(struct xfs_da_args *args);
+extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_node_removename(struct xfs_da_args *args);
+extern int xfs_dir2_node_replace(struct xfs_da_args *args);
+extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+ int *rvalp);
+
+/* xfs_dir2_sf.c */
+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
+extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
+ struct xfs_dir2_sf_entry *sfep);
+extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
+ struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+ int size, xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
+extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
+ xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index b1bae6b..79d05e8 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -23,18 +23,16 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
/*
@@ -60,6 +58,82 @@
#endif /* XFS_BIG_INUMS */
/*
+ * Inode numbers in short-form directories can come in two versions,
+ * either 4 bytes or 8 bytes wide. These helpers deal with the
+ * two forms transparently by looking at the header's i8count field.
+ *
+ * For 64-bit inode numbers the most significant byte must be zero.
+ */
+static xfs_ino_t
+xfs_dir2_sf_get_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *from)
+{
+ if (hdr->i8count)
+ return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
+ else
+ return get_unaligned_be32(&from->i4.i);
+}
+
+static void
+xfs_dir2_sf_put_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *to,
+ xfs_ino_t ino)
+{
+ ASSERT((ino & 0xff00000000000000ULL) == 0);
+
+ if (hdr->i8count)
+ put_unaligned_be64(ino, &to->i8.i);
+ else
+ put_unaligned_be32(ino, &to->i4.i);
+}
+
+xfs_ino_t
+xfs_dir2_sf_get_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr)
+{
+ return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
+}
+
+static void
+xfs_dir2_sf_put_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at a variable
+ * offset after the entry name. The inode numbers may only be accessed
+ * through the helpers below.
+ */
+static xfs_dir2_inou_t *
+xfs_dir2_sfe_inop(
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
+}
+
+xfs_ino_t
+xfs_dir2_sfe_get_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
+}
+
+static void
+xfs_dir2_sfe_put_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
+}
+
+/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
* directory and a header for the sf directory, if it will fit it the
* space currently present in the inode. If it won't fit, the output
@@ -68,7 +142,7 @@
int /* size for sf form */
xfs_dir2_block_sfsize(
xfs_inode_t *dp, /* incore inode pointer */
- xfs_dir2_block_t *block, /* block directory data */
+ xfs_dir2_data_hdr_t *hdr, /* block directory data */
xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */
{
xfs_dir2_dataptr_t addr; /* data entry address */
@@ -88,7 +162,7 @@
mp = dp->i_mount;
count = i8count = namelen = 0;
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -101,7 +175,7 @@
* Calculate the pointer to the entry at hand.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
/*
* Detect . and .., so we can special-case them.
* . is not included in sf directories.
@@ -138,7 +212,7 @@
*/
sfhp->count = count;
sfhp->i8count = i8count;
- xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+ xfs_dir2_sf_put_parent_ino(sfhp, parent);
return size;
}
@@ -153,7 +227,7 @@
int size, /* shortform directory size */
xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
@@ -164,8 +238,7 @@
xfs_mount_t *mp; /* filesystem mount point */
char *ptr; /* current data pointer */
xfs_dir2_sf_entry_t *sfep; /* shortform entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
- xfs_ino_t temp;
+ xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
trace_xfs_dir2_block_to_sf(args);
@@ -176,13 +249,14 @@
* Make a copy of the block data, so we can shrink the inode
* and add local data.
*/
- block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
- memcpy(block, bp->data, mp->m_dirblksize);
+ hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+ memcpy(hdr, bp->data, mp->m_dirblksize);
logflags = XFS_ILOG_CORE;
if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
ASSERT(error != ENOSPC);
goto out;
}
+
/*
* The buffer is now unconditionally gone, whether
* xfs_dir2_shrink_inode worked or not.
@@ -198,14 +272,14 @@
/*
* Copy the header into the newly allocate local space.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
dp->i_d.di_size = size;
/*
* Set up to loop over the block's entries.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- ptr = (char *)block->u;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ ptr = (char *)(hdr + 1);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
sfep = xfs_dir2_sf_firstentry(sfp);
/*
@@ -233,7 +307,7 @@
else if (dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.')
ASSERT(be64_to_cpu(dep->inumber) ==
- xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+ xfs_dir2_sf_get_parent_ino(sfp));
/*
* Normal entry, copy it into shortform.
*/
@@ -241,11 +315,11 @@
sfep->namelen = dep->namelen;
xfs_dir2_sf_put_offset(sfep,
(xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)block));
+ ((char *)dep - (char *)hdr));
memcpy(sfep->name, dep->name, dep->namelen);
- temp = be64_to_cpu(dep->inumber);
- xfs_dir2_sf_put_inumber(sfp, &temp,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ be64_to_cpu(dep->inumber));
+
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -254,7 +328,7 @@
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(block);
+ kmem_free(hdr);
return error;
}
@@ -277,7 +351,7 @@
xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
int old_isize; /* di_size before adding name */
int pick; /* which algorithm to use */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
trace_xfs_dir2_sf_addname(args);
@@ -294,19 +368,19 @@
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Compute entry (and change in) size.
*/
- add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+ add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
incr_isize = add_entsize;
objchange = 0;
#if XFS_BIG_INUMS
/*
* Do we have to change to 8 byte inodes?
*/
- if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
/*
* Yes, adjust the entry size and the total size.
*/
@@ -314,7 +388,7 @@
(uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t);
incr_isize +=
- (sfp->hdr.count + 2) *
+ (sfp->count + 2) *
((uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t));
objchange = 1;
@@ -384,21 +458,21 @@
{
int byteoff; /* byte offset in sf dir */
xfs_inode_t *dp; /* incore directory inode */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
byteoff = (int)((char *)sfep - (char *)sfp);
/*
* Grow the in-inode space.
*/
- xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
+ xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
XFS_DATA_FORK);
/*
* Need to set up again due to realloc of the inode data.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
/*
* Fill in the new entry.
@@ -406,15 +480,14 @@
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
/*
* Update the header and inode.
*/
- sfp->hdr.count++;
+ sfp->count++;
#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
- sfp->hdr.i8count++;
+ sfp->i8count++;
#endif
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
@@ -444,19 +517,19 @@
xfs_dir2_data_aoff_t offset; /* current offset value */
int old_isize; /* previous di_size */
xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */
- xfs_dir2_sf_t *oldsfp; /* original shortform dir */
+ xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */
xfs_dir2_sf_entry_t *sfep; /* entry in new dir */
- xfs_dir2_sf_t *sfp; /* new shortform dir */
+ xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */
/*
* Copy the old directory to the stack buffer.
*/
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
old_isize = (int)dp->i_d.di_size;
buf = kmem_alloc(old_isize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)buf;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
memcpy(oldsfp, sfp, old_isize);
/*
* Loop over the old directory finding the place we're going
@@ -485,7 +558,7 @@
/*
* Reset the pointer since the buffer was reallocated.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Copy the first part of the directory, including the header.
*/
@@ -498,12 +571,11 @@
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
- sfp->hdr.count++;
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+ sfp->count++;
#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
- sfp->hdr.i8count++;
+ sfp->i8count++;
#endif
/*
* If there's more left to copy, do that.
@@ -537,14 +609,14 @@
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_data_aoff_t offset; /* data block offset */
xfs_dir2_sf_entry_t *sfep; /* shortform entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
int size; /* entry's data size */
int used; /* data bytes used */
dp = args->dp;
mp = dp->i_mount;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
size = xfs_dir2_data_entsize(args->namelen);
offset = XFS_DIR2_DATA_FIRST_OFFSET;
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -554,7 +626,7 @@
* Keep track of data offset and whether we've seen a place
* to insert the new entry.
*/
- for (i = 0; i < sfp->hdr.count; i++) {
+ for (i = 0; i < sfp->count; i++) {
if (!holefit)
holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
offset = xfs_dir2_sf_get_offset(sfep) +
@@ -566,7 +638,7 @@
* was a data block (block form directory).
*/
used = offset +
- (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
(uint)sizeof(xfs_dir2_block_tail_t);
/*
* If it won't fit in a block form then we can't insert it,
@@ -612,30 +684,30 @@
xfs_ino_t ino; /* entry inode number */
int offset; /* data offset */
xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
offset = XFS_DIR2_DATA_FIRST_OFFSET;
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
- ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
offset =
xfs_dir2_sf_get_offset(sfep) +
xfs_dir2_data_entsize(sfep->namelen);
}
- ASSERT(i8count == sfp->hdr.i8count);
+ ASSERT(i8count == sfp->i8count);
ASSERT(XFS_BIG_INUMS || i8count == 0);
ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
ASSERT(offset +
- (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
(uint)sizeof(xfs_dir2_block_tail_t) <=
dp->i_mount->m_dirblksize);
}
@@ -651,7 +723,7 @@
{
xfs_inode_t *dp; /* incore directory inode */
int i8count; /* parent inode is an 8-byte number */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
int size; /* directory size */
trace_xfs_dir2_sf_create(args);
@@ -681,13 +753,13 @@
/*
* Fill in the header,
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- sfp->hdr.i8count = i8count;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ sfp->i8count = i8count;
/*
* Now can put in the inode number, since i8count is set.
*/
- xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
- sfp->hdr.count = 0;
+ xfs_dir2_sf_put_parent_ino(sfp, pino);
+ sfp->count = 0;
dp->i_d.di_size = size;
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -705,7 +777,7 @@
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_dataptr_t off; /* current entry's offset */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_dataptr_t dot_offset;
xfs_dir2_dataptr_t dotdot_offset;
xfs_ino_t ino;
@@ -724,9 +796,9 @@
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* If the block number in the offset is out of range, we're done.
@@ -759,7 +831,7 @@
* Put .. entry unless we're starting past it.
*/
if (*offset <= dotdot_offset) {
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
*offset = dotdot_offset & 0x7fffffff;
return 0;
@@ -770,7 +842,7 @@
* Loop while there are more entries and put'ing works.
*/
sfep = xfs_dir2_sf_firstentry(sfp);
- for (i = 0; i < sfp->hdr.count; i++) {
+ for (i = 0; i < sfp->count; i++) {
off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
xfs_dir2_sf_get_offset(sfep));
@@ -779,7 +851,7 @@
continue;
}
- ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
if (filldir(dirent, (char *)sfep->name, sfep->namelen,
off & 0x7fffffff, ino, DT_UNKNOWN)) {
*offset = off & 0x7fffffff;
@@ -805,7 +877,7 @@
int i; /* entry index */
int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
enum xfs_dacmp cmp; /* comparison result */
xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
@@ -824,8 +896,8 @@
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Special case for .
*/
@@ -839,7 +911,7 @@
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
- args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
@@ -847,7 +919,7 @@
* Loop over all the entries trying to match ours.
*/
ci_sfep = NULL;
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
/*
* Compare name and if it's an exact match, return the inode
@@ -858,8 +930,7 @@
sfep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
- args->inumber = xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
+ args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
if (cmp == XFS_CMP_EXACT)
return XFS_ERROR(EEXIST);
ci_sfep = sfep;
@@ -891,7 +962,7 @@
int newsize; /* new inode size */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
trace_xfs_dir2_sf_removename(args);
@@ -908,32 +979,31 @@
}
ASSERT(dp->i_df.if_bytes == oldsize);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Loop over the old directory entries.
* Find the one we're deleting.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
- ASSERT(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)) ==
- args->inumber);
+ ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
+ args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count)
+ if (i == sfp->count)
return XFS_ERROR(ENOENT);
/*
* Calculate sizes.
*/
byteoff = (int)((char *)sfep - (char *)sfp);
- entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+ entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
newsize = oldsize - entsize;
/*
* Copy the part if any after the removed entry, sliding it down.
@@ -944,22 +1014,22 @@
/*
* Fix up the header and file size.
*/
- sfp->hdr.count--;
+ sfp->count--;
dp->i_d.di_size = newsize;
/*
* Reallocate, making it smaller.
*/
xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
#if XFS_BIG_INUMS
/*
* Are we changing inode number size?
*/
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
- if (sfp->hdr.i8count == 1)
+ if (sfp->i8count == 1)
xfs_dir2_sf_toino4(args);
else
- sfp->hdr.i8count--;
+ sfp->i8count--;
}
#endif
xfs_dir2_sf_check(args);
@@ -983,7 +1053,7 @@
int i8elevated; /* sf_toino8 set i8count=1 */
#endif
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
trace_xfs_dir2_sf_replace(args);
@@ -999,19 +1069,19 @@
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
#if XFS_BIG_INUMS
/*
* New inode number is large, and need to convert to 8-byte inodes.
*/
- if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
int error; /* error return value */
int newsize; /* new inode size */
newsize =
dp->i_df.if_bytes +
- (sfp->hdr.count + 1) *
+ (sfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t));
/*
@@ -1029,7 +1099,7 @@
*/
xfs_dir2_sf_toino8(args);
i8elevated = 1;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
} else
i8elevated = 0;
#endif
@@ -1040,34 +1110,32 @@
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
#endif
- xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
+ xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
}
/*
* Normal entry, look for the name.
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
ASSERT(args->inumber != ino);
#endif
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count) {
+ if (i == sfp->count) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
#if XFS_BIG_INUMS
if (i8elevated)
@@ -1085,10 +1153,10 @@
/*
* And the old count was one, so need to convert to small.
*/
- if (sfp->hdr.i8count == 1)
+ if (sfp->i8count == 1)
xfs_dir2_sf_toino4(args);
else
- sfp->hdr.i8count--;
+ sfp->i8count--;
}
/*
* See if the old number was small, the new number is large.
@@ -1099,9 +1167,9 @@
* add to the i8count unless we just converted to 8-byte
* inodes (which does an implied i8count = 1)
*/
- ASSERT(sfp->hdr.i8count != 0);
+ ASSERT(sfp->i8count != 0);
if (!i8elevated)
- sfp->hdr.i8count++;
+ sfp->i8count++;
}
#endif
xfs_dir2_sf_check(args);
@@ -1121,13 +1189,12 @@
char *buf; /* old dir's buffer */
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
- xfs_ino_t ino; /* entry inode number */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
- xfs_dir2_sf_t *oldsfp; /* old sf directory */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
- xfs_dir2_sf_t *sfp; /* new sf directory */
+ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
trace_xfs_dir2_sf_toino4(args);
@@ -1140,44 +1207,42 @@
*/
oldsize = dp->i_df.if_bytes;
buf = kmem_alloc(oldsize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsfp->hdr.i8count == 1);
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsfp->i8count == 1);
memcpy(buf, oldsfp, oldsize);
/*
* Compute the new inode size.
*/
newsize =
oldsize -
- (oldsfp->hdr.count + 1) *
+ (oldsfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
/*
* Reset our pointers, the data has moved.
*/
- oldsfp = (xfs_dir2_sf_t *)buf;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Fill in the new header.
*/
- sfp->hdr.count = oldsfp->hdr.count;
- sfp->hdr.i8count = 0;
- ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
- xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+ sfp->count = oldsfp->count;
+ sfp->i8count = 0;
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- ino = xfs_dir2_sf_get_inumber(oldsfp,
- xfs_dir2_sf_inumberp(oldsfep));
- xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
}
/*
* Clean up the inode.
@@ -1199,13 +1264,12 @@
char *buf; /* old dir's buffer */
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
- xfs_ino_t ino; /* entry inode number */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
- xfs_dir2_sf_t *oldsfp; /* old sf directory */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
- xfs_dir2_sf_t *sfp; /* new sf directory */
+ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
trace_xfs_dir2_sf_toino8(args);
@@ -1218,44 +1282,42 @@
*/
oldsize = dp->i_df.if_bytes;
buf = kmem_alloc(oldsize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsfp->hdr.i8count == 0);
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsfp->i8count == 0);
memcpy(buf, oldsfp, oldsize);
/*
* Compute the new inode size.
*/
newsize =
oldsize +
- (oldsfp->hdr.count + 1) *
+ (oldsfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
/*
* Reset our pointers, the data has moved.
*/
- oldsfp = (xfs_dir2_sf_t *)buf;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Fill in the new header.
*/
- sfp->hdr.count = oldsfp->hdr.count;
- sfp->hdr.i8count = 1;
- ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
- xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+ sfp->count = oldsfp->count;
+ sfp->i8count = 1;
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- ino = xfs_dir2_sf_get_inumber(oldsfp,
- xfs_dir2_sf_inumberp(oldsfep));
- xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
}
/*
* Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
deleted file mode 100644
index 6ac44b5..0000000
--- a/fs/xfs/xfs_dir2_sf.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_SF_H__
-#define __XFS_DIR2_SF_H__
-
-/*
- * Directory layout when stored internal to an inode.
- *
- * Small directories are packed as tightly as possible so as to
- * fit into the literal area of the inode.
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_block;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Inode number stored as 8 8-bit values.
- */
-typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
-
-/*
- * Inode number stored as 4 8-bit values.
- * Works a lot of the time, when all the inode numbers in a directory
- * fit in 32 bits.
- */
-typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
-
-typedef union {
- xfs_dir2_ino8_t i8;
- xfs_dir2_ino4_t i4;
-} xfs_dir2_inou_t;
-#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
-
-/*
- * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
- * Only need 16 bits, this is the byte offset into the single block form.
- */
-typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
-
-/*
- * The parent directory has a dedicated field, and the self-pointer must
- * be calculated on the fly.
- *
- * Entries are packed toward the top as tightly as possible. The header
- * and the elements must be memcpy'd out into a work area to get correct
- * alignment for the inode number fields.
- */
-typedef struct xfs_dir2_sf_hdr {
- __uint8_t count; /* count of entries */
- __uint8_t i8count; /* count of 8-byte inode #s */
- xfs_dir2_inou_t parent; /* parent dir inode number */
-} __arch_pack xfs_dir2_sf_hdr_t;
-
-typedef struct xfs_dir2_sf_entry {
- __uint8_t namelen; /* actual name length */
- xfs_dir2_sf_off_t offset; /* saved offset */
- __uint8_t name[1]; /* name, variable size */
- xfs_dir2_inou_t inumber; /* inode number, var. offset */
-} __arch_pack xfs_dir2_sf_entry_t;
-
-typedef struct xfs_dir2_sf {
- xfs_dir2_sf_hdr_t hdr; /* shortform header */
- xfs_dir2_sf_entry_t list[1]; /* shortform entries */
-} xfs_dir2_sf_t;
-
-static inline int xfs_dir2_sf_hdr_size(int i8count)
-{
- return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
- ((i8count) == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
-{
- return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
-}
-
-static inline xfs_intino_t
-xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
-{
- return ((sfp)->hdr.i8count == 0 ? \
- (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
- (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
-}
-
-static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
- xfs_dir2_inou_t *to)
-{
- if ((sfp)->hdr.i8count == 0)
- XFS_PUT_DIR_INO4(*(from), (to)->i4);
- else
- XFS_PUT_DIR_INO8(*(from), (to)->i8);
-}
-
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
-{
- return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
-}
-
-static inline void
-xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
-{
- INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
-}
-
-static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
-{
- return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
- ((sfp)->hdr.i8count == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline int
-xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
- return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
- ((sfp)->hdr.i8count == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
-{
- return ((xfs_dir2_sf_entry_t *) \
- ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
-}
-
-static inline xfs_dir2_sf_entry_t *
-xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
- return ((xfs_dir2_sf_entry_t *) \
- ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
-}
-
-/*
- * Functions.
- */
-extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
- struct xfs_dir2_block *block,
- xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
- int size, xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
-extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
- xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_SF_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 8f6fc1a..c13fed8 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -249,6 +249,11 @@
#define XFS_MAX_LOG_BYTES \
((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
+/* Used for sanity checks on superblock */
+#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
+#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
+ (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
+
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
*/
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 84ebeec..dd5628b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -683,7 +683,7 @@
return 0;
}
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
} else {
/*
* Continue where we left off before. In this case, we
@@ -691,7 +691,7 @@
*/
agbp = *IO_agbp;
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
}
mp = tp->t_mountp;
@@ -775,7 +775,7 @@
if (error)
goto nextag;
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
}
/*
* Here with an allocation group that has a free inode.
@@ -944,7 +944,7 @@
* See if the most recently allocated block has any free.
*/
newino:
- if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
+ if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
XFS_LOOKUP_EQ, &i);
if (error)
@@ -1085,7 +1085,7 @@
return error;
}
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
ASSERT(agbno < be32_to_cpu(agi->agi_length));
/*
* Initialize the cursor.
@@ -1438,7 +1438,7 @@
xfs_agi_t *agi; /* allocation group header */
agi = XFS_BUF_TO_AGI(bp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif
/*
* Compute byte offsets for the first and last fields.
@@ -1492,7 +1492,7 @@
/*
* Validate the magic number of the agi block.
*/
- agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+ agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
be32_to_cpu(agi->agi_seqno) == agno;
if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 16921f5..c6a7581 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -31,7 +31,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
@@ -205,72 +204,6 @@
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_inobt_trace_buf;
-
-STATIC void
-xfs_inobt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
- (void *)func, (void *)s, NULL, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_inobt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *s0 = cur->bc_private.a.agno;
- *l0 = cur->bc_rec.i.ir_startino;
- *l1 = cur->bc_rec.i.ir_free;
-}
-
-STATIC void
-xfs_inobt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be32_to_cpu(key->inobt.ir_startino);
- *l1 = 0;
-}
-
-STATIC void
-xfs_inobt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- *l0 = be32_to_cpu(rec->inobt.ir_startino);
- *l1 = be32_to_cpu(rec->inobt.ir_freecount);
- *l2 = be64_to_cpu(rec->inobt.ir_free);
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
@@ -286,18 +219,10 @@
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_inobt_trace_enter,
- .trace_cursor = xfs_inobt_trace_cursor,
- .trace_key = xfs_inobt_trace_key,
- .trace_record = xfs_inobt_trace_record,
-#endif
};
/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783..7759812 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,7 +38,6 @@
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_btree_trace.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20..3cc21dd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
@@ -52,7 +51,7 @@
kmem_zone_t *xfs_inode_zone;
/*
- * Used in xfs_itruncate(). This is the maximum number of extents
+ * Used in xfs_itruncate_extents(). This is the maximum number of extents
* freed from a file in a single transaction.
*/
#define XFS_ITRUNC_MAX_EXTENTS 2
@@ -167,7 +166,7 @@
dip = (xfs_dinode_t *)xfs_buf_offset(bp,
(i << mp->m_sb.sb_inodelog));
- di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+ di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
@@ -802,7 +801,7 @@
* If we got something that isn't an inode it means someone
* (nfs or dmi) has a stale handle.
*/
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
+ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
#ifdef DEBUG
xfs_alert(mp,
"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1179,15 +1178,15 @@
* at least do it for regular files.
*/
#ifdef DEBUG
-void
+STATIC void
xfs_isize_check(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- xfs_fsize_t isize)
+ struct xfs_inode *ip,
+ xfs_fsize_t isize)
{
- xfs_fileoff_t map_first;
- int nimaps;
- xfs_bmbt_irec_t imaps[2];
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t map_first;
+ int nimaps;
+ xfs_bmbt_irec_t imaps[2];
if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
return;
@@ -1214,168 +1213,14 @@
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
+#else /* DEBUG */
+#define xfs_isize_check(ip, isize)
#endif /* DEBUG */
/*
- * Calculate the last possible buffered byte in a file. This must
- * include data that was buffered beyond the EOF by the write code.
- * This also needs to deal with overflowing the xfs_fsize_t type
- * which can happen for sizes near the limit.
- *
- * We also need to take into account any blocks beyond the EOF. It
- * may be the case that they were buffered by a write which failed.
- * In that case the pages will still be in memory, but the inode size
- * will never have been updated.
- */
-STATIC xfs_fsize_t
-xfs_file_last_byte(
- xfs_inode_t *ip)
-{
- xfs_mount_t *mp;
- xfs_fsize_t last_byte;
- xfs_fileoff_t last_block;
- xfs_fileoff_t size_last_block;
- int error;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-
- mp = ip->i_mount;
- /*
- * Only check for blocks beyond the EOF if the extents have
- * been read in. This eliminates the need for the inode lock,
- * and it also saves us from looking when it really isn't
- * necessary.
- */
- if (ip->i_df.if_flags & XFS_IFEXTENTS) {
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_bmap_last_offset(NULL, ip, &last_block,
- XFS_DATA_FORK);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (error) {
- last_block = 0;
- }
- } else {
- last_block = 0;
- }
- size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
- last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
-
- last_byte = XFS_FSB_TO_B(mp, last_block);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- last_byte += (1 << mp->m_writeio_log);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- return last_byte;
-}
-
-/*
- * Start the truncation of the file to new_size. The new size
- * must be smaller than the current size. This routine will
- * clear the buffer and page caches of file data in the removed
- * range, and xfs_itruncate_finish() will remove the underlying
- * disk blocks.
- *
- * The inode must have its I/O lock locked EXCLUSIVELY, and it
- * must NOT have the inode lock held at all. This is because we're
- * calling into the buffer/page cache code and we can't hold the
- * inode lock when we do so.
- *
- * We need to wait for any direct I/Os in flight to complete before we
- * proceed with the truncate. This is needed to prevent the extents
- * being read or written by the direct I/Os from being removed while the
- * I/O is in flight as there is no other method of synchronising
- * direct I/O with the truncate operation. Also, because we hold
- * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
- * started until the truncate completes and drops the lock. Essentially,
- * the xfs_ioend_wait() call forms an I/O barrier that provides strict
- * ordering between direct I/Os and the truncate operation.
- *
- * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
- * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
- * in the case that the caller is locking things out of order and
- * may not be able to call xfs_itruncate_finish() with the inode lock
- * held without dropping the I/O lock. If the caller must drop the
- * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
- * must be called again with all the same restrictions as the initial
- * call.
- */
-int
-xfs_itruncate_start(
- xfs_inode_t *ip,
- uint flags,
- xfs_fsize_t new_size)
-{
- xfs_fsize_t last_byte;
- xfs_off_t toss_start;
- xfs_mount_t *mp;
- int error = 0;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
- (flags == XFS_ITRUNC_MAYBE));
-
- mp = ip->i_mount;
-
- /* wait for the completion of any pending DIOs */
- if (new_size == 0 || new_size < ip->i_size)
- xfs_ioend_wait(ip);
-
- /*
- * Call toss_pages or flushinval_pages to get rid of pages
- * overlapping the region being removed. We have to use
- * the less efficient flushinval_pages in the case that the
- * caller may not be able to finish the truncate without
- * dropping the inode's I/O lock. Make sure
- * to catch any pages brought in by buffers overlapping
- * the EOF by searching out beyond the isize by our
- * block size. We round new_size up to a block boundary
- * so that we don't toss things on the same block as
- * new_size but before it.
- *
- * Before calling toss_page or flushinval_pages, make sure to
- * call remapf() over the same region if the file is mapped.
- * This frees up mapped file references to the pages in the
- * given range and for the flushinval_pages case it ensures
- * that we get the latest mapped changes flushed out.
- */
- toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- toss_start = XFS_FSB_TO_B(mp, toss_start);
- if (toss_start < 0) {
- /*
- * The place to start tossing is beyond our maximum
- * file size, so there is no way that the data extended
- * out there.
- */
- return 0;
- }
- last_byte = xfs_file_last_byte(ip);
- trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
- if (last_byte > toss_start) {
- if (flags & XFS_ITRUNC_DEFINITE) {
- xfs_tosspages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- } else {
- error = xfs_flushinval_pages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- }
- }
-
-#ifdef DEBUG
- if (new_size == 0) {
- ASSERT(VN_CACHED(VFS_I(ip)) == 0);
- }
-#endif
- return error;
-}
-
-/*
- * Shrink the file to the given new_size. The new size must be smaller than
- * the current size. This will free up the underlying blocks in the removed
- * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
+ * Free up the underlying blocks past new_size. The new size must not be larger
+ * than the current size. This routine can be used both for the attribute and
+ * data fork, and does not modify the inode size, which is left to the caller.
*
* The transaction passed to this routine must have made a permanent log
* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
@@ -1387,31 +1232,6 @@
* will be "held" within the returned transaction. This routine does NOT
* require any disk space to be reserved for it within the transaction.
*
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * indicates the fork which is to be truncated. For the attribute fork we only
- * support truncation to size 0.
- *
- * We use the sync parameter to indicate whether or not the first transaction
- * we perform might have to be synchronous. For the attr fork, it needs to be
- * so if the unlink of the inode is not yet known to be permanent in the log.
- * This keeps us from freeing and reusing the blocks of the attribute fork
- * before the unlink of the inode becomes permanent.
- *
- * For the data fork, we normally have to run synchronously if we're being
- * called out of the inactive path or we're being called out of the create path
- * where we're truncating an existing file. Either way, the truncate needs to
- * be sync so blocks don't reappear in the file with altered data in case of a
- * crash. wsync filesystems can run the first case async because anything that
- * shrinks the inode has to run sync so by the time we're called here from
- * inactive, the inode size is permanently set to 0.
- *
- * Calls from the truncate path always need to be sync unless we're in a wsync
- * filesystem and the file has already been unlinked.
- *
- * The caller is responsible for correctly setting the sync parameter. It gets
- * too hard for us to guess here which path we're being called out of just
- * based on inode state.
- *
* If we get an error, we must return with the inode locked and linked into the
* current transaction. This keeps things simple for the higher level code,
* because it always knows that the inode is locked and held in the transaction
@@ -1419,124 +1239,30 @@
* dirty on error so that transactions can be easily aborted if possible.
*/
int
-xfs_itruncate_finish(
- xfs_trans_t **tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int fork,
- int sync)
+xfs_itruncate_extents(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_fsize_t new_size)
{
- xfs_fsblock_t first_block;
- xfs_fileoff_t first_unmap_block;
- xfs_fileoff_t last_block;
- xfs_filblks_t unmap_len=0;
- xfs_mount_t *mp;
- xfs_trans_t *ntp;
- int done;
- int committed;
- xfs_bmap_free_t free_list;
- int error;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp = *tpp;
+ struct xfs_trans *ntp;
+ xfs_bmap_free_t free_list;
+ xfs_fsblock_t first_block;
+ xfs_fileoff_t first_unmap_block;
+ xfs_fileoff_t last_block;
+ xfs_filblks_t unmap_len;
+ int committed;
+ int error = 0;
+ int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT(*tp != NULL);
- ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- ASSERT(ip->i_transp == *tp);
+ ASSERT(new_size <= ip->i_size);
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
-
-
- ntp = *tp;
- mp = (ntp)->t_mountp;
- ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
-
- /*
- * We only support truncating the entire attribute fork.
- */
- if (fork == XFS_ATTR_FORK) {
- new_size = 0LL;
- }
- first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- trace_xfs_itruncate_finish_start(ip, new_size);
-
- /*
- * The first thing we do is set the size to new_size permanently
- * on disk. This way we don't have to worry about anyone ever
- * being able to look at the data being freed even in the face
- * of a crash. What we're getting around here is the case where
- * we free a block, it is allocated to another file, it is written
- * to, and then we crash. If the new data gets written to the
- * file but the log buffers containing the free and reallocation
- * don't, then we'd end up with garbage in the blocks being freed.
- * As long as we make the new_size permanent before actually
- * freeing any blocks it doesn't matter if they get written to.
- *
- * The callers must signal into us whether or not the size
- * setting here must be synchronous. There are a few cases
- * where it doesn't have to be synchronous. Those cases
- * occur if the file is unlinked and we know the unlink is
- * permanent or if the blocks being truncated are guaranteed
- * to be beyond the inode eof (regardless of the link count)
- * and the eof value is permanent. Both of these cases occur
- * only on wsync-mounted filesystems. In those cases, we're
- * guaranteed that no user will ever see the data in the blocks
- * that are being truncated so the truncate can run async.
- * In the free beyond eof case, the file may wind up with
- * more blocks allocated to it than it needs if we crash
- * and that won't get fixed until the next time the file
- * is re-opened and closed but that's ok as that shouldn't
- * be too many blocks.
- *
- * However, we can't just make all wsync xactions run async
- * because there's one call out of the create path that needs
- * to run sync where it's truncating an existing file to size
- * 0 whose size is > 0.
- *
- * It's probably possible to come up with a test in this
- * routine that would correctly distinguish all the above
- * cases from the values of the function parameters and the
- * inode state but for sanity's sake, I've decided to let the
- * layers above just tell us. It's simpler to correctly figure
- * out in the layer above exactly under what conditions we
- * can run async and I think it's easier for others read and
- * follow the logic in case something has to be changed.
- * cscope is your friend -- rcc.
- *
- * The attribute fork is much simpler.
- *
- * For the attribute fork we allow the caller to tell us whether
- * the unlink of the inode that led to this call is yet permanent
- * in the on disk log. If it is not and we will be freeing extents
- * in this inode then we make the first transaction synchronous
- * to make sure that the unlink is permanent by the time we free
- * the blocks.
- */
- if (fork == XFS_DATA_FORK) {
- if (ip->i_d.di_nextents > 0) {
- /*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
- */
- if (ip->i_size != new_size) {
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- }
- }
- } else if (sync) {
- ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
- if (ip->i_d.di_anextents > 0)
- xfs_trans_set_sync(ntp);
- }
- ASSERT(fork == XFS_DATA_FORK ||
- (fork == XFS_ATTR_FORK &&
- ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
- (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
/*
* Since it is possible for space to become allocated beyond
@@ -1547,128 +1273,142 @@
* beyond the maximum file size (ie it is the same as last_block),
* then there is nothing to do.
*/
+ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- ASSERT(first_unmap_block <= last_block);
- done = 0;
- if (last_block == first_unmap_block) {
- done = 1;
- } else {
- unmap_len = last_block - first_unmap_block + 1;
- }
+ if (first_unmap_block == last_block)
+ return 0;
+
+ ASSERT(first_unmap_block < last_block);
+ unmap_len = last_block - first_unmap_block + 1;
while (!done) {
- /*
- * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
- * will tell us whether it freed the entire range or
- * not. If this is a synchronous mount (wsync),
- * then we can tell bunmapi to keep all the
- * transactions asynchronous since the unlink
- * transaction that made this inode inactive has
- * already hit the disk. There's no danger of
- * the freed blocks being reused, there being a
- * crash, and the reused blocks suddenly reappearing
- * in this file with garbage in them once recovery
- * runs.
- */
xfs_bmap_init(&free_list, &first_block);
- error = xfs_bunmapi(ntp, ip,
+ error = xfs_bunmapi(tp, ip,
first_unmap_block, unmap_len,
- xfs_bmapi_aflag(fork),
+ xfs_bmapi_aflag(whichfork),
XFS_ITRUNC_MAX_EXTENTS,
&first_block, &free_list,
&done);
- if (error) {
- /*
- * If the bunmapi call encounters an error,
- * return to the caller where the transaction
- * can be properly aborted. We just need to
- * make sure we're not holding any resources
- * that we were not when we came in.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ if (error)
+ goto out_bmap_cancel;
/*
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(tp, &free_list, &committed);
- ntp = *tp;
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (committed)
- xfs_trans_ijoin(ntp, ip);
-
- if (error) {
- /*
- * If the bmap finish call encounters an error, return
- * to the caller where the transaction can be properly
- * aborted. We just need to make sure we're not
- * holding any resources that we were not when we came
- * in.
- *
- * Aborting from this point might lose some blocks in
- * the file system, but oh well.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ xfs_trans_ijoin(tp, ip);
+ if (error)
+ goto out_bmap_cancel;
if (committed) {
/*
* Mark the inode dirty so it will be logged and
* moved forward in the log as part of every commit.
*/
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
- ntp = xfs_trans_dup(ntp);
- error = xfs_trans_commit(*tp, 0);
- *tp = ntp;
+ ntp = xfs_trans_dup(tp);
+ error = xfs_trans_commit(tp, 0);
+ tp = ntp;
- xfs_trans_ijoin(ntp, ip);
+ xfs_trans_ijoin(tp, ip);
if (error)
- return error;
+ goto out;
+
/*
- * transaction commit worked ok so we can drop the extra ticket
+ * Transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
- xfs_log_ticket_put(ntp->t_ticket);
- error = xfs_trans_reserve(ntp, 0,
+ xfs_log_ticket_put(tp->t_ticket);
+ error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
XFS_ITRUNCATE_LOG_COUNT);
if (error)
- return error;
+ goto out;
}
+
+out:
+ *tpp = tp;
+ return error;
+out_bmap_cancel:
/*
- * Only update the size in the case of the data fork, but
- * always re-log the inode so that our permanent transaction
- * can keep on rolling it forward in the log.
+ * If xfs_bunmapi() or xfs_bmap_finish() fails, return to the caller
+ * where the transaction can be properly aborted. We just need to make
+ * sure we hold no resources that we did not hold when we came in.
*/
- if (fork == XFS_DATA_FORK) {
- xfs_isize_check(mp, ip, new_size);
+ xfs_bmap_cancel(&free_list);
+ goto out;
+}
+
+int
+xfs_itruncate_data(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ xfs_fsize_t new_size)
+{
+ int error;
+
+ trace_xfs_itruncate_data_start(ip, new_size);
+
+ /*
+ * The first thing we do is set the size to new_size permanently on
+ * disk. This way we don't have to worry about anyone ever being able
+ * to look at the data being freed even in the face of a crash.
+ * What we're getting around here is the case where we free a block, it
+ * is allocated to another file, it is written to, and then we crash.
+ * If the new data gets written to the file but the log buffers
+ * containing the free and reallocation don't, then we'd end up with
+ * garbage in the blocks being freed. As long as we make the new_size
+ * permanent before actually freeing any blocks it doesn't matter if
+ * they get written to.
+ */
+ if (ip->i_d.di_nextents > 0) {
/*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
+ * If we are not changing the file size then do not update
+ * the on-disk file size - we may be called from
+ * xfs_inactive_free_eofblocks(). If we update the on-disk
+ * file size and then the system crashes before the contents
+ * of the file are flushed to disk then the files may be
+ * full of holes (ie NULL files bug).
*/
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
}
}
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_delayed_blks == 0));
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_d.di_nextents == 0));
- trace_xfs_itruncate_finish_end(ip, new_size);
+
+ error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
+ if (error)
+ return error;
+
+ /*
+ * If we are not changing the file size then do not update the on-disk
+ * file size - we may be called from xfs_inactive_free_eofblocks().
+ * If we update the on-disk file size and then the system crashes
+ * before the contents of the file are flushed to disk then the files
+ * may be full of holes (ie NULL files bug).
+ */
+ xfs_isize_check(ip, new_size);
+ if (ip->i_size != new_size) {
+ ip->i_d.di_size = new_size;
+ ip->i_size = new_size;
+ }
+
+ ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
+ ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
+
+ /*
+ * Always re-log the inode so that our permanent transaction can keep
+ * on rolling it forward in the log.
+ */
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+ trace_xfs_itruncate_data_end(ip, new_size);
return 0;
}
@@ -1694,7 +1434,6 @@
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_mode != 0);
- ASSERT(ip->i_transp == tp);
mp = tp->t_mountp;
@@ -1717,7 +1456,7 @@
ASSERT(agi->agi_unlinked[bucket_index]);
ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
- if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
+ if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
/*
* There is already another inode in the bucket we need
* to add ourselves to. Add us at the front of the list.
@@ -1728,8 +1467,7 @@
if (error)
return error;
- ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
- /* both on-disk, don't endian flip twice */
+ ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1794,7 +1532,7 @@
agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
ASSERT(agino != 0);
bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
+ ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
ASSERT(agi->agi_unlinked[bucket_index]);
if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1959,7 +1697,7 @@
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ lip = bp->b_fspriv;
while (lip) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
@@ -2086,7 +1824,6 @@
xfs_buf_t *ibp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_transp == tp);
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
@@ -2733,7 +2470,7 @@
* mark the buffer as an error and call them. Otherwise
* mark it as stale and brelse.
*/
- if (XFS_BUF_IODONE_FUNC(bp)) {
+ if (bp->b_iodone) {
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_ERROR(bp,EIO);
@@ -2920,7 +2657,7 @@
*/
xfs_synchronize_times(ip);
- if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
+ if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -3073,8 +2810,8 @@
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
+ ASSERT(bp->b_fspriv != NULL);
+ ASSERT(bp->b_iodone != NULL);
} else {
/*
* We're flushing an inode which is not in the AIL and has
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 964cfea..a97644a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@
xfs_ifork_t i_df; /* data fork */
/* Transaction and locking information. */
- struct xfs_trans *i_transp; /* ptr to owning transaction*/
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
@@ -458,16 +457,6 @@
extern struct lock_class_key xfs_iolock_reclaimable;
/*
- * Flags for xfs_itruncate_start().
- */
-#define XFS_ITRUNC_DEFINITE 0x1
-#define XFS_ITRUNC_MAYBE 0x2
-
-#define XFS_ITRUNC_FLAGS \
- { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
- { XFS_ITRUNC_MAYBE, "MAYBE" }
-
-/*
* For multiple groups support: if S_ISGID bit is set in the parent
* directory, group of new file is set to that of the parent, and
* new subdirectory gets S_ISGID bit from parent.
@@ -501,9 +490,10 @@
uint xfs_dic2xflags(struct xfs_dinode *);
int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
-int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
-int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
- xfs_fsize_t, int, int);
+int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
+ int, xfs_fsize_t);
+int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
+ xfs_fsize_t);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
@@ -579,13 +569,6 @@
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
-#ifdef DEBUG
-void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
- xfs_fsize_t);
-#else /* DEBUG */
-#define xfs_isize_check(mp, ip, isize)
-#endif /* DEBUG */
-
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#else
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 09983a3..588406d 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -632,13 +632,8 @@
struct xfs_inode *ip = iip->ili_inode;
unsigned short lock_flags;
- ASSERT(iip->ili_inode->i_itemp != NULL);
- ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
-
- /*
- * Clear the transaction pointer in the inode.
- */
- ip->i_transp = NULL;
+ ASSERT(ip->i_itemp != NULL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* If the inode needed a separate buffer with which to log
@@ -664,8 +659,8 @@
lock_flags = iip->ili_lock_flags;
iip->ili_lock_flags = 0;
if (lock_flags) {
- xfs_iunlock(iip->ili_inode, lock_flags);
- IRELE(iip->ili_inode);
+ xfs_iunlock(ip, lock_flags);
+ IRELE(ip);
}
}
@@ -681,15 +676,15 @@
* where the cluster buffer may be unpinned before the inode is inserted into
* the AIL during transaction committed processing. If the buffer is unpinned
* before the inode item has been committed and inserted, then it is possible
- * for the buffer to be written and IO completions before the inode is inserted
+ * for the buffer to be written and IO completes before the inode is inserted
* into the AIL. In that case, we'd be inserting a clean, stale inode into the
* AIL which will never get removed. It will, however, get reclaimed which
* triggers an assert in xfs_inode_free() complaining about freein an inode
* still in the AIL.
*
- * To avoid this, return a lower LSN than the one passed in so that the
- * transaction committed code will not move the inode forward in the AIL but
- * will still unpin it properly.
+ * To avoid this, just unpin the inode directly and return a LSN of -1 so the
+ * transaction committed code knows that it does not need to do any further
+ * processing on the item.
*/
STATIC xfs_lsn_t
xfs_inode_item_committed(
@@ -699,8 +694,10 @@
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- if (xfs_iflags_test(ip, XFS_ISTALE))
- return lsn - 1;
+ if (xfs_iflags_test(ip, XFS_ISTALE)) {
+ xfs_inode_item_unpin(lip, 0);
+ return -1;
+ }
return lsn;
}
@@ -877,7 +874,7 @@
* Scan the buffer IO completions for other inodes being completed and
* attach them to the current inode log item.
*/
- blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ blip = bp->b_fspriv;
prev = NULL;
while (blip != NULL) {
if (lip->li_cb != xfs_iflush_done) {
@@ -889,7 +886,7 @@
/* remove from list */
next = blip->li_bio_list;
if (!prev) {
- XFS_BUF_SET_FSPRIVATE(bp, next);
+ bp->b_fspriv = next;
} else {
prev->li_bio_list = next;
}
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index b8e4ee4..b253c0e 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -28,17 +28,6 @@
typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */
-/*
- * Useful inode bits for this kernel.
- * Used in some places where having 64-bits in the 32-bit kernels
- * costs too much.
- */
-#if XFS_BIG_INUMS
-typedef xfs_ino_t xfs_intino_t;
-#else
-typedef __uint32_t xfs_intino_t;
-#endif
-
#define NULLFSINO ((xfs_ino_t)-1)
#define NULLAGINO ((xfs_agino_t)-1)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 41d5b8f..06ff843 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -871,15 +871,9 @@
void
xlog_iodone(xfs_buf_t *bp)
{
- xlog_in_core_t *iclog;
- xlog_t *l;
- int aborted;
-
- iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
- aborted = 0;
- l = iclog->ic_log;
+ xlog_in_core_t *iclog = bp->b_fspriv;
+ xlog_t *l = iclog->ic_log;
+ int aborted = 0;
/*
* Race to shutdown the filesystem if we see an error.
@@ -1056,10 +1050,9 @@
bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
if (!bp)
goto out_free_log;
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+ bp->b_iodone = xlog_iodone;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
@@ -1090,10 +1083,8 @@
log->l_iclog_size, 0);
if (!bp)
goto out_free_iclog;
- if (!XFS_BUF_CPSEMA(bp))
- ASSERT(0);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+
+ bp->b_iodone = xlog_iodone;
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
#ifdef DEBUG
@@ -1118,7 +1109,7 @@
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
- ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
+ ASSERT(xfs_buf_islocked(iclog->ic_bp));
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
@@ -1254,9 +1245,8 @@
xlog_bdstrat(
struct xfs_buf *bp)
{
- struct xlog_in_core *iclog;
+ struct xlog_in_core *iclog = bp->b_fspriv;
- iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
XFS_BUF_ERROR(bp, EIO);
XFS_BUF_STALE(bp);
@@ -1269,7 +1259,6 @@
return 0;
}
- bp->b_flags |= _XBF_RUN_QUEUES;
xfs_buf_iorequest(bp);
return 0;
}
@@ -1351,8 +1340,6 @@
}
bp = iclog->ic_bp;
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
@@ -1366,22 +1353,28 @@
iclog->ic_bwritecnt = 1;
}
XFS_BUF_SET_COUNT(bp, count);
- XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */
+ bp->b_fspriv = iclog;
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_LOG_BUFFER;
+ bp->b_flags |= XBF_SYNCIO;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+ bp->b_flags |= XBF_FUA;
+
/*
- * If we have an external log device, flush the data device
- * before flushing the log to make sure all meta data
- * written back from the AIL actually made it to disk
- * before writing out the new log tail LSN in the log buffer.
+ * Flush the data device before flushing the log to make
+ * sure all meta data written back from the AIL actually made
+ * it to disk before stamping the new log tail LSN into the
+ * log buffer. For an external log we need to issue the
+ * flush explicitly, and unfortunately synchronously here;
+ * for an internal log we can simply use the block layer
+ * state machine for preflushes.
*/
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
- XFS_BUF_ORDERED(bp);
+ else
+ bp->b_flags |= XBF_FLUSH;
}
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1404,19 +1397,16 @@
}
if (split) {
bp = iclog->ic_log->l_xbuf;
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
- (unsigned long)1);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
(__psint_t)count), split);
- XFS_BUF_SET_FSPRIVATE(bp, iclog);
+ bp->b_fspriv = iclog;
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_LOG_BUFFER;
+ bp->b_flags |= XBF_SYNCIO;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
- XFS_BUF_ORDERED(bp);
+ bp->b_flags |= XBF_FUA;
dptr = XFS_BUF_PTR(bp);
/*
* Bump the cycle numbers at the start of each block
@@ -3521,13 +3511,13 @@
spin_unlock(&log->l_icloglock);
/* check log magic numbers */
- if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
+ if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
ptr = (xfs_caddr_t) &iclog->ic_header;
for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
ptr += BBSIZE) {
- if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
+ if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: unexpected magic num",
__func__);
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142ca..8fe4206 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -91,6 +91,8 @@
xlog_t *log,
int nbblks)
{
+ struct xfs_buf *bp;
+
if (!xlog_buf_bbcount_valid(log, nbblks)) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
@@ -118,8 +120,10 @@
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
- BBTOB(nbblks), 0);
+ bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
+ if (bp)
+ xfs_buf_unlock(bp);
+ return bp;
}
STATIC void
@@ -264,7 +268,7 @@
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_BUSY(bp);
XFS_BUF_HOLD(bp);
- XFS_BUF_PSEMA(bp, PRIBIO);
+ xfs_buf_lock(bp);
XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
@@ -300,14 +304,14 @@
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
+ ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
/*
* IRIX doesn't write the h_fmt field and leaves it zeroed
* (XLOG_FMT_UNKNOWN). This stops us from trying to recover
* a dirty log created in IRIX.
*/
- if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
+ if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
xfs_warn(mp,
"dirty log written in incompatible format - can't recover");
xlog_header_check_dump(mp, head);
@@ -333,7 +337,7 @@
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
+ ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
if (uuid_is_nil(&head->h_fs_uuid)) {
/*
@@ -367,7 +371,7 @@
xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR);
}
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0);
}
@@ -534,7 +538,7 @@
head = (xlog_rec_header_t *)offset;
- if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
+ if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
break;
if (!smallmem)
@@ -916,7 +920,7 @@
if (error)
goto done;
- if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
+ if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
found = 1;
break;
}
@@ -933,8 +937,8 @@
if (error)
goto done;
- if (XLOG_HEADER_MAGIC_NUM ==
- be32_to_cpu(*(__be32 *)offset)) {
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
found = 2;
break;
}
@@ -1947,7 +1951,7 @@
* This is all fine; things are still consistent, and we haven't lost
* any quota information. Just don't complain about bad dquot blks.
*/
- if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
+ if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
@@ -2174,7 +2178,7 @@
error = xfs_bwrite(mp, bp);
} else {
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+ bp->b_iodone = xlog_recover_iodone;
xfs_bdwrite(mp, bp);
}
@@ -2238,7 +2242,7 @@
* Make sure the place we're flushing out to really looks
* like an inode!
*/
- if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
+ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
xfs_buf_relse(bp);
xfs_alert(mp,
"%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2434,7 +2438,7 @@
write_inode_buffer:
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+ bp->b_iodone = xlog_recover_iodone;
xfs_bdwrite(mp, bp);
error:
if (need_free)
@@ -2556,7 +2560,7 @@
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+ bp->b_iodone = xlog_recover_iodone;
xfs_bdwrite(mp, bp);
return (0);
@@ -3295,7 +3299,7 @@
{
int hlen;
- if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
+ if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b49b823..7f25245 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -348,7 +348,7 @@
}
/*
- * More sanity checking. These were stolen directly from
+ * More sanity checking. Most of these were stolen directly from
* xfs_repair.
*/
if (unlikely(
@@ -371,23 +371,13 @@
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
- (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
+ (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
+ sbp->sb_dblocks == 0 ||
+ sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
+ sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
if (loud)
- xfs_warn(mp, "SB sanity check 1 failed");
- return XFS_ERROR(EFSCORRUPTED);
- }
-
- /*
- * Sanity check AG count, size fields against data size field
- */
- if (unlikely(
- sbp->sb_dblocks == 0 ||
- sbp->sb_dblocks >
- (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
- sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
- sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
- if (loud)
- xfs_warn(mp, "SB sanity check 2 failed");
+ XFS_CORRUPTION_ERROR("SB sanity check failed",
+ XFS_ERRLEVEL_LOW, mp, sbp);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -864,7 +854,8 @@
if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
if (mp->m_flags & XFS_MOUNT_RETERR) {
- xfs_warn(mp, "alignment check 1 failed");
+ xfs_warn(mp, "alignment check failed: "
+ "(sunit/swidth vs. blocksize)");
return XFS_ERROR(EINVAL);
}
mp->m_dalign = mp->m_swidth = 0;
@@ -875,6 +866,8 @@
mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
if (mp->m_flags & XFS_MOUNT_RETERR) {
+ xfs_warn(mp, "alignment check failed: "
+ "(sunit/swidth vs. ag size)");
return XFS_ERROR(EINVAL);
}
xfs_warn(mp,
@@ -889,8 +882,8 @@
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
if (mp->m_flags & XFS_MOUNT_RETERR) {
- xfs_warn(mp,
- "stripe alignment turned off: sunit(%d) less than bsize(%d)",
+ xfs_warn(mp, "alignment check failed: "
+ "sunit(%d) less than bsize(%d)",
mp->m_dalign,
mp->m_blockmask +1);
return XFS_ERROR(EINVAL);
@@ -1096,10 +1089,6 @@
if (mp->m_flags & XFS_MOUNT_RDONLY)
return 0;
-#ifdef QUOTADEBUG
- xfs_notice(mp, "Writing superblock quota changes");
-#endif
-
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
XFS_DEFAULT_LOG_COUNT);
@@ -1532,7 +1521,7 @@
xfs_warn(mp, "Unable to free reserved block pool. "
"Freespace may not be correct on next mount.");
- error = xfs_log_sbcount(mp, 1);
+ error = xfs_log_sbcount(mp);
if (error)
xfs_warn(mp, "Unable to update superblock counters. "
"Freespace may not be correct on next mount.");
@@ -1568,18 +1557,14 @@
/*
* xfs_log_sbcount
*
- * Called either periodically to keep the on disk superblock values
- * roughly up to date or from unmount to make sure the values are
- * correct on a clean unmount.
+ * Sync the superblock counters to disk.
*
* Note this code can be called during the process of freezing, so
- * we may need to use the transaction allocator which does not not
+ * we may need to use the transaction allocator which does not
* block when the transaction subsystem is in its frozen state.
*/
int
-xfs_log_sbcount(
- xfs_mount_t *mp,
- uint sync)
+xfs_log_sbcount(xfs_mount_t *mp)
{
xfs_trans_t *tp;
int error;
@@ -1605,8 +1590,7 @@
}
xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
- if (sync)
- xfs_trans_set_sync(tp);
+ xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
return error;
}
@@ -1941,22 +1925,19 @@
* the superblock buffer if it can be locked without sleeping.
* If it can't then we'll return NULL.
*/
-xfs_buf_t *
+struct xfs_buf *
xfs_getsb(
- xfs_mount_t *mp,
- int flags)
+ struct xfs_mount *mp,
+ int flags)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp = mp->m_sb_bp;
- ASSERT(mp->m_sb_bp != NULL);
- bp = mp->m_sb_bp;
- if (flags & XBF_TRYLOCK) {
- if (!XFS_BUF_CPSEMA(bp)) {
+ if (!xfs_buf_trylock(bp)) {
+ if (flags & XBF_TRYLOCK)
return NULL;
- }
- } else {
- XFS_BUF_PSEMA(bp, PRIBIO);
+ xfs_buf_lock(bp);
}
+
XFS_BUF_HOLD(bp);
ASSERT(XFS_BUF_ISDONE(bp));
return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3d68bb2..bb24dac 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -371,7 +371,7 @@
int64_t msb_delta; /* Change to make to specified field */
} xfs_mod_sb_t;
-extern int xfs_log_sbcount(xfs_mount_t *, uint);
+extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7c7bc2b..efc147f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1361,7 +1361,7 @@
lip->li_flags |= XFS_LI_ABORTED;
item_lsn = IOP_COMMITTED(lip, commit_lsn);
- /* If the committed routine returns -1, item has been freed. */
+ /* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
return;
@@ -1426,6 +1426,7 @@
static inline void
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@
spin_lock(&ailp->xa_lock);
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
- xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++)
IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is IOP_COMMITTED processing, followed by an
* IOP_UNPIN(aborted) call.
+ *
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk
+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
+ * call. This saves a lot of needless list walking and is a net win, even
+ * though it slightly increases the amount of AIL lock traffic to set it up
+ * and tear it down.
*/
void
xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
struct xfs_log_vec *lv;
+ struct xfs_ail_cursor cur;
int i = 0;
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+ spin_unlock(&ailp->xa_lock);
+
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
struct xfs_log_item *lip = lv->lv_item;
@@ -1474,7 +1487,7 @@
lip->li_flags |= XFS_LI_ABORTED;
item_lsn = IOP_COMMITTED(lip, commit_lsn);
- /* item_lsn of -1 means the item was freed */
+ /* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
continue;
@@ -1493,7 +1506,9 @@
/*
* Not a bulk update option due to unusual item_lsn.
* Push into AIL immediately, rechecking the lsn once
- * we have the ail lock. Then unpin the item.
+ * we have the ail lock. Then unpin the item. This does
+ * not affect the AIL cursor the bulk insert path is
+ * using.
*/
spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@
/* Item is a candidate for bulk AIL insert. */
log_items[i++] = lv->lv_item;
if (i >= LOG_ITEM_BATCH_SIZE) {
- xfs_log_item_batch_insert(ailp, log_items,
+ xfs_log_item_batch_insert(ailp, &cur, log_items,
LOG_ITEM_BATCH_SIZE, commit_lsn);
i = 0;
}
@@ -1515,7 +1530,11 @@
/* make sure we insert the remainder! */
if (i)
- xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+ xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
+
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
}
/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380..43233e9 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -163,17 +163,11 @@
}
/*
- * AIL traversal cursor initialisation.
- *
- * The cursor keeps track of where our current traversal is up
- * to by tracking the next ƣtem in the list for us. However, for
- * this to be safe, removing an object from the AIL needs to invalidate
- * any cursor that points to it. hence the traversal cursor needs to
- * be linked to the struct xfs_ail so that deletion can search all the
- * active cursors for invalidation.
- *
- * We don't link the push cursor because it is embedded in the struct
- * xfs_ail and hence easily findable.
+ * The cursor keeps track of where our current traversal is up to by tracking
+ * the next item in the list for us. However, for this to be safe, removing an
+ * object from the AIL needs to invalidate any cursor that points to it. hence
+ * the traversal cursor needs to be linked to the struct xfs_ail so that
+ * deletion can search all the active cursors for invalidation.
*/
STATIC void
xfs_trans_ail_cursor_init(
@@ -181,31 +175,12 @@
struct xfs_ail_cursor *cur)
{
cur->item = NULL;
- if (cur == &ailp->xa_cursors)
- return;
-
- cur->next = ailp->xa_cursors.next;
- ailp->xa_cursors.next = cur;
+ list_add_tail(&cur->list, &ailp->xa_cursors);
}
/*
- * Set the cursor to the next item, because when we look
- * up the cursor the current item may have been freed.
- */
-STATIC void
-xfs_trans_ail_cursor_set(
- struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur,
- struct xfs_log_item *lip)
-{
- if (lip)
- cur->item = xfs_ail_next(ailp, lip);
-}
-
-/*
- * Get the next item in the traversal and advance the cursor.
- * If the cursor was invalidated (inidicated by a lip of 1),
- * restart the traversal.
+ * Get the next item in the traversal and advance the cursor. If the cursor
+ * was invalidated (indicated by a lip of 1), restart the traversal.
*/
struct xfs_log_item *
xfs_trans_ail_cursor_next(
@@ -216,45 +191,31 @@
if ((__psint_t)lip & 1)
lip = xfs_ail_min(ailp);
- xfs_trans_ail_cursor_set(ailp, cur, lip);
+ if (lip)
+ cur->item = xfs_ail_next(ailp, lip);
return lip;
}
/*
- * Now that the traversal is complete, we need to remove the cursor
- * from the list of traversing cursors. Avoid removing the embedded
- * push cursor, but use the fact it is always present to make the
- * list deletion simple.
+ * When the traversal is complete, we need to remove the cursor from the list
+ * of traversing cursors.
*/
void
xfs_trans_ail_cursor_done(
struct xfs_ail *ailp,
- struct xfs_ail_cursor *done)
+ struct xfs_ail_cursor *cur)
{
- struct xfs_ail_cursor *prev = NULL;
- struct xfs_ail_cursor *cur;
-
- done->item = NULL;
- if (done == &ailp->xa_cursors)
- return;
- prev = &ailp->xa_cursors;
- for (cur = prev->next; cur; prev = cur, cur = prev->next) {
- if (cur == done) {
- prev->next = cur->next;
- break;
- }
- }
- ASSERT(cur);
+ cur->item = NULL;
+ list_del_init(&cur->list);
}
/*
- * Invalidate any cursor that is pointing to this item. This is
- * called when an item is removed from the AIL. Any cursor pointing
- * to this object is now invalid and the traversal needs to be
- * terminated so it doesn't reference a freed object. We set the
- * cursor item to a value of 1 so we can distinguish between an
- * invalidation and the end of the list when getting the next item
- * from the cursor.
+ * Invalidate any cursor that is pointing to this item. This is called when an
+ * item is removed from the AIL. Any cursor pointing to this object is now
+ * invalid and the traversal needs to be terminated so it doesn't reference a
+ * freed object. We set the low bit of the cursor item pointer so we can
+ * distinguish between an invalidation and the end of the list when getting the
+ * next item from the cursor.
*/
STATIC void
xfs_trans_ail_cursor_clear(
@@ -263,8 +224,7 @@
{
struct xfs_ail_cursor *cur;
- /* need to search all cursors */
- for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
+ list_for_each_entry(cur, &ailp->xa_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
((__psint_t)cur->item | 1);
@@ -272,9 +232,10 @@
}
/*
- * Return the item in the AIL with the current lsn.
- * Return the current tree generation number for use
- * in calls to xfs_trans_next_ail().
+ * Find the first item in the AIL with an LSN at or above @lsn by searching in
+ * ascending LSN order and initialise the cursor to point to the next item for
+ * an ascending traversal. Pass a @lsn of zero to initialise the cursor to the
+ * first item in the AIL. Returns NULL if the list is empty or no item matches.
*/
xfs_log_item_t *
xfs_trans_ail_cursor_first(
@@ -285,46 +246,112 @@
xfs_log_item_t *lip;
xfs_trans_ail_cursor_init(ailp, cur);
- lip = xfs_ail_min(ailp);
- if (lsn == 0)
+
+ if (lsn == 0) {
+ lip = xfs_ail_min(ailp);
goto out;
+ }
list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
goto out;
}
- lip = NULL;
+ return NULL;
+
out:
- xfs_trans_ail_cursor_set(ailp, cur, lip);
+ if (lip)
+ cur->item = xfs_ail_next(ailp, lip);
return lip;
}
+static struct xfs_log_item *
+__xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ xfs_lsn_t lsn)
+{
+ xfs_log_item_t *lip;
+
+ list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+ if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
+ return lip;
+ }
+ return NULL;
+}
+
/*
- * splice the log item list into the AIL at the given LSN.
+ * Find the last item in the AIL with the given @lsn by searching in descending
+ * LSN order and initialise the cursor to point to that item. If there is no
+ * item with the value of @lsn, then it sets the cursor to the last item with an
+ * LSN lower than @lsn. Returns NULL if the list is empty.
+ */
+struct xfs_log_item *
+xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn)
+{
+ xfs_trans_ail_cursor_init(ailp, cur);
+ cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
+ return cur->item;
+}
+
+/*
+ * Splice the log item list into the AIL at the given LSN. We splice to the
+ * tail of the given LSN to maintain insert order for push traversals. The
+ * cursor is optional, allowing repeated updates to the same LSN to avoid
+ * repeated traversals.
*/
static void
xfs_ail_splice(
- struct xfs_ail *ailp,
- struct list_head *list,
- xfs_lsn_t lsn)
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct list_head *list,
+ xfs_lsn_t lsn)
{
- xfs_log_item_t *next_lip;
+ struct xfs_log_item *lip = cur ? cur->item : NULL;
+ struct xfs_log_item *next_lip;
- /* If the list is empty, just insert the item. */
- if (list_empty(&ailp->xa_ail)) {
- list_splice(list, &ailp->xa_ail);
- return;
+ /*
+ * Get a new cursor if we don't have a placeholder or the existing one
+ * has been invalidated.
+ */
+ if (!lip || (__psint_t)lip & 1) {
+ lip = __xfs_trans_ail_cursor_last(ailp, lsn);
+
+ if (!lip) {
+ /* The list is empty, so just splice and return. */
+ if (cur)
+ cur->item = NULL;
+ list_splice(list, &ailp->xa_ail);
+ return;
+ }
}
- list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
- break;
+ /*
+ * Our cursor points to the item we want to insert _after_, so we have
+ * to update the cursor to point to the end of the list we are splicing
+ * in so that it points to the correct location for the next splice.
+ * i.e. before the splice
+ *
+ * lsn -> lsn -> lsn + x -> lsn + x ...
+ * ^
+ * | cursor points here
+ *
+ * After the splice we have:
+ *
+ * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
+ * ^ ^
+ * | cursor points here | needs to move here
+ *
+ * So we set the cursor to the last item in the list to be spliced
+ * before we execute the splice, resulting in the cursor pointing to
+ * the correct item after the splice occurs.
+ */
+ if (cur) {
+ next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
+ cur->item = next_lip;
}
-
- ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
- XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
-
- list_splice_init(list, &next_lip->li_ail);
+ list_splice(list, &lip->li_ail);
}
/*
@@ -351,7 +378,7 @@
struct xfs_ail *ailp = container_of(to_delayed_work(work),
struct xfs_ail, xa_work);
xfs_mount_t *mp = ailp->xa_mount;
- struct xfs_ail_cursor *cur = &ailp->xa_cursors;
+ struct xfs_ail_cursor cur;
xfs_log_item_t *lip;
xfs_lsn_t lsn;
xfs_lsn_t target;
@@ -363,13 +390,12 @@
spin_lock(&ailp->xa_lock);
target = ailp->xa_target;
- xfs_trans_ail_cursor_init(ailp, cur);
- lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
/*
* AIL is empty or our push has reached the end.
*/
- xfs_trans_ail_cursor_done(ailp, cur);
+ xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
goto out_done;
}
@@ -457,12 +483,12 @@
if (stuck > 100)
break;
- lip = xfs_trans_ail_cursor_next(ailp, cur);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
if (lip == NULL)
break;
lsn = lip->li_lsn;
}
- xfs_trans_ail_cursor_done(ailp, cur);
+ xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
if (flush_log) {
@@ -645,6 +671,7 @@
void
xfs_trans_ail_update_bulk(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +701,7 @@
list_add(&lip->li_ail, &tmp);
}
- xfs_ail_splice(ailp, &tmp, lsn);
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
@@ -793,6 +820,7 @@
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
+ INIT_LIST_HEAD(&ailp->xa_cursors);
spin_lock_init(&ailp->xa_lock);
INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
mp->m_ail = ailp;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 03b3b7f..15584fc 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -81,7 +81,7 @@
struct xfs_buf_log_item *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+ ASSERT(bp->b_transp == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
@@ -89,7 +89,7 @@
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ bip = bp->b_fspriv;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -110,7 +110,7 @@
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
- XFS_BUF_SET_FSPRIVATE2(bp, tp);
+ bp->b_transp = tp;
}
@@ -160,7 +160,7 @@
*/
bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
XFS_BUF_SUPER_STALE(bp);
@@ -172,8 +172,8 @@
else if (XFS_BUF_ISSTALE(bp))
ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -232,8 +232,8 @@
* recursion count and return the buffer to the caller.
*/
bp = mp->m_sb_bp;
- if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ if (bp->b_transp == tp) {
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -327,9 +327,9 @@
*/
bp = xfs_trans_buf_item_match(tp, target, blkno, len);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bp->b_fspriv != NULL);
ASSERT((XFS_BUF_ISERROR(bp)) == 0);
if (!(XFS_BUF_ISDONE(bp))) {
trace_xfs_trans_read_buf_io(bp, _RET_IP_);
@@ -363,7 +363,7 @@
}
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ bip = bp->b_fspriv;
bip->bli_recur++;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -460,32 +460,30 @@
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
- xfs_log_item_t *lip;
/*
* Default to a normal brelse() call if the tp is NULL.
*/
if (tp == NULL) {
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+ struct xfs_log_item *lip = bp->b_fspriv;
+
+ ASSERT(bp->b_transp == NULL);
+
/*
* If there's a buf log item attached to the buffer,
* then let the AIL know that the buffer is being
* unlocked.
*/
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- if (lip->li_type == XFS_LI_BUF) {
- bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
- xfs_trans_unlocked_item(bip->bli_item.li_ailp,
- lip);
- }
+ if (lip != NULL && lip->li_type == XFS_LI_BUF) {
+ bip = bp->b_fspriv;
+ xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
}
xfs_buf_relse(bp);
return;
}
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -556,7 +554,7 @@
xfs_buf_item_relse(bp);
bip = NULL;
}
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
/*
* If we've still got a buf log item on the buffer, then
@@ -581,16 +579,15 @@
xfs_trans_bhold(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
bip->bli_flags |= XFS_BLI_HOLD;
trace_xfs_trans_bhold(bip);
}
@@ -603,19 +600,17 @@
xfs_trans_bhold_release(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(bip->bli_flags & XFS_BLI_HOLD);
- bip->bli_flags &= ~XFS_BLI_HOLD;
+ bip->bli_flags &= ~XFS_BLI_HOLD;
trace_xfs_trans_bhold_release(bip);
}
@@ -634,14 +629,14 @@
uint first,
uint last)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
- ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
- (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));
+ ASSERT(bp->b_iodone == NULL ||
+ bp->b_iodone == xfs_buf_iodone_callbacks);
/*
* Mark the buffer as needing to be written out eventually,
@@ -656,9 +651,8 @@
XFS_BUF_DELAYWRITE(bp);
XFS_BUF_DONE(bp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
- XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
+ bp->b_iodone = xfs_buf_iodone_callbacks;
bip->bli_item.li_cb = xfs_buf_iodone;
trace_xfs_trans_log_buf(bip);
@@ -706,13 +700,11 @@
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_binval(bip);
@@ -780,13 +772,11 @@
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_BUF;
@@ -806,13 +796,11 @@
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_STALE_INODE;
@@ -833,13 +821,11 @@
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
@@ -863,16 +849,14 @@
xfs_buf_t *bp,
uint type)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
type == XFS_BLF_GDQUOT_BUF);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_format.blf_flags |= type;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 048b0c6..c8dea2f 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -55,7 +55,6 @@
{
xfs_inode_log_item_t *iip;
- ASSERT(ip->i_transp == NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (ip->i_itemp == NULL)
xfs_inode_item_init(ip, ip->i_mount);
@@ -68,12 +67,6 @@
xfs_trans_add_item(tp, &iip->ili_item);
xfs_trans_inode_broot_debug(ip);
-
- /*
- * Initialize i_transp so we can find it with xfs_inode_incore()
- * in xfs_trans_iget() above.
- */
- ip->i_transp = tp;
}
/*
@@ -111,7 +104,6 @@
ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_transp == tp);
tv = current_fs_time(inode->i_sb);
@@ -140,7 +132,6 @@
xfs_inode_t *ip,
uint flags)
{
- ASSERT(ip->i_transp == tp);
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9..212946b 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -53,7 +53,7 @@
* of the list to trigger traversal restarts.
*/
struct xfs_ail_cursor {
- struct xfs_ail_cursor *next;
+ struct list_head list;
struct xfs_log_item *item;
};
@@ -66,7 +66,7 @@
struct xfs_mount *xa_mount;
struct list_head xa_ail;
xfs_lsn_t xa_target;
- struct xfs_ail_cursor xa_cursors;
+ struct list_head xa_cursors;
spinlock_t xa_lock;
struct delayed_work xa_work;
xfs_lsn_t xa_last_pushed_lsn;
@@ -82,6 +82,7 @@
extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock);
static inline void
@@ -90,7 +91,7 @@
struct xfs_log_item *lip,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
{
- xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +112,13 @@
void xfs_trans_unlocked_item(struct xfs_ail *,
xfs_log_item_t *);
-struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
xfs_lsn_t lsn);
-struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn);
+struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6197207..88d1214 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -50,430 +50,6 @@
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
-int
-xfs_setattr(
- struct xfs_inode *ip,
- struct iattr *iattr,
- int flags)
-{
- xfs_mount_t *mp = ip->i_mount;
- struct inode *inode = VFS_I(ip);
- int mask = iattr->ia_valid;
- xfs_trans_t *tp;
- int code;
- uint lock_flags;
- uint commit_flags=0;
- uid_t uid=0, iuid=0;
- gid_t gid=0, igid=0;
- struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
- int need_iolock = 1;
-
- trace_xfs_setattr(ip);
-
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- code = -inode_change_ok(inode, iattr);
- if (code)
- return code;
-
- olddquot1 = olddquot2 = NULL;
- udqp = gdqp = NULL;
-
- /*
- * If disk quotas is on, we make sure that the dquots do exist on disk,
- * before we start any other transactions. Trying to do this later
- * is messy. We don't care to take a readlock to look at the ids
- * in inode here, because we can't hold it across the trans_reserve.
- * If the IDs do change before we take the ilock, we're covered
- * because the i_*dquot fields will get updated anyway.
- */
- if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
- uint qflags = 0;
-
- if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = iattr->ia_uid;
- qflags |= XFS_QMOPT_UQUOTA;
- } else {
- uid = ip->i_d.di_uid;
- }
- if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = iattr->ia_gid;
- qflags |= XFS_QMOPT_GQUOTA;
- } else {
- gid = ip->i_d.di_gid;
- }
-
- /*
- * We take a reference when we initialize udqp and gdqp,
- * so it is important that we never blindly double trip on
- * the same variable. See xfs_create() for an example.
- */
- ASSERT(udqp == NULL);
- ASSERT(gdqp == NULL);
- code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
- qflags, &udqp, &gdqp);
- if (code)
- return code;
- }
-
- /*
- * For the other attributes, we acquire the inode lock and
- * first do an error checking pass.
- */
- tp = NULL;
- lock_flags = XFS_ILOCK_EXCL;
- if (flags & XFS_ATTR_NOLOCK)
- need_iolock = 0;
- if (!(mask & ATTR_SIZE)) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
- commit_flags = 0;
- code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
- 0, 0, 0);
- if (code) {
- lock_flags = 0;
- goto error_return;
- }
- } else {
- if (need_iolock)
- lock_flags |= XFS_IOLOCK_EXCL;
- }
-
- xfs_ilock(ip, lock_flags);
-
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & (ATTR_UID|ATTR_GID)) {
- /*
- * These IDs could have changed since we last looked at them.
- * But, we're assured that if the ownership did change
- * while we didn't have the inode locked, inode's dquot(s)
- * would have changed also.
- */
- iuid = ip->i_d.di_uid;
- igid = ip->i_d.di_gid;
- gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
- uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
- /*
- * Do a quota reservation only if uid/gid is actually
- * going to change.
- */
- if (XFS_IS_QUOTA_RUNNING(mp) &&
- ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
- ASSERT(tp);
- code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
- capable(CAP_FOWNER) ?
- XFS_QMOPT_FORCE_RES : 0);
- if (code) /* out of quota */
- goto error_return;
- }
- }
-
- /*
- * Truncate file. Must have write permission and not be a directory.
- */
- if (mask & ATTR_SIZE) {
- /* Short circuit the truncate case for zero length files */
- if (iattr->ia_size == 0 &&
- ip->i_size == 0 && ip->i_d.di_nextents == 0) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- lock_flags &= ~XFS_ILOCK_EXCL;
- if (mask & ATTR_CTIME) {
- inode->i_mtime = inode->i_ctime =
- current_fs_time(inode->i_sb);
- xfs_mark_inode_dirty_sync(ip);
- }
- code = 0;
- goto error_return;
- }
-
- if (S_ISDIR(ip->i_d.di_mode)) {
- code = XFS_ERROR(EISDIR);
- goto error_return;
- } else if (!S_ISREG(ip->i_d.di_mode)) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
-
- /*
- * Make sure that the dquots are attached to the inode.
- */
- code = xfs_qm_dqattach_locked(ip, 0);
- if (code)
- goto error_return;
-
- /*
- * Now we can make the changes. Before we join the inode
- * to the transaction, if ATTR_SIZE is set then take care of
- * the part of the truncation that must be done without the
- * inode lock. This needs to be done before joining the inode
- * to the transaction, because the inode cannot be unlocked
- * once it is a part of the transaction.
- */
- if (iattr->ia_size > ip->i_size) {
- /*
- * Do the first part of growing a file: zero any data
- * in the last block that is beyond the old EOF. We
- * need to do this before the inode is joined to the
- * transaction to modify the i_size.
- */
- code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
- if (code)
- goto error_return;
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- lock_flags &= ~XFS_ILOCK_EXCL;
-
- /*
- * We are going to log the inode size change in this
- * transaction so any previous writes that are beyond the on
- * disk EOF and the new EOF that have not been written out need
- * to be written here. If we do not write the data out, we
- * expose ourselves to the null files problem.
- *
- * Only flush from the on disk size to the smaller of the in
- * memory file size or the new size as that's the range we
- * really care about here and prevents waiting for other data
- * not within the range we care about here.
- */
- if (ip->i_size != ip->i_d.di_size &&
- iattr->ia_size > ip->i_d.di_size) {
- code = xfs_flush_pages(ip,
- ip->i_d.di_size, iattr->ia_size,
- XBF_ASYNC, FI_NONE);
- if (code)
- goto error_return;
- }
-
- /* wait for all I/O to complete */
- xfs_ioend_wait(ip);
-
- code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
- xfs_get_blocks);
- if (code)
- goto error_return;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
- code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
- if (code)
- goto error_return;
-
- truncate_setsize(inode, iattr->ia_size);
-
- commit_flags = XFS_TRANS_RELEASE_LOG_RES;
- lock_flags |= XFS_ILOCK_EXCL;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip);
-
- /*
- * Only change the c/mtime if we are changing the size
- * or we are explicitly asked to change it. This handles
- * the semantic difference between truncate() and ftruncate()
- * as implemented in the VFS.
- *
- * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
- * is a special case where we need to update the times despite
- * not having these flags set. For all other operations the
- * VFS set these flags explicitly if it wants a timestamp
- * update.
- */
- if (iattr->ia_size != ip->i_size &&
- (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
- iattr->ia_ctime = iattr->ia_mtime =
- current_fs_time(inode->i_sb);
- mask |= ATTR_CTIME | ATTR_MTIME;
- }
-
- if (iattr->ia_size > ip->i_size) {
- ip->i_d.di_size = iattr->ia_size;
- ip->i_size = iattr->ia_size;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- } else if (iattr->ia_size <= ip->i_size ||
- (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
- /*
- * signal a sync transaction unless
- * we're truncating an already unlinked
- * file on a wsync filesystem
- */
- code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
- XFS_DATA_FORK,
- ((ip->i_d.di_nlink != 0 ||
- !(mp->m_flags & XFS_MOUNT_WSYNC))
- ? 1 : 0));
- if (code)
- goto abort_return;
- /*
- * Truncated "down", so we're removing references
- * to old data here - if we now delay flushing for
- * a long time, we expose ourselves unduly to the
- * notorious NULL files problem. So, we mark this
- * vnode and flush it when the file is closed, and
- * do not wait the usual (long) time for writeout.
- */
- xfs_iflags_set(ip, XFS_ITRUNCATED);
- }
- } else if (tp) {
- xfs_trans_ijoin(tp, ip);
- }
-
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & (ATTR_UID|ATTR_GID)) {
- /*
- * CAP_FSETID overrides the following restrictions:
- *
- * The set-user-ID and set-group-ID bits of a file will be
- * cleared upon successful return from chown()
- */
- if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
- !capable(CAP_FSETID)) {
- ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
- }
-
- /*
- * Change the ownerships and register quota modifications
- * in the transaction.
- */
- if (iuid != uid) {
- if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(mask & ATTR_UID);
- ASSERT(udqp);
- olddquot1 = xfs_qm_vop_chown(tp, ip,
- &ip->i_udquot, udqp);
- }
- ip->i_d.di_uid = uid;
- inode->i_uid = uid;
- }
- if (igid != gid) {
- if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_PQUOTA_ON(mp));
- ASSERT(mask & ATTR_GID);
- ASSERT(gdqp);
- olddquot2 = xfs_qm_vop_chown(tp, ip,
- &ip->i_gdquot, gdqp);
- }
- ip->i_d.di_gid = gid;
- inode->i_gid = gid;
- }
- }
-
- /*
- * Change file access modes.
- */
- if (mask & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
-
- ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= mode & ~S_IFMT;
-
- inode->i_mode &= S_IFMT;
- inode->i_mode |= mode & ~S_IFMT;
- }
-
- /*
- * Change file access or modified times.
- */
- if (mask & ATTR_ATIME) {
- inode->i_atime = iattr->ia_atime;
- ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- ip->i_update_core = 1;
- }
- if (mask & ATTR_CTIME) {
- inode->i_ctime = iattr->ia_ctime;
- ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- ip->i_update_core = 1;
- }
- if (mask & ATTR_MTIME) {
- inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- ip->i_update_core = 1;
- }
-
- /*
- * And finally, log the inode core if any attribute in it
- * has been changed.
- */
- if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
- ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- XFS_STATS_INC(xs_ig_attrchg);
-
- /*
- * If this is a synchronous mount, make sure that the
- * transaction goes to disk before returning to the user.
- * This is slightly sub-optimal in that truncates require
- * two sync transactions instead of one for wsync filesystems.
- * One for the truncate and one for the timestamps since we
- * don't want to change the timestamps unless we're sure the
- * truncate worked. Truncates are less than 1% of the laddis
- * mix so this probably isn't worth the trouble to optimize.
- */
- code = 0;
- if (mp->m_flags & XFS_MOUNT_WSYNC)
- xfs_trans_set_sync(tp);
-
- code = xfs_trans_commit(tp, commit_flags);
-
- xfs_iunlock(ip, lock_flags);
-
- /*
- * Release any dquot(s) the inode had kept before chown.
- */
- xfs_qm_dqrele(olddquot1);
- xfs_qm_dqrele(olddquot2);
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
-
- if (code)
- return code;
-
- /*
- * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
- * update. We could avoid this with linked transactions
- * and passing down the transaction pointer all the way
- * to attr_set. No previous user of the generic
- * Posix ACL code seems to care about this issue either.
- */
- if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
- code = -xfs_acl_chmod(inode);
- if (code)
- return XFS_ERROR(code);
- }
-
- return 0;
-
- abort_return:
- commit_flags |= XFS_TRANS_ABORT;
- error_return:
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
- if (tp) {
- xfs_trans_cancel(tp, commit_flags);
- }
- if (lock_flags != 0) {
- xfs_iunlock(ip, lock_flags);
- }
- return code;
-}
-
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -621,13 +197,6 @@
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- /*
- * Do the xfs_itruncate_start() call before
- * reserving any log space because
- * itruncate_start will call into the buffer
- * cache and we can't
- * do that within a transaction.
- */
if (flags & XFS_FREE_EOF_TRYLOCK) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
@@ -636,13 +205,6 @@
} else {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
}
- error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
- ip->i_size);
- if (error) {
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return error;
- }
error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp),
@@ -658,15 +220,12 @@
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip);
- error = xfs_itruncate_finish(&tp, ip,
- ip->i_size,
- XFS_DATA_FORK,
- 0);
- /*
- * If we get an error at this point we
- * simply don't bother truncating the file.
- */
+ error = xfs_itruncate_data(&tp, ip, ip->i_size);
if (error) {
+ /*
+ * If we get an error at this point we simply don't
+ * bother truncating the file.
+ */
xfs_trans_cancel(tp,
(XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT));
@@ -1084,20 +643,9 @@
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
if (truncate) {
- /*
- * Do the xfs_itruncate_start() call before
- * reserving any log space because itruncate_start
- * will call into the buffer cache and we can't
- * do that within a transaction.
- */
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return VN_INACTIVE_CACHE;
- }
+ xfs_ioend_wait(ip);
error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp),
@@ -1114,16 +662,7 @@
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip);
- /*
- * normally, we have to run xfs_itruncate_finish sync.
- * But if filesystem is wsync and we're in the inactive
- * path, then we know that nlink == 0, and that the
- * xaction that made nlink == 0 is permanently committed
- * since xfs_remove runs as a synchronous transaction.
- */
- error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
- (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
-
+ error = xfs_itruncate_data(&tp, ip, 0);
if (error) {
xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2430,6 +1969,8 @@
if (!bp)
return XFS_ERROR(ENOMEM);
+ xfs_buf_unlock(bp);
+
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
offset_fsb = XFS_B_TO_FSBT(mp, offset);
nimap = 1;
@@ -2784,7 +2325,7 @@
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = startoffset;
- error = xfs_setattr(ip, &iattr, attr_flags);
+ error = xfs_setattr_size(ip, &iattr, attr_flags);
if (error)
return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 3bcd233..35d3d51 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -13,7 +13,8 @@
struct xfs_iomap;
-int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
+int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
+int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */