[XFS] Ondisk format extension for extended attributes (attr2).	Basically,
the data/attr forks now grow up/down from either end of the literal area,
rather than dividing the literal area into two chunks and growing both
upward.  Means we can now make much more efficient use of the attribute
space, incl. fitting DMF attributes inline in 256 byte inodes, and large
jumps in dbench3 performance numbers.  It is self enabling, but can be
forced on/off via the attr2/noattr2 mount options.

SGI-PV: 941645
SGI-Modid: xfs-linux:xfs-kern:23835a

Signed-off-by: Nathan Scott <nathans@sgi.com>
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index e13eaa5..50598b1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -118,13 +118,82 @@
 
 
 /*========================================================================
- * External routines when dirsize < XFS_LITINO(mp).
+ * External routines when attribute fork size < XFS_LITINO(mp).
  *========================================================================*/
 
 /*
+ * Query whether the requested number of additional bytes of extended
+ * attribute space will be able to fit inline.
+ * Returns zero if not, else the di_forkoff fork offset to be used in the
+ * literal area for attribute data once the new bytes have been added.
+ *
+ * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value;
+ * special case for dev/uuid inodes, they have fixed size data forks.
+ */
+int
+xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
+{
+	int offset;
+	int minforkoff;	/* lower limit on valid forkoff locations */
+	int maxforkoff;	/* upper limit on valid forkoff locations */
+	xfs_mount_t *mp = dp->i_mount;
+
+	if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+		if (bytes <= XFS_IFORK_ASIZE(dp))
+			return mp->m_attroffset >> 3;
+		return 0;
+	}
+
+	offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
+
+	switch (dp->i_d.di_format) {
+	case XFS_DINODE_FMT_DEV:
+		minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+		return (offset >= minforkoff) ? minforkoff : 0;
+	case XFS_DINODE_FMT_UUID:
+		minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
+		return (offset >= minforkoff) ? minforkoff : 0;
+	}
+
+	/* data fork btree root can have at least this many key/ptr pairs */
+	minforkoff = MAX(dp->i_df.if_bytes, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+	minforkoff = roundup(minforkoff, 8) >> 3;
+
+	/* attr fork btree root can have at least this many key/ptr pairs */
+	maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
+	maxforkoff = maxforkoff >> 3;	/* rounded down */
+
+	if (offset >= minforkoff && offset < maxforkoff)
+		return offset;
+	if (offset >= maxforkoff)
+		return maxforkoff;
+	return 0;
+}
+
+/*
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2)
+ */
+STATIC void
+xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
+{
+	unsigned long s;
+
+	if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
+		s = XFS_SB_LOCK(mp);
+		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+			XFS_SB_UNLOCK(mp, s);
+			xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
+		} else
+			XFS_SB_UNLOCK(mp, s);
+	}
+}
+
+/*
  * Create the initial contents of a shortform attribute list.
  */
-int
+void
 xfs_attr_shortform_create(xfs_da_args_t *args)
 {
 	xfs_attr_sf_hdr_t *hdr;
@@ -148,29 +217,37 @@
 	hdr->count = 0;
 	INT_SET(hdr->totsize, ARCH_CONVERT, sizeof(*hdr));
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
-	return(0);
 }
 
 /*
  * Add a name/value pair to the shortform attribute list.
  * Overflow from the inode has already been checked for.
  */
-int
-xfs_attr_shortform_add(xfs_da_args_t *args)
+void
+xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 {
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int i, offset, size;
+	xfs_mount_t *mp;
 	xfs_inode_t *dp;
 	xfs_ifork_t *ifp;
 
 	dp = args->dp;
+	mp = dp->i_mount;
+	dp->i_d.di_forkoff = forkoff;
+	dp->i_df.if_ext_max =
+		XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+	dp->i_afp->if_ext_max =
+		XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+
 	ifp = dp->i_afp;
 	ASSERT(ifp->if_flags & XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
 	sfe = &sf->list[0];
 	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+#ifdef DEBUG
 		if (sfe->namelen != args->namelen)
 			continue;
 		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
@@ -181,7 +258,8 @@
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
 			continue;
-		return(XFS_ERROR(EEXIST));
+		ASSERT(0);
+#endif
 	}
 
 	offset = (char *)sfe - (char *)sf;
@@ -200,11 +278,11 @@
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 
-	return(0);
+	xfs_sbversion_add_attr2(mp, args->trans);
 }
 
 /*
- * Remove a name from the shortform attribute list structure.
+ * Remove an attribute from the shortform attribute list structure.
  */
 int
 xfs_attr_shortform_remove(xfs_da_args_t *args)
@@ -212,17 +290,16 @@
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int base, size=0, end, totsize, i;
+	xfs_mount_t *mp;
 	xfs_inode_t *dp;
 
-	/*
-	 * Remove the attribute.
-	 */
 	dp = args->dp;
+	mp = dp->i_mount;
 	base = sizeof(xfs_attr_sf_hdr_t);
 	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
-				sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
+	end = INT_GET(sf->hdr.count, ARCH_CONVERT);
+	for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
 					base += size, i++) {
 		size = XFS_ATTR_SF_ENTSIZE(sfe);
 		if (sfe->namelen != args->namelen)
@@ -237,19 +314,51 @@
 			continue;
 		break;
 	}
-	if (i == INT_GET(sf->hdr.count, ARCH_CONVERT))
+	if (i == end)
 		return(XFS_ERROR(ENOATTR));
 
+	/*
+	 * Fix up the attribute fork data, covering the hole
+	 */
 	end = base + size;
 	totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
-	if (end != totsize) {
-		memmove(&((char *)sf)[base], &((char *)sf)[end],
-							totsize - end);
-	}
+	if (end != totsize)
+		memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, -size);
-	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
-	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
+
+	/*
+	 * Fix up the start offset of the attribute fork
+	 */
+	totsize -= size;
+	if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname) {
+		/*
+		 * Last attribute now removed, revert to original
+		 * inode format making all literal area available
+		 * to the data fork once more.
+		 */
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = 0;
+		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+		ASSERT(dp->i_d.di_anextents == 0);
+		ASSERT(dp->i_afp == NULL);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+	} else {
+		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
+		ASSERT(dp->i_d.di_forkoff);
+		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname);
+		dp->i_afp->if_ext_max =
+			XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp,
+					XFS_ILOG_CORE | XFS_ILOG_ADATA);
+	}
+
+	xfs_sbversion_add_attr2(mp, args->trans);
 
 	return(0);
 }
@@ -649,14 +758,16 @@
 				+ name_loc->namelen
 				+ INT_GET(name_loc->valuelen, ARCH_CONVERT);
 	}
-	return( bytes < XFS_IFORK_ASIZE(dp) );
+	if (bytes == sizeof(struct xfs_attr_sf_hdr))
+		return(-1);
+	return(xfs_attr_shortform_bytesfit(dp, bytes));
 }
 
 /*
  * Convert a leaf attribute list to shortform attribute list
  */
 int
-xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
@@ -683,9 +794,25 @@
 	error = xfs_da_shrink_inode(args, 0, bp);
 	if (error)
 		goto out;
-	error = xfs_attr_shortform_create(args);
-	if (error)
+
+	if (forkoff == -1) {
+		/*
+		 * Last attribute was removed, revert to original
+		 * inode format making all literal area available
+		 * to the data fork once more.
+		 */
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = 0;
+		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+		ASSERT(dp->i_d.di_anextents == 0);
+		ASSERT(dp->i_afp == NULL);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
 		goto out;
+	}
+
+	xfs_attr_shortform_create(args);
 
 	/*
 	 * Copy the attributes
@@ -713,7 +840,7 @@
 		nargs.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
 		nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
 			      ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
-		xfs_attr_shortform_add(&nargs);
+		xfs_attr_shortform_add(&nargs, forkoff);
 	}
 	error = 0;