[XFS] write barrier support  Issue all log sync operations as ordered
writes.  In addition flush the disk cache on fsync if the sync cached
operation didn't sync the log to disk (this requires some additional
bookeping in the transaction and log code). If the device doesn't claim to
support barriers, the filesystem has an extern log volume or the trial
superblock write with barriers enabled failed we disable barriers and
print a warning.  We should probably fail the mount completely, but that
could lead to nasty boot failures for the root filesystem.  Not enabled by
default yet, needs more destructive testing first.

SGI-PV: 912426
SGI-Modid: xfs-linux:xfs-kern:198723a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 51814c3..b9d3ad3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -93,8 +93,11 @@
 STATIC void xlog_state_switch_iclogs(xlog_t		*log,
 				     xlog_in_core_t *iclog,
 				     int		eventual_size);
-STATIC int  xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags);
+STATIC int  xlog_state_sync(xlog_t			*log,
+			    xfs_lsn_t 			lsn,
+			    uint			flags,
+			    int				*log_flushed);
+STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
 
 /* local functions to manipulate grant head */
@@ -312,12 +315,17 @@
  * semaphore.
  */
 int
-xfs_log_force(xfs_mount_t *mp,
-	      xfs_lsn_t	  lsn,
-	      uint	  flags)
+_xfs_log_force(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags,
+	int		*log_flushed)
 {
-	int	rval;
-	xlog_t *log = mp->m_log;
+	xlog_t		*log = mp->m_log;
+	int		dummy;
+
+	if (!log_flushed)
+		log_flushed = &dummy;
 
 #if defined(DEBUG) || defined(XLOG_NOLOG)
 	if (!xlog_debug && xlog_target == log->l_targ)
@@ -328,17 +336,12 @@
 
 	XFS_STATS_INC(xs_log_force);
 
-	if ((log->l_flags & XLOG_IO_ERROR) == 0) {
-		if (lsn == 0)
-			rval = xlog_state_sync_all(log, flags);
-		else
-			rval = xlog_state_sync(log, lsn, flags);
-	} else {
-		rval = XFS_ERROR(EIO);
-	}
-
-	return rval;
-
+	if (log->l_flags & XLOG_IO_ERROR)
+		return XFS_ERROR(EIO);
+	if (lsn == 0)
+		return xlog_state_sync_all(log, flags, log_flushed);
+	else
+		return xlog_state_sync(log, lsn, flags, log_flushed);
 }	/* xfs_log_force */
 
 /*
@@ -1467,14 +1470,13 @@
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
 	/*
-	 * Do a disk write cache flush for the log block.
-	 * This is a bit of a sledgehammer, it would be better
-	 * to use a tag barrier here that just prevents reordering.
+	 * Do an ordered write for the log block.
+	 *
 	 * It may not be needed to flush the first split block in the log wrap
 	 * case, but do it anyways to be safe -AK
 	 */
-	if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
-		XFS_BUF_FLUSH(bp);
+	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+		XFS_BUF_ORDERED(bp);
 
 	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
 	ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1505,8 +1507,8 @@
 		XFS_BUF_SET_FSPRIVATE(bp, iclog);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
-		if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
-			XFS_BUF_FLUSH(bp);
+		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+			XFS_BUF_ORDERED(bp);
 		dptr = XFS_BUF_PTR(bp);
 		/*
 		 * Bump the cycle numbers at the start of each block
@@ -2951,7 +2953,7 @@
  *		not in the active nor dirty state.
  */
 STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags)
+xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 {
 	xlog_in_core_t	*iclog;
 	xfs_lsn_t	lsn;
@@ -3000,6 +3002,7 @@
 
 				if (xlog_state_release_iclog(log, iclog))
 					return XFS_ERROR(EIO);
+				*log_flushed = 1;
 				s = LOG_LOCK(log);
 				if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn &&
 				    iclog->ic_state != XLOG_STATE_DIRTY)
@@ -3043,6 +3046,7 @@
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return XFS_ERROR(EIO);
+		*log_flushed = 1;
 
 	} else {
 
@@ -3068,7 +3072,8 @@
 int
 xlog_state_sync(xlog_t	  *log,
 		xfs_lsn_t lsn,
-		uint	  flags)
+		uint	  flags,
+		int	  *log_flushed)
 {
     xlog_in_core_t	*iclog;
     int			already_slept = 0;
@@ -3120,6 +3125,7 @@
 			XFS_STATS_INC(xs_log_force_sleep);
 			sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
 				&log->l_icloglock, s);
+			*log_flushed = 1;
 			already_slept = 1;
 			goto try_again;
 		} else {
@@ -3128,6 +3134,7 @@
 			LOG_UNLOCK(log, s);
 			if (xlog_state_release_iclog(log, iclog))
 				return XFS_ERROR(EIO);
+			*log_flushed = 1;
 			s = LOG_LOCK(log);
 		}
 	}
@@ -3152,6 +3159,7 @@
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return XFS_ERROR(EIO);
+		*log_flushed = 1;
 	} else {		/* just return */
 		LOG_UNLOCK(log, s);
 	}
@@ -3606,6 +3614,7 @@
 	xlog_ticket_t	*tic;
 	xlog_t		*log;
 	int		retval;
+	int		dummy;
 	SPLDECL(s);
 	SPLDECL(s2);
 
@@ -3684,7 +3693,7 @@
 		 * Force the incore logs to disk before shutting the
 		 * log down completely.
 		 */
-		xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC);
+		xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
 		s2 = LOG_LOCK(log);
 		retval = xlog_state_ioerror(log);
 		LOG_UNLOCK(log, s2);