Merge branch 'master' into gfio

Conflicts:
	fio.c
	fio.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/HOWTO b/HOWTO
index 3a0fdd5..eeaddba 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1381,7 +1381,8 @@
 		latency, since queue/complete is one operation there. This
 		value can be in milliseconds or microseconds, fio will choose
 		the most appropriate base and print that. In the example
-		above, milliseconds is the best scale.
+		above, milliseconds is the best scale. Note: in --minimal mode
+		latencies are always expressed in microseconds.
 	clat=	Completion latency. Same names as slat, this denotes the
 		time from submission to completion of the io pieces. For
 		sync io, clat will usually be equal (or very close) to 0,
@@ -1473,14 +1474,14 @@
 		Completion latency: min, max, mean, deviation (usec)
 		Completion latency percentiles: 20 fields (see below)
 		Total latency: min, max, mean, deviation (usec)
-		Bw: min, max, aggregate percentage of total, mean, deviation
+		Bw (KB/s): min, max, aggregate percentage of total, mean, deviation
 	WRITE status:
 		Total IO (KB), bandwidth (KB/sec), IOPS, runtime (msec)
 		Submission latency: min, max, mean, deviation (usec)
 		Completion latency: min, max, mean, deviation (usec)
 		Completion latency percentiles: 20 fields (see below)
 		Total latency: min, max, mean, deviation (usec)
-		Bw: min, max, aggregate percentage of total, mean, deviation
+		Bw (KB/s): min, max, aggregate percentage of total, mean, deviation
 	CPU usage: user, system, context switches, major faults, minor faults
 	IO depths: <=1, 2, 4, 8, 16, 32, >=64
 	IO latencies microseconds: <=2, 4, 10, 20, 50, 100, 250, 500, 750, 1000
diff --git a/engines/windowsaio.c b/engines/windowsaio.c
index 766cc5d..ea89969 100644
--- a/engines/windowsaio.c
+++ b/engines/windowsaio.c
@@ -82,16 +82,16 @@
 	}
 
 	if (!rc) {
-	    for (i = 0; i < td->o.iodepth; i++) {
-	        wd->ovls[i].io_free = TRUE;
-	        wd->ovls[i].io_complete = FALSE;
+		for (i = 0; i < td->o.iodepth; i++) {
+			wd->ovls[i].io_free = TRUE;
+			wd->ovls[i].io_complete = FALSE;
 
 			wd->ovls[i].o.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
 			if (wd->ovls[i].o.hEvent == NULL) {
 				rc = 1;
 				break;
 			}
-	    }
+		}
 	}
 
 	if (!rc) {
@@ -287,6 +287,7 @@
 			if (fov->io_complete) {
 				fov->io_complete = FALSE;
 				fov->io_free  = TRUE;
+				ResetEvent(fov->o.hEvent);
 				wd->aio_events[dequeued] = io_u;
 				dequeued++;
 			}
@@ -297,7 +298,7 @@
 
 		if (dequeued < min) {
 			status = WaitForSingleObject(wd->iocomplete_event, mswait);
-			if (status != WAIT_OBJECT_0 && dequeued > 0)
+			if (status != WAIT_OBJECT_0 && dequeued >= min)
 			    break;
 		}
 
@@ -322,17 +323,15 @@
 	wd = td->io_ops->data;
 
 	for (index = 0; index < td->o.iodepth; index++) {
-		if (wd->ovls[index].io_free) {
-			wd->ovls[index].io_free = FALSE;
-			ResetEvent(wd->ovls[index].o.hEvent);
+		if (wd->ovls[index].io_free)
 			break;
-		}
 	}
 
 	assert(index < td->o.iodepth);
 
-	lpOvl = &wd->ovls[index].o;
+	wd->ovls[index].io_free = FALSE;
 	wd->ovls[index].io_u = io_u;
+	lpOvl = &wd->ovls[index].o;
 	lpOvl->Internal = STATUS_PENDING;
 	lpOvl->InternalHigh = 0;
 	lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
diff --git a/fio.h b/fio.h
index 8246e38..95d9d77 100644
--- a/fio.h
+++ b/fio.h
@@ -314,7 +314,7 @@
 extern int groupid;
 extern int terse_output;
 extern int temp_stall_ts;
-extern unsigned long page_mask, page_size;
+extern uintptr_t page_mask, page_size;
 extern int read_only;
 extern int eta_print;
 extern unsigned long done_secs;
diff --git a/iolog.c b/iolog.c
index 0e49236..137c1e9 100644
--- a/iolog.c
+++ b/iolog.c
@@ -238,7 +238,9 @@
 		else if (ipo->offset > __ipo->offset)
 			p = &(*p)->rb_right;
 		else {
-			assert(ipo->len == __ipo->len);
+			dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
+				__ipo->offset, __ipo->len,
+				ipo->offset, ipo->len);
 			td->io_hist_len--;
 			rb_erase(parent, &td->io_hist_tree);
 			remove_trim_entry(td, __ipo);
diff --git a/libfio.c b/libfio.c
index acdb0ea..36876dd 100644
--- a/libfio.c
+++ b/libfio.c
@@ -41,8 +41,8 @@
 
 unsigned long arch_flags = 0;
 
-unsigned long page_mask;
-unsigned long page_size;
+uintptr_t page_mask;
+uintptr_t page_size;
 
 static const char *fio_os_strings[os_nr] = {
 	"Invalid",
diff --git a/os/os-windows.h b/os/os-windows.h
index 06fe433..8b801ed 100644
--- a/os/os-windows.h
+++ b/os/os-windows.h
@@ -20,6 +20,7 @@
 #define FIO_HAVE_CHARDEV_SIZE
 #define FIO_HAVE_FDATASYNC
 #define FIO_HAVE_WINDOWSAIO
+#define FIO_HAVE_FALLOCATE
 #define FIO_HAVE_GETTID
 #define FIO_HAVE_CLOCK_MONOTONIC
 #define FIO_USE_GENERIC_RAND
diff --git a/os/windows/posix.c b/os/windows/posix.c
index ba7abb5..9ef369e 100755
--- a/os/windows/posix.c
+++ b/os/windows/posix.c
@@ -331,9 +331,43 @@
 
 int posix_fallocate(int fd, off_t offset, off_t len)
 {
-	log_err("%s is not implemented\n", __func__);
-	errno = ENOSYS;
-	return (-1);
+	/*
+	 * Emulated by writing zeros over [offset, offset + len) with _write():
+	 * data already in that range is overwritten and the file position is
+	 * moved. Returns 0 on success or an errno value (never -1) on failure.
+	 */
+	const int BUFFER_SIZE = 64*1024*1024;
+	off_t bytes_remaining = len;
+	unsigned int write_len;
+	int bytes_written;
+	int rc = 0;
+	char *buf;
+
+	if (len <= 0 || offset < 0)
+		return EINVAL;
+
+	/* Seek before allocating so a failure here cannot leak the buffer. */
+	if (lseek(fd, offset, SEEK_SET) == -1)
+		return errno;
+
+	buf = calloc(1, BUFFER_SIZE);
+	if (buf == NULL)
+		return ENOMEM;
+
+	while (bytes_remaining > 0) {
+		write_len = bytes_remaining < BUFFER_SIZE ?
+				(unsigned int)bytes_remaining : BUFFER_SIZE;
+
+		bytes_written = _write(fd, buf, write_len);
+		if (bytes_written == -1) {
+			rc = errno;
+			break;
+		}
+		bytes_remaining -= bytes_written;
+	}
+
+	free(buf);
+	return rc;
 }
 
 int ftruncate(int fildes, off_t length)
@@ -545,7 +579,7 @@
 	int rc;
 
 	if (timeout != -1)
-		to = &tv;		
+		to = &tv;
 
 	to->tv_sec = timeout / 1000;
 	to->tv_usec = (timeout % 1000) * 1000;
@@ -567,7 +601,7 @@
 		if (fds[i].events & POLLOUT)
 			FD_SET(fds[i].fd, &writefds);
 
-		FD_SET(fds[i].fd, &exceptfds);		
+		FD_SET(fds[i].fd, &exceptfds);
 	}
 
 	rc = select(nfds, &readfds, &writefds, &exceptfds, to);
diff --git a/stat.c b/stat.c
index f15ebeb..eff1a27 100644
--- a/stat.c
+++ b/stat.c
@@ -1010,10 +1010,11 @@
 
 			bw = 0;
 			if (ts->runtime[j]) {
-				unsigned long runt;
+				unsigned long runt = ts->runtime[j];
+				unsigned long long kb;
 
-				runt = ts->runtime[j];
-				bw = ts->io_bytes[j] / runt;
+				kb = ts->io_bytes[j] / rs->kb_base;
+				bw = kb * 1000 / runt;
 			}
 			if (bw < rs->min_bw[j])
 				rs->min_bw[j] = bw;
@@ -1025,16 +1026,12 @@
 	}
 
 	for (i = 0; i < groupid + 1; i++) {
-		unsigned long max_run[2];
-
 		rs = &runstats[i];
-		max_run[0] = rs->max_run[0];
-		max_run[1] = rs->max_run[1];
 
 		if (rs->max_run[0])
-			rs->agg[0] = (rs->io_kb[0] * 1000) / max_run[0];
+			rs->agg[0] = (rs->io_kb[0] * 1000) / rs->max_run[0];
 		if (rs->max_run[1])
-			rs->agg[1] = (rs->io_kb[1] * 1000) / max_run[1];
+			rs->agg[1] = (rs->io_kb[1] * 1000) / rs->max_run[1];
 	}
 
 	/*