[PATCH] select: fix returned timeval

With David Woodhouse <dwmw2@infradead.org>

select() presently has a habit of increasing the value of the user's
`timeout' argument on return.

We were writing back a timeout larger than the original.  This happens
because we _deliberately_ round up, since we know we must wait at _least_ as
long as the caller asks us to.

The patch adds a couple of helper functions for magnitude comparison of
timespecs and of timevals, and uses them to prevent the various poll and
select functions from returning a timeout which is larger than the one which
was passed in.

The patch also fixes a bug in compat_sys_pselect7(): it was adding the new
timeout value to the old one and was returning that.  It should just return
the new timeout value.

(We have various handy timespec/timeval to/from nsec conversion functions in
time.h, but this code open-codes it all.)

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andi Kleen <ak@muc.de>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: george anzinger <george@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/fs/compat.c b/fs/compat.c
index 70c5af4..a2ba78b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1751,11 +1751,15 @@
 	ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tvp) {
+		struct compat_timeval rtv;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		tv.tv_sec = timeout;
-		if (copy_to_user(tvp, &tv, sizeof(tv))) {
+		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+		rtv.tv_sec = timeout;
+		if (compat_timeval_compare(&rtv, &tv) > 0)
+			rtv = tv;	/* cap the value written back at tv */
+		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -1822,13 +1826,17 @@
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
 	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
-		ts.tv_sec += timeout / HZ;
-		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
-		if (ts.tv_nsec >= 1000000000) {
-			ts.tv_sec++;
-			ts.tv_nsec -= 1000000000;
+		struct compat_timespec rts;
+
+		rts.tv_sec = timeout / HZ;
+		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
+		if (rts.tv_nsec >= NSEC_PER_SEC) {
+			rts.tv_sec++;
+			rts.tv_nsec -= NSEC_PER_SEC;
 		}
-		(void)copy_to_user(tsp, &ts, sizeof(ts));
+		if (compat_timespec_compare(&rts, &ts) > 0)
+			rts = ts;	/* cap the value written back at ts */
+		(void)copy_to_user(tsp, &rts, sizeof(rts));
 	}
 
 	if (ret == -ERESTARTNOHAND) {
@@ -1918,12 +1926,17 @@
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
 	if (tsp && timeout >= 0) {
+		struct compat_timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
 		/* Yes, we know it's actually an s64, but it's also positive. */
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+					1000;
+		rts.tv_sec = timeout;
+		if (compat_timespec_compare(&rts, &ts) > 0)
+			rts = ts;	/* cap the value written back at ts */
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
diff --git a/fs/select.c b/fs/select.c
index bc60a3e..6ce68a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -398,11 +398,15 @@
 	ret = core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tvp) {
+		struct timeval rtv;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		tv.tv_sec = timeout;
-		if (copy_to_user(tvp, &tv, sizeof(tv))) {
+		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+		rtv.tv_sec = timeout;
+		if (timeval_compare(&rtv, &tv) > 0)
+			rtv = tv;	/* cap the value written back at tv */
+		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -460,11 +464,16 @@
 	ret = core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tsp) {
+		struct timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+						1000;
+		rts.tv_sec = timeout;
+		if (timespec_compare(&rts, &ts) > 0)
+			rts = ts;	/* cap the value written back at ts */
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -758,12 +767,17 @@
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
 	if (tsp && timeout >= 0) {
+		struct timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
 		/* Yes, we know it's actually an s64, but it's also positive. */
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+						1000;
+		rts.tv_sec = timeout;
+		if (timespec_compare(&rts, &ts) > 0)
+			rts = ts;	/* cap the value written back at ts */
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 		sticky:
 			/*
 			 * If an application puts its timeval in read-only
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f9ca534..c9ab2a2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -161,5 +161,45 @@
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
+/*
+ * Magnitude comparison of two compat timevals.
+ *
+ * lhs < rhs:  return <0
+ * lhs == rhs: return 0
+ * lhs > rhs:  return >0
+ */
+static inline int compat_timeval_compare(const struct compat_timeval *lhs,
+					const struct compat_timeval *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	/* Compare rather than subtract: a difference can overflow int. */
+	if (lhs->tv_usec < rhs->tv_usec)
+		return -1;
+	return lhs->tv_usec > rhs->tv_usec;
+}
+
+/*
+ * Magnitude comparison of two compat timespecs.
+ *
+ * lhs < rhs:  return <0
+ * lhs == rhs: return 0
+ * lhs > rhs:  return >0
+ */
+static inline int compat_timespec_compare(const struct compat_timespec *lhs,
+					const struct compat_timespec *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	/* Compare rather than subtract: a difference can overflow int. */
+	if (lhs->tv_nsec < rhs->tv_nsec)
+		return -1;
+	return lhs->tv_nsec > rhs->tv_nsec;
+}
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/time.h b/include/linux/time.h
index 7b4dc365..d9cdba5 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -33,11 +33,43 @@
 #define NSEC_PER_SEC		1000000000L
 #define NSEC_PER_USEC		1000L
 
-static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
+static inline int timespec_equal(struct timespec *a, struct timespec *b)
 {
 	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
 }
 
+/*
+ * lhs < rhs:  return <0
+ * lhs == rhs: return 0
+ * lhs > rhs:  return >0
+ */
+static inline int timespec_compare(const struct timespec *lhs,
+				   const struct timespec *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	/* Compare rather than subtract: tv_nsec may be wider than int. */
+	if (lhs->tv_nsec < rhs->tv_nsec)
+		return -1;
+	return lhs->tv_nsec > rhs->tv_nsec;
+}
+
+/* Same return convention as timespec_compare(), for timevals. */
+static inline int timeval_compare(const struct timeval *lhs,
+				  const struct timeval *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	/* Compare rather than subtract: tv_usec may be wider than int. */
+	if (lhs->tv_usec < rhs->tv_usec)
+		return -1;
+	return lhs->tv_usec > rhs->tv_usec;
+}
+
 extern unsigned long mktime(const unsigned int year, const unsigned int mon,
 			    const unsigned int day, const unsigned int hour,
 			    const unsigned int min, const unsigned int sec);