MIPS: Octeon: Use non-overflowing arithmetic in sched_clock

With typical mult and shift values, the calculation for Octeon's sched_clock
overflows when using 64-bit arithmetic.  Use 128-bit calculations instead.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/849/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 96df821..0bf4bbe 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -52,9 +52,34 @@
 
 unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(read_c0_cvmcount(),
-				  clocksource_mips.mult,
-				  clocksource_mips.shift);
+	/* 64-bit arithmatic can overflow, so use 128-bit.  */
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 3))
+	u64 t1, t2, t3;
+	unsigned long long rv;
+	u64 mult = clocksource_mips.mult;
+	u64 shift = clocksource_mips.shift;
+	u64 cnt = read_c0_cvmcount();
+
+	asm (
+		"dmultu\t%[cnt],%[mult]\n\t"
+		"nor\t%[t1],$0,%[shift]\n\t"
+		"mfhi\t%[t2]\n\t"
+		"mflo\t%[t3]\n\t"
+		"dsll\t%[t2],%[t2],1\n\t"
+		"dsrlv\t%[rv],%[t3],%[shift]\n\t"
+		"dsllv\t%[t1],%[t2],%[t1]\n\t"
+		"or\t%[rv],%[t1],%[rv]\n\t"
+		: [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+		: [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
+		: "hi", "lo");
+	return rv;
+#else
+	/* GCC > 4.3 do it the easy way.  */
+	unsigned int __attribute__((mode(TI))) t;
+	t = read_c0_cvmcount();
+	t = t * clocksource_mips.mult;
+	return (unsigned long long)(t >> clocksource_mips.shift);
+#endif
 }
 
 void __init plat_time_init(void)