[ARM] orion: sched_clock implementation for orion platforms

sched_clock implementation for orion platform. Its realized using
free-running clocksource timer, which provides a resolution of 7.5ns
(depending on tclk). It's derived from PXA's sched_clock implementation.

[ nico: renamed orion2ns to tclk2ns, fixed max value in the comment ]

Signed-off-by: Stefan Agner <stefan.agner@yahoo.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index de8a001..b856cec 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -17,6 +17,9 @@
 #include <linux/irq.h>
 #include <asm/mach/time.h>
 #include <mach/bridge-regs.h>
+#include <mach/hardware.h>
+#include <linux/sched.h>
+#include <linux/cnt32_to_63.h>
 
 /*
  * Number of timer ticks per jiffy.
@@ -39,6 +42,36 @@
 
 
 /*
+ * Orion's sched_clock implementation. It has a resolution of
+ * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
+ */
+#define TCLK2NS_SCALE_FACTOR 8
+
+static unsigned long tclk2ns_scale;
+
+static void __init set_tclk2ns_scale(unsigned long tclk)
+{
+	unsigned long long v = NSEC_PER_SEC;
+	v <<= TCLK2NS_SCALE_FACTOR;
+	v += tclk/2;
+	do_div(v, tclk);
+	/*
+	 * We want an even value to automatically clear the top bit
+	 * returned by cnt32_to_63() without an additional run time
+	 * instruction. So if the LSB is 1 then round it up.
+	 */
+	if (v & 1)
+		v++;
+	tclk2ns_scale = v;
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
+	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+}
+
+/*
  * Clocksource handling.
  */
 static cycle_t orion_clksrc_read(struct clocksource *cs)
@@ -176,6 +209,10 @@
 
 	ticks_per_jiffy = (tclk + HZ/2) / HZ;
 
+	/*
+	 * Set scale for sched_clock
+	 */
+	set_tclk2ns_scale(tclk);
 
 	/*
 	 * Setup free-running clocksource timer (interrupts