arm: Move to upstream udelay via timer implementation
This is a squash of a handful of changes and reverts of the
Qualcomm specific implementation:
Revert "arm: Implement a timer based __delay() loop"
This reverts commit 976eafa8b18252876e15f861944acf693b07ce7e.
Revert "arm: Allow machines to override __delay()"
This reverts commit bc0ef8ab167272890f1aab62928b04a9aeb87ce9.
Revert "arm: Translate delay.S into (mostly) C"
This reverts commit 8d5868d8205d10a0a8e423f53e9cc9bb3e9d1a34.
ARM: 7451/1: arch timer: implement read_current_timer and get_cycles
This patch implements read_current_timer using the architected timers
when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected
not to be usable at runtime, we return -ENXIO to the caller.
Furthermore, if read_current_timer is exported then we can implement
get_cycles in terms of it for use as both an entropy source and for
implementing __udelay and friends.
Tested-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
ARM: 7452/1: delay: allow timer-based delay implementation to be
selected
This patch allows a timer-based delay implementation to be selected by
switching the delay routines over to use get_cycles, which is
implemented in terms of read_current_timer. This further allows us to
skip the loop calibration and have a consistent delay function in the
face of core frequency scaling.
To avoid the pain of dealing with memory-mapped counters, this
implementation uses the co-processor interface to the architected timers
when they are available. The previous loop-based implementation is
kept around for CPUs without the architected timers and we retain both
the maximum delay (2ms) and the corresponding conversion factors for
determining the number of loops required for a given interval. Since the
indirection of the timer routines will only work when called from C,
the sa1100 sleep routines are modified to branch to the loop-based delay
functions directly.
Tested-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
ARM: delay: set loops_per_jiffy when moving to timer-based loop
The delay functions may be called by some platforms between switching to
the timer-based delay loop but before calibration. In this case, the
initial loops_per_jiffy may not be suitable for the timer (although a
compromise may be achievable) and delay times may be considered too
inaccurate.
This patch updates loops_per_jiffy when switching to the timer-based
delay loop so that delays are consistent prior to calibration.
Signed-off-by: Will Deacon <will.deacon@arm.com>
ARM: delay: add registration mechanism for delay timer sources
The current timer-based delay loop relies on the architected timer to
initiate the switch away from the polling-based implementation. This is
unfortunate for platforms without the architected timers but with a
suitable delay source (that is, constant frequency, always powered-up
and ticking as long as the CPUs are online).
This patch introduces a registration mechanism for the delay timer
(which provides an unconditional read_current_timer implementation) and
updates the architected timer code to use the new interface.
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
ARM: export default read_current_timer
read_current_timer is used by get_cycles since "ARM: 7538/1: delay:
add registration mechanism for delay timer sources", and get_cycles
can be used by device drivers in loadable modules, so it has to
be exported.
Without this patch, building imote2_defconfig fails with
ERROR: "read_current_timer" [crypto/tcrypt.ko] undefined!
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Jonathan Austin <jonathan.austin@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Change-Id: If1ad095d6852f5966ea995856103e06de6ab2f59
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index e32194f..bdfdfa0 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -3,6 +3,7 @@
#include <linux/ioport.h>
#include <linux/clocksource.h>
+#include <asm/errno.h>
struct arch_timer {
struct resource res[3];
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index 5c6b9a3..ab98fdd 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,27 @@
#ifndef __ASM_ARM_DELAY_H
#define __ASM_ARM_DELAY_H
+#include <asm/memory.h>
#include <asm/param.h> /* HZ */
-extern void __delay(unsigned long loops);
+#define MAX_UDELAY_MS 2
+#define UDELAY_MULT ((UL(2199023) * HZ) >> 11)
+#define UDELAY_SHIFT 30
+
+#ifndef __ASSEMBLY__
+
+struct delay_timer {
+ unsigned long (*read_current_timer)(void);
+ unsigned long freq;
+};
+
+extern struct arm_delay_ops {
+ void (*delay)(unsigned long);
+ void (*const_udelay)(unsigned long);
+ void (*udelay)(unsigned long);
+} arm_delay_ops;
+
+#define __delay(n) arm_delay_ops.delay(n)
/*
* This function intentionally does not exist; if you see references to
@@ -23,25 +41,31 @@
* division by multiplication: you don't have to worry about
* loss of precision.
*
- * Use only for very small delays ( < 1 msec). Should probably use a
+ * Use only for very small delays ( < 2 msec). Should probably use a
* lookup table, really, as the multiplications take much too long with
* short delays. This is a "reasonable" implementation, though (and the
* first constant multiplications gets optimized away if the delay is
* a constant)
*/
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n) arm_delay_ops.udelay(n)
+#define __const_udelay(n) arm_delay_ops.const_udelay(n)
#define udelay(n) \
(__builtin_constant_p(n) ? \
((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \
- __const_udelay((n) * ((2199023U*HZ)>>11))) : \
+ __const_udelay((n) * UDELAY_MULT)) : \
__udelay(n))
-extern void set_delay_fn(void (*fn)(unsigned long));
-extern void read_current_timer_delay_loop(unsigned long loops);
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+/* Delay-loop timer registration. */
+#define ARCH_HAS_READ_CURRENT_TIMER
+extern void register_current_timer_delay(const struct delay_timer *timer);
+
+#endif /* __ASSEMBLY__ */
#endif /* defined(_ARM_DELAY_H) */
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3..9acc135 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -15,10 +15,6 @@
#include <mach/timex.h>
typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
- return 0;
-}
+#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
#endif
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index 2455d1f..2b3667f 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -25,6 +25,7 @@
#include <linux/export.h>
#include <asm/cputype.h>
+#include <asm/delay.h>
#include <asm/localtimer.h>
#include <asm/arch_timer.h>
#include <asm/sched_clock.h>
@@ -72,6 +73,8 @@
static struct arch_timer_operations *arch_specific_timer = &arch_timer_ops_cp15;
+static struct delay_timer arch_delay_timer;
+
/*
* Architected system timer support.
*/
@@ -331,13 +334,10 @@
return arch_counter_get_cntpct();
}
-#ifdef ARCH_HAS_READ_CURRENT_TIMER
-int read_current_timer(unsigned long *timer_val)
+static unsigned long arch_timer_read_current_timer(void)
{
- *timer_val = (unsigned long)arch_specific_timer->get_cntpct();
- return 0;
+ return arch_counter_get_cntpct();
}
-#endif
static struct clocksource clocksource_counter = {
.name = "arch_sys_counter",
@@ -402,10 +402,6 @@
setup_sched_clock(arch_timer_update_sched_clock, 32, arch_timer_rate);
-#ifdef ARCH_HAS_READ_CURRENT_TIMER
- set_delay_fn(read_current_timer_delay_loop);
-#endif
-
if (is_irq_percpu)
err = request_percpu_irq(arch_timer_ppi, arch_timer_handler,
"arch_timer", arch_timer_evt);
@@ -439,6 +435,10 @@
goto out_free_irq;
percpu_timer_setup();
+ /* Use the architected timer for the delay loop. */
+ arch_delay_timer.read_current_timer = &arch_timer_read_current_timer;
+ arch_delay_timer.freq = arch_timer_rate;
+ register_current_timer_delay(&arch_delay_timer);
return 0;
out_free_irq:
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index f1a50f3..7196228 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -48,6 +48,9 @@
extern void fpundefinstr(void);
+ /* platform dependent support */
+EXPORT_SYMBOL(arm_delay_ops);
+
/* networking */
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_from_user);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 0ade0ac..8ade75d 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,7 +6,7 @@
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
- delay.o findbit.o memchr.o memcpy.o \
+ delay.o delay-loop.o findbit.o memchr.o memcpy.o \
memmove.o memset.o memzero.o setbit.o \
strncpy_from_user.o strnlen_user.o \
strchr.o strrchr.o \
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
new file mode 100644
index 0000000..36b668d
--- /dev/null
+++ b/arch/arm/lib/delay-loop.S
@@ -0,0 +1,67 @@
+/*
+ * linux/arch/arm/lib/delay.S
+ *
+ * Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/delay.h>
+ .text
+
+.LC0: .word loops_per_jiffy
+.LC1: .word UDELAY_MULT
+
+/*
+ * r0 <= 2000
+ * lpj <= 0x01ffffff (max. 3355 bogomips)
+ * HZ <= 1000
+ */
+
+ENTRY(__loop_udelay)
+ ldr r2, .LC1
+ mul r0, r2, r0
+ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
+ mov r1, #-1
+ ldr r2, .LC0
+ ldr r2, [r2] @ max = 0x01ffffff
+ add r0, r0, r1, lsr #32-14
+ mov r0, r0, lsr #14 @ max = 0x0001ffff
+ add r2, r2, r1, lsr #32-10
+ mov r2, r2, lsr #10 @ max = 0x00007fff
+ mul r0, r2, r0 @ max = 2^32-1
+ add r0, r0, r1, lsr #32-6
+ movs r0, r0, lsr #6
+ moveq pc, lr
+
+/*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ */
+
+@ Delay routine
+ENTRY(__loop_delay)
+ subs r0, r0, #1
+#if 0
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+#endif
+ bhi __loop_delay
+ mov pc, lr
+ENDPROC(__loop_udelay)
+ENDPROC(__loop_const_udelay)
+ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index fc9a37c..0dc5385 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -1,90 +1,90 @@
/*
- * Originally from linux/arch/arm/lib/delay.S
+ * Delay loops based on the OpenRISC implementation.
*
- * Copyright (C) 1995, 1996 Russell King
- * Copyright (c) 2010, Code Aurora Forum. All rights reserved.
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- * Copyright (C) 2005-2006 Atmel Corporation
+ * Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
*/
-#include <linux/module.h>
+
#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/timex.h>
/*
- * Oh, if only we had a cycle counter...
+ * Default to the loop-based delay implementation.
*/
-void delay_loop(unsigned long loops)
+struct arm_delay_ops arm_delay_ops = {
+ .delay = __loop_delay,
+ .const_udelay = __loop_const_udelay,
+ .udelay = __loop_udelay,
+};
+
+static const struct delay_timer *delay_timer;
+static bool delay_calibrated;
+
+int read_current_timer(unsigned long *timer_val)
{
- asm volatile(
- "1: subs %0, %0, #1 \n"
- " bhi 1b \n"
- : /* No output */
- : "r" (loops)
- );
+ if (!delay_timer)
+ return -ENXIO;
+
+ *timer_val = delay_timer->read_current_timer();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(read_current_timer);
+
+static void __timer_delay(unsigned long cycles)
+{
+ cycles_t start = get_cycles();
+
+ while ((get_cycles() - start) < cycles)
+ cpu_relax();
}
-#ifdef ARCH_HAS_READ_CURRENT_TIMER
-/*
- * Assuming read_current_timer() is monotonically increasing
- * across calls.
- */
-void read_current_timer_delay_loop(unsigned long loops)
+static void __timer_const_udelay(unsigned long xloops)
{
- unsigned long bclock, now;
-
- read_current_timer(&bclock);
- do {
- read_current_timer(&now);
- } while ((now - bclock) < loops);
-}
-#endif
-
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void set_delay_fn(void (*fn)(unsigned long))
-{
- delay_fn = fn;
+ unsigned long long loops = xloops;
+ loops *= loops_per_jiffy;
+ __timer_delay(loops >> UDELAY_SHIFT);
}
-/*
- * loops = usecs * HZ * loops_per_jiffy / 1000000
- */
-void __delay(unsigned long loops)
+static void __timer_udelay(unsigned long usecs)
{
- delay_fn(loops);
+ __timer_const_udelay(usecs * UDELAY_MULT);
}
-EXPORT_SYMBOL(__delay);
-/*
- * 0 <= xloops <= 0x7fffff06
- * loops_per_jiffy <= 0x01ffffff (max. 3355 bogomips)
- */
-void __const_udelay(unsigned long xloops)
+void __init register_current_timer_delay(const struct delay_timer *timer)
{
- unsigned long lpj;
- unsigned long loops;
-
- xloops >>= 14; /* max = 0x01ffffff */
- lpj = loops_per_jiffy >> 10; /* max = 0x0001ffff */
- loops = lpj * xloops; /* max = 0x00007fff */
- loops >>= 6; /* max = 2^32-1 */
-
- if (loops)
- __delay(loops);
+ if (!delay_calibrated) {
+ pr_info("Switching to timer-based delay loop\n");
+ delay_timer = timer;
+ lpj_fine = timer->freq / HZ;
+ loops_per_jiffy = lpj_fine;
+ arm_delay_ops.delay = __timer_delay;
+ arm_delay_ops.const_udelay = __timer_const_udelay;
+ arm_delay_ops.udelay = __timer_udelay;
+ delay_calibrated = true;
+ } else {
+ pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
+ }
}
-EXPORT_SYMBOL(__const_udelay);
-/*
- * usecs <= 2000
- * HZ <= 1000
- */
-void __udelay(unsigned long usecs)
+unsigned long __cpuinit calibrate_delay_is_known(void)
{
- __const_udelay(usecs * ((2199023UL*HZ)>>11));
+ delay_calibrated = true;
+ return lpj_fine;
}
-EXPORT_SYMBOL(__udelay);
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 212ad77..e3a29ff 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -942,11 +942,12 @@
return cs->read(NULL);
}
-int read_current_timer(unsigned long *timer_val)
+static struct delay_timer msm_delay_timer;
+
+static unsigned long msm_read_current_timer(void)
{
struct msm_clock *dgt = &msm_clocks[MSM_CLOCK_DGT];
- *timer_val = msm_read_timer_count(dgt, GLOBAL_TIMER);
- return 0;
+ return msm_read_timer_count(dgt, GLOBAL_TIMER);
}
static void __init msm_sched_clock_init(void)
@@ -1183,13 +1184,13 @@
}
}
-#ifdef ARCH_HAS_READ_CURRENT_TIMER
if (is_smp()) {
__raw_writel(1,
msm_clocks[MSM_CLOCK_DGT].regbase + TIMER_ENABLE);
- set_delay_fn(read_current_timer_delay_loop);
+ msm_delay_timer.freq = dgt->freq;
+ msm_delay_timer.read_current_timer = &msm_read_current_timer;
+ register_current_timer_delay(&msm_delay_timer);
}
-#endif
#ifdef CONFIG_LOCAL_TIMERS
local_timer_register(&msm_lt_ops);
diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S
index 30cc672..8586374 100644
--- a/arch/arm/mach-sa1100/sleep.S
+++ b/arch/arm/mach-sa1100/sleep.S
@@ -38,9 +38,9 @@
orr r4, r4, #MDREFR_K1DB2
ldr r5, =PPCR
- @ Pre-load __udelay into the I-cache
+ @ Pre-load __loop_udelay into the I-cache
mov r0, #1
- bl __udelay
+ bl __loop_udelay
mov r0, r0
@ The following must all exist in a single cache line to
@@ -53,11 +53,11 @@
@ delay 90us and set CPU PLL to lowest speed
@ fixes resume problem on high speed SA1110
mov r0, #90
- bl __udelay
+ bl __loop_udelay
mov r1, #0
str r1, [r5]
mov r0, #90
- bl __udelay
+ bl __loop_udelay
/*
* SA1110 SDRAM controller workaround. register values: