Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Greg Ungerer | 7c94619 | 2011-07-01 16:47:13 +1000 | [diff] [blame] | 2 | #ifndef _M68K_DELAY_H |
| 3 | #define _M68K_DELAY_H |
| 4 | |
| 5 | #include <asm/param.h> |
| 6 | |
| 7 | /* |
| 8 | * Copyright (C) 1994 Hamish Macdonald |
| 9 | * Copyright (C) 2004 Greg Ungerer <gerg@uclinux.com> |
| 10 | * |
| 11 | * Delay routines, using a pre-computed "loops_per_jiffy" value. |
| 12 | */ |
| 13 | |
#if defined(CONFIG_COLDFIRE)
/*
 * The ColdFire runs the delay loop at significantly different speeds
 * depending upon long word alignment or not.  We'll pad it to
 * long word alignment which is the faster version.
 * The 0x4a8e is of course a 'tstl %fp' instruction.  This is better
 * than using a NOP (0x4e71) instruction because it executes in one
 * cycle not three and doesn't allow for an arbitrary delay waiting
 * for bus cycles to finish.  Also fp/a6 isn't likely to cause a
 * stall waiting for the register to become valid if such is added
 * to the coldfire at some stage.
 *
 * .balignw pads to a 4-byte boundary using the given 16-bit fill
 * word (here the 'tstl %fp' encoding) instead of the assembler's
 * default NOP fill.
 */
#define DELAY_ALIGN ".balignw 4, 0x4a8e\n\t"
#else
/*
 * No instruction alignment required for other m68k types.
 */
#define DELAY_ALIGN
#endif
Greg Ungerer | 7c94619 | 2011-07-01 16:47:13 +1000 | [diff] [blame] | 33 | |
/*
 * Busy-wait for roughly 'loops' iterations of a tight
 * decrement-and-branch loop.
 *
 * The counter is forced into a data register ("=d"); 'subql #1' sets
 * the carry flag only when the subtraction borrows, so 'jcc' (branch
 * on carry clear) keeps spinning until the counter underflows past
 * zero.  DELAY_ALIGN pads the loop entry on ColdFire (see above).
 */
static inline void __delay(unsigned long loops)
{
	__asm__ __volatile__ (
		DELAY_ALIGN
		"1: subql #1,%0\n\t"
		"jcc 1b"
		: "=d" (loops)
		: "0" (loops));
}
| 43 | |
/*
 * Deliberately never defined: referenced from udelay() when a
 * compile-time-constant delay is too large, so misuse is caught as a
 * link-time error rather than a silent overflow at run time.
 */
extern void __bad_udelay(void);


#ifdef CONFIG_CPU_HAS_NO_MULDIV64
/*
 * The simpler m68k and ColdFire processors do not have a 32*32->64
 * multiply instruction. So we need to handle them a little differently.
 * We use a bit of shifting and a single 32*32->32 multiply to get close.
 * This is a macro so that the const version can factor out the first
 * multiply and shift.
 *
 * HZSCALE is 2^28 divided by the number of microseconds per jiffy;
 * the >>11, >>11, >>6 shifts below rebalance the intermediate
 * products so each 32*32->32 multiply stays in range.
 */
#define HZSCALE (268435456 / (1000000 / HZ))

#define __const_udelay(u) \
	__delay(((((u) * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6)

#else

/*
 * Convert 'xloops' (microseconds pre-scaled by 2^32/10^6, see the
 * __const_udelay() below) into __delay() iterations.
 *
 * 'mulul %2,%0:%1' produces the full 64-bit product of %2 and the low
 * register %1 (seeded with loops_per_jiffy); the high 32 bits land in
 * %0, which effectively divides the product by 2^32, undoing the 2^32
 * pre-scale.  Multiplying by HZ then converts per-jiffy loops into
 * per-second loops.
 */
static inline void __xdelay(unsigned long xloops)
{
	unsigned long tmp;

	__asm__ ("mulul %2,%0:%1"
		: "=d" (xloops), "=d" (tmp)
		: "d" (xloops), "1" (loops_per_jiffy));
	__delay(xloops * HZ);
}

/*
 * The definition of __const_udelay is specifically made a macro so that
 * the const factor (4295 = 2**32 / 1000000) can be optimized out when
 * the delay is a const.
 */
#define __const_udelay(n) (__xdelay((n) * 4295))

#endif
| 80 | |
/*
 * Out-of-line-style path for non-constant microsecond delays: simply
 * forwards to __const_udelay(), whose multiply cannot be folded away
 * here because 'usecs' is not a compile-time constant.
 */
static inline void __udelay(unsigned long usecs)
{
	__const_udelay(usecs);
}
| 85 | |
| 86 | /* |
| 87 | * Use only for very small delays ( < 1 msec). Should probably use a |
| 88 | * lookup table, really, as the multiplications take much too long with |
| 89 | * short delays. This is a "reasonable" implementation, though (and the |
| 90 | * first constant multiplications gets optimized away if the delay is |
| 91 | * a constant) |
| 92 | */ |
| 93 | #define udelay(n) (__builtin_constant_p(n) ? \ |
| 94 | ((n) > 20000 ? __bad_udelay() : __const_udelay(n)) : __udelay(n)) |
| 95 | |
Michael Schmitz | c8ee038 | 2013-04-06 13:26:44 +1300 | [diff] [blame] | 96 | /* |
| 97 | * nanosecond delay: |
| 98 | * |
| 99 | * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6) is the number of loops |
| 100 | * per microsecond |
| 101 | * |
| 102 | * 1000 / ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6) is the number of |
| 103 | * nanoseconds per loop |
| 104 | * |
| 105 | * So n / ( 1000 / ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6) ) would |
| 106 | * be the number of loops for n nanoseconds |
| 107 | */ |
| 108 | |
| 109 | /* |
| 110 | * The simpler m68k and ColdFire processors do not have a 32*32->64 |
| 111 | * multiply instruction. So we need to handle them a little differently. |
| 112 | * We use a bit of shifting and a single 32*32->32 multiply to get close. |
| 113 | * This is a macro so that the const version can factor out the first |
| 114 | * multiply and shift. |
| 115 | */ |
| 116 | #define HZSCALE (268435456 / (1000000 / HZ)) |
| 117 | |
Boris Brezillon | 7e251bb | 2016-10-28 17:12:28 +0200 | [diff] [blame] | 118 | #define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)) |
Greg Ungerer | 7c94619 | 2011-07-01 16:47:13 +1000 | [diff] [blame] | 119 | |
| 120 | #endif /* defined(_M68K_DELAY_H) */ |