#ifndef _M68K_DELAY_H
#define _M68K_DELAY_H

#include <asm/param.h>

/*
 * Copyright (C) 1994 Hamish Macdonald
 * Copyright (C) 2004 Greg Ungerer <gerg@uclinux.com>
 *
 * Delay routines, using a pre-computed "loops_per_jiffy" value.
 */

#if defined(CONFIG_COLDFIRE)
/*
 * The ColdFire runs the delay loop at significantly different speeds
 * depending upon whether it is long word aligned or not.  We'll pad it
 * to long word alignment, which is the faster version.
 * The 0x4a8e is of course a 'tstl %fp' instruction. This is better
 * than using a NOP (0x4e71) instruction because it executes in one
 * cycle, not three, and doesn't allow for an arbitrary delay waiting
 * for bus cycles to finish. Also fp/a6 isn't likely to cause a
 * stall waiting for the register to become valid if such stalling is
 * added to the ColdFire at some stage.
 */
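/*
 * For illustration: ".balignw 4, 0x4a8e" tells the assembler to pad out
 * to a 4-byte boundary using the 16-bit fill word 0x4a8e, so if the
 * "1:" loop label in __delay() would otherwise land on an odd word
 * boundary a single harmless 'tstl %fp' is emitted in front of it, and
 * nothing is emitted when it is already long word aligned.
 */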
#define	DELAY_ALIGN	".balignw 4, 0x4a8e\n\t"
#else
/*
 * No instruction alignment required for other m68k types.
 */
#define	DELAY_ALIGN
#endif

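/*
 * Busy-wait for roughly 'loops' iterations of the two-instruction loop
 * below.  The counter is decremented with subql and the loop repeats
 * while the carry stays clear ("jcc"), so it falls through only once
 * the counter wraps below zero and sets the carry flag.  For example,
 * __delay(loops_per_jiffy) spins for roughly one jiffy (1/HZ seconds)
 * on the calibrated CPU.
 */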
static inline void __delay(unsigned long loops)
{
	__asm__ __volatile__ (
		DELAY_ALIGN
		"1: subql #1,%0\n\t"
		"jcc 1b"
		: "=d" (loops)
		: "0" (loops));
}

extern void __bad_udelay(void);

#ifdef CONFIG_CPU_HAS_NO_MULDIV64
/*
 * The simpler m68k and ColdFire processors do not have a 32*32->64
 * multiply instruction. So we need to handle them a little differently.
 * We use a bit of shifting and a single 32*32->32 multiply to get close.
 * This is a macro so that the const version can factor out the first
 * multiply and shift.
 */
#define	HZSCALE		(268435456 / (1000000 / HZ))

#define	__const_udelay(u) \
	__delay(((((u) * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6)
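/*
 * Roughly how the shift constants above work out: the exact loop count
 * for u microseconds would be
 *
 *	loops = u * loops_per_jiffy * HZ / 1000000
 *
 * and with HZSCALE being about 2^28 * HZ / 1000000 (268435456 == 2^28)
 * that is
 *
 *	loops = u * HZSCALE * loops_per_jiffy / 2^28
 *
 * The divide by 2^28 is split into ">> 11", ">> 11" and ">> 6" so that
 * the intermediate 32*32->32 products fit in 32 bits for the small
 * delays udelay() permits, at the cost of some low-order precision.
 */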

#else

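/*
 * With a real 32*32->64 multiply available the scaling is done directly:
 * "mulul %2,%0:%1" multiplies xloops by loops_per_jiffy (which the "1"
 * constraint loads into %1) and leaves the upper 32 bits of the 64-bit
 * product in %0, i.e. xloops becomes (xloops * loops_per_jiffy) >> 32.
 * Since __const_udelay() below passes in usecs * 4295 (4295 being about
 * 2^32 / 1000000), multiplying the result by HZ gives approximately
 * usecs * loops_per_jiffy * HZ / 1000000 loops, as required.
 */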
static inline void __xdelay(unsigned long xloops)
{
	unsigned long tmp;

	__asm__ ("mulul %2,%0:%1"
		: "=d" (xloops), "=d" (tmp)
		: "d" (xloops), "1" (loops_per_jiffy));
	__delay(xloops * HZ);
}

/*
 * The definition of __const_udelay is specifically made a macro so that
 * the const factor (4295 = 2**32 / 1000000) can be optimized out when
 * the delay is a const.
 */
#define	__const_udelay(n)	(__xdelay((n) * 4295))

#endif

static inline void __udelay(unsigned long usecs)
{
	__const_udelay(usecs);
}

/*
 * Use only for very small delays ( < 1 msec).  Should probably use a
 * lookup table, really, as the multiplications take much too long with
 * short delays.  This is a "reasonable" implementation, though (and the
 * first constant multiplication gets optimized away if the delay is
 * a constant)
 */
#define udelay(n) (__builtin_constant_p(n) ? \
	((n) > 20000 ? __bad_udelay() : __const_udelay(n)) : __udelay(n))
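/*
 * Usage sketch ('nsecs' is just an assumed caller variable here):
 *
 *	udelay(10);		constant, expands to __const_udelay(10)
 *	udelay(nsecs / 1000);	non-constant, goes through __udelay()
 *
 * A constant argument above 20000 microseconds expands to a call to the
 * deliberately undefined __bad_udelay(), turning an over-long busy wait
 * into a link-time error; such long waits should sleep instead (e.g.
 * msleep()).
 */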

/*
 * nanosecond delay:
 *
 * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6) is the number of loops
 * per microsecond
 *
 * 1000 / ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6) is the number of
 * nanoseconds per loop
 *
 * So n / (1000 / ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6)) would
 * be the number of loops for n nanoseconds
 */

/*
 * The simpler m68k and ColdFire processors do not have a 32*32->64
 * multiply instruction. So we need to handle them a little differently.
 * We use a bit of shifting and a single 32*32->32 multiply to get close.
 * This is a macro so that the const version can factor out the first
 * multiply and shift.
 */
#define	HZSCALE		(268435456 / (1000000 / HZ))

#define	ndelay(n)	__delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000))
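/*
 * Worked example with assumed numbers (HZ == 100 and loops_per_jiffy ==
 * 1000000, chosen purely for illustration): HZSCALE is 26843, so the
 * loops-per-microsecond term is ((26843 >> 11) * (1000000 >> 11)) >> 6,
 * which is (13 * 488) >> 6 == 99, and ndelay(500) then spins for
 * DIV_ROUND_UP(500 * 99, 1000) == 50 loops.
 */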

#endif /* defined(_M68K_DELAY_H) */