#ifndef _M68KNOMMU_DELAY_H
#define _M68KNOMMU_DELAY_H

/*
 * Copyright (C) 1994 Hamish Macdonald
 * Copyright (C) 2004 Greg Ungerer <gerg@snapgear.com>
 */

#include <asm/param.h>

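/*
 * Spin for roughly 'loops' iterations of the decrement loop below.
 * The microsecond delay helpers later in this file convert a usec
 * count into a suitable loop count.
 */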
static inline void __delay(unsigned long loops)
{
#if defined(CONFIG_COLDFIRE)
	/* The ColdFire runs this loop at significantly different speeds
	 * depending upon long word alignment or not. We'll pad it to
	 * long word alignment which is the faster version.
	 * The 0x4a8e is of course a 'tstl %fp' instruction. This is better
	 * than using a NOP (0x4e71) instruction because it executes in one
	 * cycle, not three, and doesn't allow for an arbitrary delay waiting
	 * for bus cycles to finish. Also fp/a6 isn't likely to cause a
	 * stall waiting for the register to become valid if such is added
	 * to the ColdFire at some stage.
	 */
	__asm__ __volatile__ (	".balignw 4, 0x4a8e\n\t"
				"1: subql #1, %0\n\t"
				"jcc 1b"
		: "=d" (loops) : "0" (loops));
#else
	__asm__ __volatile__ (	"1: subql #1, %0\n\t"
				"jcc 1b"
		: "=d" (loops) : "0" (loops));
#endif
}

/*
 * Ideally we use a 32*32->64 multiply to calculate the number of
 * loop iterations, but the older standard 68k and ColdFire do not
 * have this instruction. So for them we have a close approximation
 * loop using 32*32->32 multiplies only. This calculation is based on
 * the ARM version of delay.
 *
 * We want to implement:
 *
 * loops = (usecs * 0x10c6 * HZ * loops_per_jiffy) / 2^32
 */

#define HZSCALE (268435456 / (1000000/HZ))

extern unsigned long loops_per_jiffy;

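/*
 * Sanity check of the 32-bit-only approximation used in the first
 * branch of _udelay() below: with HZSCALE = 2^28 * HZ / 1000000,
 *
 *   (((usecs * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6
 *     ~= usecs * HZSCALE * loops_per_jiffy / 2^28
 *      = usecs * HZ * loops_per_jiffy / 1000000
 *
 * i.e. the target loop count, with some precision lost to the two
 * >> 11 truncations.
 */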
static inline void _udelay(unsigned long usecs)
{
#if defined(CONFIG_M68328) || defined(CONFIG_M68EZ328) || \
    defined(CONFIG_M68VZ328) || defined(CONFIG_M68360) || \
    defined(CONFIG_COLDFIRE)
	__delay((((usecs * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6);
#else
	unsigned long tmp;

	usecs *= 4295;		/* 2**32 / 1000000 */
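	/*
	 * 32*32->64 multiply: the upper 32 bits of the product land in
	 * usecs (%0), which performs the divide by 2^32 from the formula
	 * above.
	 */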
	__asm__ ("mulul %2,%0:%1"
		: "=d" (usecs), "=d" (tmp)
		: "d" (usecs), "1" (loops_per_jiffy*HZ));
	__delay(usecs);
#endif
}

/*
 * Moved the udelay() function into library code, no longer inlined.
 * I had to change the algorithm because we are overflowing now on
 * the faster ColdFire parts. The code is a little bigger, so it makes
 * sense to library it.
 */
extern void udelay(unsigned long usecs);

#endif /* defined(_M68KNOMMU_DELAY_H) */