[MIPS] R4000/R4400 daddiu erratum workaround
This complements the generic R4000/R4400 errata workaround code and adds
bits for the daddiu problem. In most places it just modifies handwritten
assembly code so that the assembler is allowed to use a temporary register
as daddiu may now be treated as a macro that expands to a sequence of li
and daddu. The temporary is normally the AT register; where AT is
unavailable or used explicitly for another purpose, an explicitly named
register is selected instead, using the .set at=<reg> feature added
recently to gas. This feature is
only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the
workaround remains disabled, the required version of binutils stays
unchanged.
Similarly, daddiu instructions put in branch delay slots in noreorder
fragments are now taken out of them and the assembler is allowed to
reorder them itself where possible (which it does, making the whole idea
of scheduling them into delay slots manually questionable).
Also, in the very few places where such a simple conversion was not
possible, a handcoded longer sequence is implemented.
Other than that there are changes to code responsible for building the
TLB fault and page clear/copy handlers to avoid daddiu as appropriate.
These are only effective if the erratum is verified to be present at
run time.
Finally there is a trivial update to __delay(), because it uses daddiu in
a branch delay slot.
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index 3a534b2..d1b08f5 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -9,6 +9,7 @@
* Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
* Copyright (C) 2002 Broadcom, Inc.
* memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007 Maciej W. Rozycki
*
* Mnemonic names for arguments to memcpy/__copy_user
*/
@@ -175,7 +176,11 @@
.text
.set noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat
+#else
+ .set at=v1
+#endif
/*
* A combined memcpy/__copy_user
@@ -268,8 +273,10 @@
STORE t1, UNIT(1)(dst)
STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst)
+ .set reorder /* DADDI_WAR */
+ ADD dst, dst, 4*NBYTES
beqz len, done
- ADD dst, dst, 4*NBYTES
+ .set noreorder
less_than_4units:
/*
* rem = len % NBYTES
@@ -281,8 +288,10 @@
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE t0, 0(dst)
+ .set reorder /* DADDI_WAR */
+ ADD dst, dst, NBYTES
bne rem, len, 1b
- ADD dst, dst, NBYTES
+ .set noreorder
/*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -361,8 +370,10 @@
STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst)
PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
+ .set reorder /* DADDI_WAR */
+ ADD dst, dst, 4*NBYTES
bne len, rem, 1b
- ADD dst, dst, 4*NBYTES
+ .set noreorder
cleanup_src_unaligned:
beqz len, done
@@ -375,8 +386,10 @@
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE t0, 0(dst)
+ .set reorder /* DADDI_WAR */
+ ADD dst, dst, NBYTES
bne len, rem, 1b
- ADD dst, dst, NBYTES
+ .set noreorder
copy_bytes_checklen:
beqz len, done
@@ -424,8 +437,10 @@
EXC( lb t1, 0(src), l_exc)
ADD src, src, 1
sb t1, 0(dst) # can't fault -- we're copy_from_user
+ .set reorder /* DADDI_WAR */
+ ADD dst, dst, 1
bne src, t0, 1b
- ADD dst, dst, 1
+ .set noreorder
l_exc:
LOAD t0, TI_TASK($28)
nop