MIPS: Support for 64-bit FP with O32 binaries
CPUs implementing MIPS32 R2 may include a 64-bit FPU, just as MIPS64 CPUs
do. In order to preserve backwards compatibility a 64-bit FPU will act
like a 32-bit FPU (by accessing doubles from the least significant 32
bits of an even-odd pair of FP registers) when the Status.FR bit is
zero, again just like a mips64 CPU. The standard O32 ABI is defined
expecting a 32-bit FPU, however recent toolchains support use of a
64-bit FPU from an O32 MIPS32 executable. When an ELF executable is
built to use a 64-bit FPU a new flag (EF_MIPS_FP64) is set in the ELF
header.
With this patch the kernel will check the EF_MIPS_FP64 flag when
executing an O32 binary, and set Status.FR accordingly. The addition
of O32 64-bit FP support lessens the opportunity for optimisation in
the FPU emulator, so a CONFIG_MIPS_O32_FP64_SUPPORT Kconfig option is
introduced to allow this support to be disabled for those that don't
require it.
Inspired by an earlier patch by Leonid Yegoshin, but implemented more
cleanly & correctly.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Paul Burton <paul.burton@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/6154/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index 2413afe..70e1f17 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -12,27 +12,6 @@
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
- .macro fpu_save_double thread status tmp1=t0
- cfc1 \tmp1, fcr31
- sdc1 $f0, THREAD_FPR0(\thread)
- sdc1 $f2, THREAD_FPR2(\thread)
- sdc1 $f4, THREAD_FPR4(\thread)
- sdc1 $f6, THREAD_FPR6(\thread)
- sdc1 $f8, THREAD_FPR8(\thread)
- sdc1 $f10, THREAD_FPR10(\thread)
- sdc1 $f12, THREAD_FPR12(\thread)
- sdc1 $f14, THREAD_FPR14(\thread)
- sdc1 $f16, THREAD_FPR16(\thread)
- sdc1 $f18, THREAD_FPR18(\thread)
- sdc1 $f20, THREAD_FPR20(\thread)
- sdc1 $f22, THREAD_FPR22(\thread)
- sdc1 $f24, THREAD_FPR24(\thread)
- sdc1 $f26, THREAD_FPR26(\thread)
- sdc1 $f28, THREAD_FPR28(\thread)
- sdc1 $f30, THREAD_FPR30(\thread)
- sw \tmp1, THREAD_FCR31(\thread)
- .endm
-
.macro fpu_save_single thread tmp=t0
cfc1 \tmp, fcr31
swc1 $f0, THREAD_FPR0(\thread)
@@ -70,27 +49,6 @@
sw \tmp, THREAD_FCR31(\thread)
.endm
- .macro fpu_restore_double thread status tmp=t0
- lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0(\thread)
- ldc1 $f2, THREAD_FPR2(\thread)
- ldc1 $f4, THREAD_FPR4(\thread)
- ldc1 $f6, THREAD_FPR6(\thread)
- ldc1 $f8, THREAD_FPR8(\thread)
- ldc1 $f10, THREAD_FPR10(\thread)
- ldc1 $f12, THREAD_FPR12(\thread)
- ldc1 $f14, THREAD_FPR14(\thread)
- ldc1 $f16, THREAD_FPR16(\thread)
- ldc1 $f18, THREAD_FPR18(\thread)
- ldc1 $f20, THREAD_FPR20(\thread)
- ldc1 $f22, THREAD_FPR22(\thread)
- ldc1 $f24, THREAD_FPR24(\thread)
- ldc1 $f26, THREAD_FPR26(\thread)
- ldc1 $f28, THREAD_FPR28(\thread)
- ldc1 $f30, THREAD_FPR30(\thread)
- ctc1 \tmp, fcr31
- .endm
-
.macro fpu_restore_single thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
lwc1 $f0, THREAD_FPR0(\thread)
diff --git a/arch/mips/include/asm/asmmacro-64.h b/arch/mips/include/asm/asmmacro-64.h
index 08a527d..38ea609 100644
--- a/arch/mips/include/asm/asmmacro-64.h
+++ b/arch/mips/include/asm/asmmacro-64.h
@@ -13,102 +13,6 @@
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
- .macro fpu_save_16even thread tmp=t0
- cfc1 \tmp, fcr31
- sdc1 $f0, THREAD_FPR0(\thread)
- sdc1 $f2, THREAD_FPR2(\thread)
- sdc1 $f4, THREAD_FPR4(\thread)
- sdc1 $f6, THREAD_FPR6(\thread)
- sdc1 $f8, THREAD_FPR8(\thread)
- sdc1 $f10, THREAD_FPR10(\thread)
- sdc1 $f12, THREAD_FPR12(\thread)
- sdc1 $f14, THREAD_FPR14(\thread)
- sdc1 $f16, THREAD_FPR16(\thread)
- sdc1 $f18, THREAD_FPR18(\thread)
- sdc1 $f20, THREAD_FPR20(\thread)
- sdc1 $f22, THREAD_FPR22(\thread)
- sdc1 $f24, THREAD_FPR24(\thread)
- sdc1 $f26, THREAD_FPR26(\thread)
- sdc1 $f28, THREAD_FPR28(\thread)
- sdc1 $f30, THREAD_FPR30(\thread)
- sw \tmp, THREAD_FCR31(\thread)
- .endm
-
- .macro fpu_save_16odd thread
- sdc1 $f1, THREAD_FPR1(\thread)
- sdc1 $f3, THREAD_FPR3(\thread)
- sdc1 $f5, THREAD_FPR5(\thread)
- sdc1 $f7, THREAD_FPR7(\thread)
- sdc1 $f9, THREAD_FPR9(\thread)
- sdc1 $f11, THREAD_FPR11(\thread)
- sdc1 $f13, THREAD_FPR13(\thread)
- sdc1 $f15, THREAD_FPR15(\thread)
- sdc1 $f17, THREAD_FPR17(\thread)
- sdc1 $f19, THREAD_FPR19(\thread)
- sdc1 $f21, THREAD_FPR21(\thread)
- sdc1 $f23, THREAD_FPR23(\thread)
- sdc1 $f25, THREAD_FPR25(\thread)
- sdc1 $f27, THREAD_FPR27(\thread)
- sdc1 $f29, THREAD_FPR29(\thread)
- sdc1 $f31, THREAD_FPR31(\thread)
- .endm
-
- .macro fpu_save_double thread status tmp
- sll \tmp, \status, 5
- bgez \tmp, 2f
- fpu_save_16odd \thread
-2:
- fpu_save_16even \thread \tmp
- .endm
-
- .macro fpu_restore_16even thread tmp=t0
- lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0(\thread)
- ldc1 $f2, THREAD_FPR2(\thread)
- ldc1 $f4, THREAD_FPR4(\thread)
- ldc1 $f6, THREAD_FPR6(\thread)
- ldc1 $f8, THREAD_FPR8(\thread)
- ldc1 $f10, THREAD_FPR10(\thread)
- ldc1 $f12, THREAD_FPR12(\thread)
- ldc1 $f14, THREAD_FPR14(\thread)
- ldc1 $f16, THREAD_FPR16(\thread)
- ldc1 $f18, THREAD_FPR18(\thread)
- ldc1 $f20, THREAD_FPR20(\thread)
- ldc1 $f22, THREAD_FPR22(\thread)
- ldc1 $f24, THREAD_FPR24(\thread)
- ldc1 $f26, THREAD_FPR26(\thread)
- ldc1 $f28, THREAD_FPR28(\thread)
- ldc1 $f30, THREAD_FPR30(\thread)
- ctc1 \tmp, fcr31
- .endm
-
- .macro fpu_restore_16odd thread
- ldc1 $f1, THREAD_FPR1(\thread)
- ldc1 $f3, THREAD_FPR3(\thread)
- ldc1 $f5, THREAD_FPR5(\thread)
- ldc1 $f7, THREAD_FPR7(\thread)
- ldc1 $f9, THREAD_FPR9(\thread)
- ldc1 $f11, THREAD_FPR11(\thread)
- ldc1 $f13, THREAD_FPR13(\thread)
- ldc1 $f15, THREAD_FPR15(\thread)
- ldc1 $f17, THREAD_FPR17(\thread)
- ldc1 $f19, THREAD_FPR19(\thread)
- ldc1 $f21, THREAD_FPR21(\thread)
- ldc1 $f23, THREAD_FPR23(\thread)
- ldc1 $f25, THREAD_FPR25(\thread)
- ldc1 $f27, THREAD_FPR27(\thread)
- ldc1 $f29, THREAD_FPR29(\thread)
- ldc1 $f31, THREAD_FPR31(\thread)
- .endm
-
- .macro fpu_restore_double thread status tmp
- sll \tmp, \status, 5
- bgez \tmp, 1f # 16 register mode?
-
- fpu_restore_16odd \thread
-1: fpu_restore_16even \thread \tmp
- .endm
-
.macro cpu_save_nonscratch thread
LONG_S s0, THREAD_REG16(\thread)
LONG_S s1, THREAD_REG17(\thread)
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 6c8342a..3220c93 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -62,6 +62,113 @@
.endm
#endif /* CONFIG_MIPS_MT_SMTC */
+ .macro fpu_save_16even thread tmp=t0
+ cfc1 \tmp, fcr31
+ sdc1 $f0, THREAD_FPR0(\thread)
+ sdc1 $f2, THREAD_FPR2(\thread)
+ sdc1 $f4, THREAD_FPR4(\thread)
+ sdc1 $f6, THREAD_FPR6(\thread)
+ sdc1 $f8, THREAD_FPR8(\thread)
+ sdc1 $f10, THREAD_FPR10(\thread)
+ sdc1 $f12, THREAD_FPR12(\thread)
+ sdc1 $f14, THREAD_FPR14(\thread)
+ sdc1 $f16, THREAD_FPR16(\thread)
+ sdc1 $f18, THREAD_FPR18(\thread)
+ sdc1 $f20, THREAD_FPR20(\thread)
+ sdc1 $f22, THREAD_FPR22(\thread)
+ sdc1 $f24, THREAD_FPR24(\thread)
+ sdc1 $f26, THREAD_FPR26(\thread)
+ sdc1 $f28, THREAD_FPR28(\thread)
+ sdc1 $f30, THREAD_FPR30(\thread)
+ sw \tmp, THREAD_FCR31(\thread)
+ .endm
+
+ .macro fpu_save_16odd thread
+ .set push
+ .set mips64r2
+ sdc1 $f1, THREAD_FPR1(\thread)
+ sdc1 $f3, THREAD_FPR3(\thread)
+ sdc1 $f5, THREAD_FPR5(\thread)
+ sdc1 $f7, THREAD_FPR7(\thread)
+ sdc1 $f9, THREAD_FPR9(\thread)
+ sdc1 $f11, THREAD_FPR11(\thread)
+ sdc1 $f13, THREAD_FPR13(\thread)
+ sdc1 $f15, THREAD_FPR15(\thread)
+ sdc1 $f17, THREAD_FPR17(\thread)
+ sdc1 $f19, THREAD_FPR19(\thread)
+ sdc1 $f21, THREAD_FPR21(\thread)
+ sdc1 $f23, THREAD_FPR23(\thread)
+ sdc1 $f25, THREAD_FPR25(\thread)
+ sdc1 $f27, THREAD_FPR27(\thread)
+ sdc1 $f29, THREAD_FPR29(\thread)
+ sdc1 $f31, THREAD_FPR31(\thread)
+ .set pop
+ .endm
+
+ .macro fpu_save_double thread status tmp
+#if defined(CONFIG_MIPS64) || defined(CONFIG_CPU_MIPS32_R2)
+ sll \tmp, \status, 5
+ bgez \tmp, 10f
+ fpu_save_16odd \thread
+10:
+#endif
+ fpu_save_16even \thread \tmp
+ .endm
+
+ .macro fpu_restore_16even thread tmp=t0
+ lw \tmp, THREAD_FCR31(\thread)
+ ldc1 $f0, THREAD_FPR0(\thread)
+ ldc1 $f2, THREAD_FPR2(\thread)
+ ldc1 $f4, THREAD_FPR4(\thread)
+ ldc1 $f6, THREAD_FPR6(\thread)
+ ldc1 $f8, THREAD_FPR8(\thread)
+ ldc1 $f10, THREAD_FPR10(\thread)
+ ldc1 $f12, THREAD_FPR12(\thread)
+ ldc1 $f14, THREAD_FPR14(\thread)
+ ldc1 $f16, THREAD_FPR16(\thread)
+ ldc1 $f18, THREAD_FPR18(\thread)
+ ldc1 $f20, THREAD_FPR20(\thread)
+ ldc1 $f22, THREAD_FPR22(\thread)
+ ldc1 $f24, THREAD_FPR24(\thread)
+ ldc1 $f26, THREAD_FPR26(\thread)
+ ldc1 $f28, THREAD_FPR28(\thread)
+ ldc1 $f30, THREAD_FPR30(\thread)
+ ctc1 \tmp, fcr31
+ .endm
+
+ .macro fpu_restore_16odd thread
+ .set push
+ .set mips64r2
+ ldc1 $f1, THREAD_FPR1(\thread)
+ ldc1 $f3, THREAD_FPR3(\thread)
+ ldc1 $f5, THREAD_FPR5(\thread)
+ ldc1 $f7, THREAD_FPR7(\thread)
+ ldc1 $f9, THREAD_FPR9(\thread)
+ ldc1 $f11, THREAD_FPR11(\thread)
+ ldc1 $f13, THREAD_FPR13(\thread)
+ ldc1 $f15, THREAD_FPR15(\thread)
+ ldc1 $f17, THREAD_FPR17(\thread)
+ ldc1 $f19, THREAD_FPR19(\thread)
+ ldc1 $f21, THREAD_FPR21(\thread)
+ ldc1 $f23, THREAD_FPR23(\thread)
+ ldc1 $f25, THREAD_FPR25(\thread)
+ ldc1 $f27, THREAD_FPR27(\thread)
+ ldc1 $f29, THREAD_FPR29(\thread)
+ ldc1 $f31, THREAD_FPR31(\thread)
+ .set pop
+ .endm
+
+ .macro fpu_restore_double thread status tmp
+#if defined(CONFIG_MIPS64) || defined(CONFIG_CPU_MIPS32_R2)
+ sll \tmp, \status, 5
+ bgez \tmp, 10f # 16 register mode?
+
+ fpu_restore_16odd \thread
+10:
+#endif
+ fpu_restore_16even \thread \tmp
+ .endm
+
/*
* Temporary until all gas have MT ASE support
*/
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index a66359e..d414405 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -36,6 +36,7 @@
#define EF_MIPS_ABI2 0x00000020
#define EF_MIPS_OPTIONS_FIRST 0x00000080
#define EF_MIPS_32BITMODE 0x00000100
+#define EF_MIPS_FP64 0x00000200
#define EF_MIPS_ABI 0x0000f000
#define EF_MIPS_ARCH 0xf0000000
@@ -176,6 +177,18 @@
#ifdef CONFIG_32BIT
/*
+ * In order to be sure that we don't attempt to execute an O32 binary which
+ * requires 64 bit FP (FR=1) on a system which does not support it we refuse
+ * to execute any binary which has bits specified by the following macro set
+ * in its ELF header flags.
+ */
+#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
+# define __MIPS_O32_FP64_MUST_BE_ZERO 0
+#else
+# define __MIPS_O32_FP64_MUST_BE_ZERO EF_MIPS_FP64
+#endif
+
+/*
* This is used to ensure we don't load something for the wrong architecture.
*/
#define elf_check_arch(hdr) \
@@ -192,6 +205,8 @@
if (((__h->e_flags & EF_MIPS_ABI) != 0) && \
((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32)) \
__res = 0; \
+ if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO) \
+ __res = 0; \
\
__res; \
})
@@ -249,6 +264,11 @@
#define SET_PERSONALITY(ex) \
do { \
+ if ((ex).e_flags & EF_MIPS_FP64) \
+ clear_thread_flag(TIF_32BIT_FPREGS); \
+ else \
+ set_thread_flag(TIF_32BIT_FPREGS); \
+ \
if (personality(current->personality) != PER_LINUX) \
set_personality(PER_LINUX); \
\
@@ -271,14 +291,18 @@
#endif
#ifdef CONFIG_MIPS32_O32
-#define __SET_PERSONALITY32_O32() \
+#define __SET_PERSONALITY32_O32(ex) \
do { \
set_thread_flag(TIF_32BIT_REGS); \
set_thread_flag(TIF_32BIT_ADDR); \
+ \
+ if (!((ex).e_flags & EF_MIPS_FP64)) \
+ set_thread_flag(TIF_32BIT_FPREGS); \
+ \
current->thread.abi = &mips_abi_32; \
} while (0)
#else
-#define __SET_PERSONALITY32_O32() \
+#define __SET_PERSONALITY32_O32(ex) \
do { } while (0)
#endif
@@ -289,7 +313,7 @@
((ex).e_flags & EF_MIPS_ABI) == 0) \
__SET_PERSONALITY32_N32(); \
else \
- __SET_PERSONALITY32_O32(); \
+ __SET_PERSONALITY32_O32(ex); \
} while (0)
#else
#define __SET_PERSONALITY32(ex) do { } while (0)
@@ -300,6 +324,7 @@
unsigned int p; \
\
clear_thread_flag(TIF_32BIT_REGS); \
+ clear_thread_flag(TIF_32BIT_FPREGS); \
clear_thread_flag(TIF_32BIT_ADDR); \
\
if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 3bf023f..cfe092f 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -33,11 +33,48 @@
extern void _save_fp(struct task_struct *);
extern void _restore_fp(struct task_struct *);
-#define __enable_fpu() \
-do { \
- set_c0_status(ST0_CU1); \
- enable_fpu_hazard(); \
-} while (0)
+/*
+ * This enum specifies a mode in which we want the FPU to operate, for cores
+ * which implement the Status.FR bit. Note that FPU_32BIT & FPU_64BIT
+ * purposefully have the values 0 & 1 respectively, so that an integer value
+ * of Status.FR can be trivially casted to the corresponding enum fpu_mode.
+ */
+enum fpu_mode {
+ FPU_32BIT = 0, /* FR = 0 */
+ FPU_64BIT, /* FR = 1 */
+ FPU_AS_IS,
+};
+
+static inline int __enable_fpu(enum fpu_mode mode)
+{
+ int fr;
+
+ switch (mode) {
+ case FPU_AS_IS:
+ /* just enable the FPU in its current mode */
+ set_c0_status(ST0_CU1);
+ enable_fpu_hazard();
+ return 0;
+
+ case FPU_64BIT:
+#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_MIPS64))
+ /* we only have a 32-bit FPU */
+ return SIGFPE;
+#endif
+ /* fall through */
+ case FPU_32BIT:
+ /* set CU1 & change FR appropriately */
+ fr = (int)mode;
+ change_c0_status(ST0_CU1 | ST0_FR, ST0_CU1 | (fr ? ST0_FR : 0));
+ enable_fpu_hazard();
+
+ /* check FR has the desired value */
+ return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+
+ default:
+ BUG();
+ }
+}
#define __disable_fpu() \
do { \
@@ -57,27 +94,46 @@
return cpu_has_fpu && __is_fpu_owner();
}
-static inline void __own_fpu(void)
+static inline int __own_fpu(void)
{
- __enable_fpu();
+ enum fpu_mode mode;
+ int ret;
+
+ mode = !test_thread_flag(TIF_32BIT_FPREGS);
+ ret = __enable_fpu(mode);
+ if (ret)
+ return ret;
+
KSTK_STATUS(current) |= ST0_CU1;
+ if (mode == FPU_64BIT)
+ KSTK_STATUS(current) |= ST0_FR;
+ else /* mode == FPU_32BIT */
+ KSTK_STATUS(current) &= ~ST0_FR;
+
set_thread_flag(TIF_USEDFPU);
+ return 0;
}
-static inline void own_fpu_inatomic(int restore)
+static inline int own_fpu_inatomic(int restore)
{
+ int ret = 0;
+
if (cpu_has_fpu && !__is_fpu_owner()) {
- __own_fpu();
- if (restore)
+ ret = __own_fpu();
+ if (restore && !ret)
_restore_fp(current);
}
+ return ret;
}
-static inline void own_fpu(int restore)
+static inline int own_fpu(int restore)
{
+ int ret;
+
preempt_disable();
- own_fpu_inatomic(restore);
+ ret = own_fpu_inatomic(restore);
preempt_enable();
+ return ret;
}
static inline void lose_fpu(int save)
@@ -93,16 +149,21 @@
preempt_enable();
}
-static inline void init_fpu(void)
+static inline int init_fpu(void)
{
+ int ret = 0;
+
preempt_disable();
if (cpu_has_fpu) {
- __own_fpu();
- _init_fpu();
+ ret = __own_fpu();
+ if (!ret)
+ _init_fpu();
} else {
fpu_emulator_init_fpu();
}
+
preempt_enable();
+ return ret;
}
static inline void save_fp(struct task_struct *tsk)
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 4f58ef6..24846f9 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -110,11 +110,12 @@
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_FIXADE 20 /* Fix address errors in software */
#define TIF_LOGADE 21 /* Log address errors to syslog */
-#define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */
+#define TIF_32BIT_REGS 22 /* 32-bit general purpose registers */
#define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
#define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
#define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */
+#define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -131,6 +132,7 @@
#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
+#define _TIF_32BIT_FPREGS (1<<TIF_32BIT_FPREGS)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \