Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"The big one is support for fake NUMA, splitting a really large machine
into more manageable pieces improves performance in some cases, e.g. for
a KVM host.
The FICON Link Incident handling has been improved, this helps the
operator to identify degraded or non-operational FICON connections.
The save and restore of floating point and vector registers has been
overhauled to allow the future use of vector registers in the kernel.
A few small enhancements: magic sys-requests for the vt220 console via
SCLP, some more assembler code has been converted to C, and the PCI error
handling is improved.
And the usual cleanup and bug fixing"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (59 commits)
s390/jump_label: Use %*ph to print small buffers
s390/sclp_vt220: support magic sysrequests
s390/ctrlchar: improve handling of magic sysrequests
s390/numa: remove superfluous ARCH_WANT defines
s390/3270: redraw screen on unsolicited device end
s390/dcssblk: correct out of bounds array indexes
s390/mm: simplify page table alloc/free code
s390/pci: move debug messages to debugfs
s390/nmi: initialize control register 0 earlier
s390/zcrypt: use msleep() instead of mdelay()
s390/hmcdrv: fix interrupt registration
s390/setup: fix novx parameter
s390/uaccess: remove uaccess_primary kernel parameter
s390: remove unneeded sizeof(void *) comparisons
s390/facilities: remove transactional-execution bits
s390/numa: re-add DIE sched_domain_topology_level
s390/dasd: enhance CUIR scope detection
s390/dasd: fix failing path verification
s390/vdso: emit a GNU hash
s390/numa: make core to node mapping data dynamic
...
diff --git a/MAINTAINERS b/MAINTAINERS
index 9730027..8b3115c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5907,7 +5907,6 @@
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Cornelia Huck <cornelia.huck@de.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -8718,7 +8717,6 @@
S390
M: Martin Schwidefsky <schwidefsky@de.ibm.com>
M: Heiko Carstens <heiko.carstens@de.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -8746,7 +8744,6 @@
S390 NETWORK DRIVERS
M: Ursula Braun <ursula.braun@de.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -8763,7 +8760,6 @@
S390 ZCRYPT DRIVER
M: Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -8771,7 +8767,6 @@
S390 ZFCP DRIVER
M: Steffen Maier <maier@linux.vnet.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -8779,7 +8774,6 @@
S390 IUCV NETWORK LAYER
M: Ursula Braun <ursula.braun@de.ibm.com>
-M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 2938934..e256592 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -6,3 +6,4 @@
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
+obj-$(CONFIG_NUMA) += numa/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b06dc38..4827870 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,18 +99,22 @@
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANTS_PROT_NUMA_PROT_NONE
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
+ select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_DEVICES if !SMP
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_EARLY_PFN_TO_NID
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -153,6 +157,7 @@
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -385,6 +390,76 @@
config SCHED_SMT
def_bool n
+# Some NUMA nodes have memory ranges that span
+# other nodes. Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node. See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+ def_bool NUMA
+
+config NUMA
+ bool "NUMA support"
+ depends on SMP && 64BIT && SCHED_TOPOLOGY
+ default n
+ help
+ Enable NUMA support
+
+ This option adds NUMA support to the kernel.
+
+ An operation mode can be selected by appending
+ numa=<method> to the kernel command line.
+
+ The default behaviour is identical to appending numa=plain to
+ the command line. This will create just one node with all
+ available memory and all CPUs in it.
+
+config NODES_SHIFT
+ int "Maximum NUMA nodes (as a power of 2)"
+ range 1 10
+ depends on NUMA
+ default "4"
+ help
+ Specify the maximum number of NUMA nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+ depends on NUMA
+
+config NUMA_EMU
+ bool "NUMA emulation"
+ default y
+ help
+ NUMA emulation mode will split the available system memory into
+ equal chunks which then are distributed over the configured number
+ of nodes in a round-robin manner.
+
+ The number of fake nodes is limited by the number of available memory
+ chunks (i.e. memory size / fake size) and the number of supported
+ nodes in the kernel.
+
+ The CPUs are assigned to the nodes in a way that partially respects
+ the original machine topology (if supported by the machine).
+ Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+ hex "NUMA emulation memory chunk size"
+ default 0x10000000
+ range 0x400000 0x100000000
+ depends on NUMA_EMU
+ help
+ Select the default size by which the memory is chopped and then
+ assigned to emulated NUMA nodes.
+
+ This can be overridden by specifying
+
+ emu_size=<n>
+
+ on the kernel command line where also suffixes K, M, G, and T are
+ supported.
+
+endmenu
+
config SCHED_MC
def_bool n
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 667b1bc..e8d4423 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -33,6 +33,8 @@
mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
mflags-$(CONFIG_MARCH_Z13) := -march=z13
+export CC_FLAGS_MARCH := $(mflags-y)
+
aflags-y += $(mflags-y)
cflags-y += $(mflags-y)
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 940cbdd..0c98f15 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -13,6 +13,7 @@
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
@@ -50,6 +51,7 @@
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
CONFIG_PREEMPT=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index d793fec..82083e1 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -13,6 +13,7 @@
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
@@ -49,6 +50,7 @@
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 38a77e9..c05c9e0 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -13,6 +13,8 @@
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
@@ -48,6 +50,7 @@
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5566ce8..0b9b95f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -24,6 +24,7 @@
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include "crypt_s390.h"
@@ -976,7 +977,7 @@
crypto_unregister_alg(&aes_alg);
}
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
module_exit(aes_s390_fini);
MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 9e05cc4..fba1c10 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/des.h>
@@ -616,7 +617,7 @@
crypto_unregister_alg(&des_alg);
}
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
module_exit(des_s390_exit);
MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index b258110..26e14ef 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -9,6 +9,7 @@
#include <crypto/internal/hash.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include "crypt_s390.h"
@@ -158,7 +159,7 @@
crypto_unregister_shash(&ghash_alg);
}
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 9d5192c..b8045b9 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
+#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <asm/debug.h>
@@ -914,6 +915,5 @@
}
}
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5b2bee3..9208ead 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -26,6 +26,7 @@
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
@@ -100,7 +101,7 @@
crypto_unregister_shash(&alg);
}
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
module_exit(sha1_s390_fini);
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index b74ff15..667888f 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -16,6 +16,7 @@
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
@@ -140,7 +141,7 @@
crypto_unregister_shash(&sha256_alg);
}
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 0c36989..2ba66b1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include "sha.h"
#include "crypt_s390.h"
@@ -148,7 +149,7 @@
crypto_unregister_shash(&sha384_alg);
}
-module_init(init);
+module_cpu_feature_match(MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 0000000..fa7e69b
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags. Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now. Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat) ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d7697ab..17a3735 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,6 +46,8 @@
__ctl_load(reg, cr, cr);
}
+void __ctl_set_vx(void);
+
void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit);
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
new file mode 100644
index 0000000..55dc2c0
--- /dev/null
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -0,0 +1,110 @@
+/*
+ * General floating point and vector register helpers
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#define FPU_USE_VX 1 /* Vector extension is active */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/linkage.h>
+#include <asm/ctl_reg.h>
+#include <asm/sigcontext.h>
+
+struct fpu {
+ __u32 fpc; /* Floating-point control */
+ __u32 flags;
+ union {
+ void *regs;
+ freg_t *fprs; /* Floating-point register save area */
+ __vector128 *vxrs; /* Vector register save area */
+ };
+};
+
+void save_fpu_regs(void);
+
+#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
+#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+static inline int test_fp_ctl(u32 fpc)
+{
+ u32 orig_fpc;
+ int rc;
+
+ asm volatile(
+ " efpc %1\n"
+ " sfpc %2\n"
+ "0: sfpc %1\n"
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc), "=d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+ unsigned long cr0, flags;
+
+ flags = arch_local_irq_save();
+ __ctl_store(cr0, 0, 0);
+ __ctl_set_bit(0, 17);
+ __ctl_set_bit(0, 18);
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
+ : "=Q" (*(struct vx_array *) vxrs) : : "1");
+ __ctl_load(cr0, 0, 0);
+ arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpregs->pad = 0;
+ if (is_vx_fpu(fpu))
+ convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+ else
+ memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+ sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ if (is_vx_fpu(fpu))
+ convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+ else
+ memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+ sizeof(fpregs->fprs));
+}
+
+#endif
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index df4db81..3d012e0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+#include <asm/fpu-internal.h>
#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
@@ -501,10 +502,9 @@
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
- s390_fp_regs host_fpregs;
unsigned int host_acrs[NUM_ACRS];
- s390_fp_regs guest_fpregs;
- struct kvm_s390_vregs *host_vregs;
+ struct fpu host_fpregs;
+ struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index fc8a828..27da78c 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -6,4 +6,26 @@
#define __ALIGN .align 4, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 0000000..a9e834e
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 0000000..2a0efc6
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,35 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a648338..34d9603 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -170,7 +170,11 @@
#endif /* CONFIG_HOTPLUG_PCI_S390 */
/* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+ return pdev->sysdata;
+}
+
struct zpci_dev *get_zdev_by_fid(u32);
/* DMA */
@@ -188,4 +192,20 @@
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+ return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
#endif
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f66d827..bdb2f51 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -576,6 +576,19 @@
return pte_val(a) == pte_val(b);
}
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+ return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ /* pmd_large(pmd) implies pmd_present(pmd) */
+ return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
unsigned long new = 0;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dedb621..085fb0d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,10 +14,12 @@
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
+#define CIF_FPU 3 /* restore vector registers */
#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
#define _CIF_ASCE (1<<CIF_ASCE)
#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU (1<<CIF_FPU)
#ifndef __ASSEMBLY__
@@ -28,6 +30,7 @@
#include <asm/ptrace.h>
#include <asm/setup.h>
#include <asm/runtime_instr.h>
+#include <asm/fpu-internal.h>
static inline void set_cpu_flag(int flag)
{
@@ -85,7 +88,7 @@
* Thread structure
*/
struct thread_struct {
- s390_fp_regs fp_regs;
+ struct fpu fpu; /* FP and VX register save area */
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
@@ -101,7 +104,6 @@
struct runtime_instr_cb *ri_cb;
int ri_signum;
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
- __vector128 *vxrs; /* Vector register save area */
};
/* Flag to disable transactions. */
@@ -231,6 +233,17 @@
}
/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+ unsigned int reg1, reg2;
+
+ asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+ return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+/*
* Rewind PSW instruction address by specified number of bytes.
*/
static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
@@ -336,25 +349,6 @@
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
}
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target) \
- ".section __ex_table,\"a\"\n" \
- ".align 4\n" \
- ".long (" #_fault ") - .\n" \
- ".long (" #_target ") - .\n" \
- ".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target) \
- .section __ex_table,"a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index f6ff060..821dde5 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,6 @@
int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
void sclp_early_detect(void);
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index d62e7a6..dcadfde 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,139 +8,12 @@
#define __ASM_SWITCH_TO_H
#include <linux/thread_info.h>
+#include <asm/fpu-internal.h>
#include <asm/ptrace.h>
extern struct task_struct *__switch_to(void *, void *);
extern void update_cr_regs(struct task_struct *task);
-static inline int test_fp_ctl(u32 fpc)
-{
- u32 orig_fpc;
- int rc;
-
- asm volatile(
- " efpc %1\n"
- " sfpc %2\n"
- "0: sfpc %1\n"
- " la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
- : "d" (fpc), "0" (-EINVAL));
- return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
- asm volatile(
- " stfpc %0\n"
- : "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
- int rc;
-
- asm volatile(
- " lfpc %1\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
- return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
- asm volatile("std 0,%0" : "=Q" (fprs[0]));
- asm volatile("std 2,%0" : "=Q" (fprs[2]));
- asm volatile("std 4,%0" : "=Q" (fprs[4]));
- asm volatile("std 6,%0" : "=Q" (fprs[6]));
- asm volatile("std 1,%0" : "=Q" (fprs[1]));
- asm volatile("std 3,%0" : "=Q" (fprs[3]));
- asm volatile("std 5,%0" : "=Q" (fprs[5]));
- asm volatile("std 7,%0" : "=Q" (fprs[7]));
- asm volatile("std 8,%0" : "=Q" (fprs[8]));
- asm volatile("std 9,%0" : "=Q" (fprs[9]));
- asm volatile("std 10,%0" : "=Q" (fprs[10]));
- asm volatile("std 11,%0" : "=Q" (fprs[11]));
- asm volatile("std 12,%0" : "=Q" (fprs[12]));
- asm volatile("std 13,%0" : "=Q" (fprs[13]));
- asm volatile("std 14,%0" : "=Q" (fprs[14]));
- asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
- asm volatile("ld 0,%0" : : "Q" (fprs[0]));
- asm volatile("ld 2,%0" : : "Q" (fprs[2]));
- asm volatile("ld 4,%0" : : "Q" (fprs[4]));
- asm volatile("ld 6,%0" : : "Q" (fprs[6]));
- asm volatile("ld 1,%0" : : "Q" (fprs[1]));
- asm volatile("ld 3,%0" : : "Q" (fprs[3]));
- asm volatile("ld 5,%0" : : "Q" (fprs[5]));
- asm volatile("ld 7,%0" : : "Q" (fprs[7]));
- asm volatile("ld 8,%0" : : "Q" (fprs[8]));
- asm volatile("ld 9,%0" : : "Q" (fprs[9]));
- asm volatile("ld 10,%0" : : "Q" (fprs[10]));
- asm volatile("ld 11,%0" : : "Q" (fprs[11]));
- asm volatile("ld 12,%0" : : "Q" (fprs[12]));
- asm volatile("ld 13,%0" : : "Q" (fprs[13]));
- asm volatile("ld 14,%0" : : "Q" (fprs[14]));
- asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
- typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
- : "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
- unsigned long cr0, flags;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_set_bit(0, 17);
- __ctl_set_bit(0, 18);
- save_vx_regs(vxrs);
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
- typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- : : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
- if (task->thread.vxrs)
- save_vx_regs(task->thread.vxrs);
- else
- save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
- if (task->thread.vxrs)
- restore_vx_regs(task->thread.vxrs);
- else
- restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
static inline void save_access_regs(unsigned int *acrs)
{
typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -157,15 +30,13 @@
#define switch_to(prev,next,last) do { \
if (prev->mm) { \
- save_fp_ctl(&prev->thread.fp_regs.fpc); \
- save_fp_vx_regs(prev); \
+ save_fpu_regs(); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
} \
if (next->mm) { \
update_cr_regs(next); \
- restore_fp_ctl(&next->thread.fp_regs.fpc); \
- restore_fp_vx_regs(next); \
+ set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
} \
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 4990f6c..27ebde6 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -2,6 +2,7 @@
#define _ASM_S390_TOPOLOGY_H
#include <linux/cpumask.h>
+#include <asm/numa.h>
struct sysinfo_15_1_x;
struct cpu;
@@ -13,6 +14,7 @@
unsigned short core_id;
unsigned short socket_id;
unsigned short book_id;
+ unsigned short node_id;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
@@ -52,6 +54,43 @@
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)
+#define SD_BOOK_INIT SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+ return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
#include <asm-generic/topology.h>
#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 91f56b1..525cef7 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -11,16 +11,24 @@
#define __IGNORE_time
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-
-/* Ignore system calls that are also reachable via sys_socket */
+/* Ignore system calls that are also reachable via sys_socketcall */
#define __IGNORE_recvmmsg
#define __IGNORE_sendmmsg
+#define __IGNORE_socket
+#define __IGNORE_socketpair
+#define __IGNORE_bind
+#define __IGNORE_connect
+#define __IGNORE_listen
+#define __IGNORE_accept4
+#define __IGNORE_getsockopt
+#define __IGNORE_setsockopt
+#define __IGNORE_getsockname
+#define __IGNORE_getpeername
+#define __IGNORE_sendto
+#define __IGNORE_sendmsg
+#define __IGNORE_recvfrom
+#define __IGNORE_recvmsg
+#define __IGNORE_shutdown
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_ALARM
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 0000000..4a31356
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID 255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd: Operand to store register number
+ * @gr: String designation register in the format "%rN"
+ */
+.macro GR_NUM opd gr
+ \opd = REG_NUM_INVALID
+ .ifc \gr,%r0
+ \opd = 0
+ .endif
+ .ifc \gr,%r1
+ \opd = 1
+ .endif
+ .ifc \gr,%r2
+ \opd = 2
+ .endif
+ .ifc \gr,%r3
+ \opd = 3
+ .endif
+ .ifc \gr,%r4
+ \opd = 4
+ .endif
+ .ifc \gr,%r5
+ \opd = 5
+ .endif
+ .ifc \gr,%r6
+ \opd = 6
+ .endif
+ .ifc \gr,%r7
+ \opd = 7
+ .endif
+ .ifc \gr,%r8
+ \opd = 8
+ .endif
+ .ifc \gr,%r9
+ \opd = 9
+ .endif
+ .ifc \gr,%r10
+ \opd = 10
+ .endif
+ .ifc \gr,%r11
+ \opd = 11
+ .endif
+ .ifc \gr,%r12
+ \opd = 12
+ .endif
+ .ifc \gr,%r13
+ \opd = 13
+ .endif
+ .ifc \gr,%r14
+ \opd = 14
+ .endif
+ .ifc \gr,%r15
+ \opd = 15
+ .endif
+ .if \opd == REG_NUM_INVALID
+ .error "Invalid general-purpose register designation: \gr"
+ .endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v) (v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd: Operand to store register number
+ * @vxr: String designation register in the format "%vN"
+ *
+ * The vector register number is used as input to the instruction
+ * as well as to compute the RXB field of the instruction. To encode
+ * the low four bits of the register number into the instruction, use
+ * the VX_R(v) macro.
+ */
+.macro VX_NUM opd vxr
+ \opd = REG_NUM_INVALID
+ .ifc \vxr,%v0
+ \opd = 0
+ .endif
+ .ifc \vxr,%v1
+ \opd = 1
+ .endif
+ .ifc \vxr,%v2
+ \opd = 2
+ .endif
+ .ifc \vxr,%v3
+ \opd = 3
+ .endif
+ .ifc \vxr,%v4
+ \opd = 4
+ .endif
+ .ifc \vxr,%v5
+ \opd = 5
+ .endif
+ .ifc \vxr,%v6
+ \opd = 6
+ .endif
+ .ifc \vxr,%v7
+ \opd = 7
+ .endif
+ .ifc \vxr,%v8
+ \opd = 8
+ .endif
+ .ifc \vxr,%v9
+ \opd = 9
+ .endif
+ .ifc \vxr,%v10
+ \opd = 10
+ .endif
+ .ifc \vxr,%v11
+ \opd = 11
+ .endif
+ .ifc \vxr,%v12
+ \opd = 12
+ .endif
+ .ifc \vxr,%v13
+ \opd = 13
+ .endif
+ .ifc \vxr,%v14
+ \opd = 14
+ .endif
+ .ifc \vxr,%v15
+ \opd = 15
+ .endif
+ .ifc \vxr,%v16
+ \opd = 16
+ .endif
+ .ifc \vxr,%v17
+ \opd = 17
+ .endif
+ .ifc \vxr,%v18
+ \opd = 18
+ .endif
+ .ifc \vxr,%v19
+ \opd = 19
+ .endif
+ .ifc \vxr,%v20
+ \opd = 20
+ .endif
+ .ifc \vxr,%v21
+ \opd = 21
+ .endif
+ .ifc \vxr,%v22
+ \opd = 22
+ .endif
+ .ifc \vxr,%v23
+ \opd = 23
+ .endif
+ .ifc \vxr,%v24
+ \opd = 24
+ .endif
+ .ifc \vxr,%v25
+ \opd = 25
+ .endif
+ .ifc \vxr,%v26
+ \opd = 26
+ .endif
+ .ifc \vxr,%v27
+ \opd = 27
+ .endif
+ .ifc \vxr,%v28
+ \opd = 28
+ .endif
+ .ifc \vxr,%v29
+ \opd = 29
+ .endif
+ .ifc \vxr,%v30
+ \opd = 30
+ .endif
+ .ifc \vxr,%v31
+ \opd = 31
+ .endif
+ .if \opd == REG_NUM_INVALID
+ .error "Invalid vector register designation: \vxr"
+ .endif
+.endm
+
+/* RXB - Compute RXB from the most significant bit of each vector register number
+ *
+ * @rxb: Operand to store computed RXB value
+ * @v1: First vector register designated operand
+ * @v2: Second vector register designated operand
+ * @v3: Third vector register designated operand
+ * @v4: Fourth vector register designated operand
+ */
+.macro RXB rxb v1 v2=0 v3=0 v4=0
+ \rxb = 0
+ .if \v1 & 0x10
+ \rxb = \rxb | 0x08
+ .endif
+ .if \v2 & 0x10
+ \rxb = \rxb | 0x04
+ .endif
+ .if \v3 & 0x10
+ \rxb = \rxb | 0x02
+ .endif
+ .if \v4 & 0x10
+ \rxb = \rxb | 0x01
+ .endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m: Element size control
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXB m v1 v2=0 v3=0 v4=0
+ rxb = 0
+ RXB rxb, \v1, \v2, \v3, \v4
+ .byte (\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m: Element size control
+ * @opc: Opcode
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0
+ MRXB \m, \v1, \v2, \v3, \v4
+ .byte \opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro VGBM vr imm2
+ VX_NUM v1, \vr
+ .word (0xE700 | (VX_R(v1) << 4))
+ .word \imm2
+ MRXBOPC 0, 0x44, v1
+.endm
+.macro VZERO vxr
+ VGBM \vxr, 0
+.endm
+.macro VONE vxr
+ VGBM \vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro VLVG v, gr, disp, m
+ VX_NUM v1, \v
+ GR_NUM b2, "%r0"
+ GR_NUM r3, \gr
+ .word 0xE700 | (VX_R(v1) << 4) | r3
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x22, v1
+.endm
+.macro VLVGB v, gr, index
+ VLVG \v, \gr, \index, 0 /* m = 0 selects byte elements; VLVG takes (v, gr, disp, m) */
+.endm
+.macro VLVGH v, gr, index
+ VLVG \v, \gr, \index, 1
+.endm
+.macro VLVGF v, gr, index
+ VLVG \v, \gr, \index, 2
+.endm
+.macro VLVGG v, gr, index
+ VLVG \v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro VL v, disp, index="%r0", base
+ VX_NUM v1, \v
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | (VX_R(v1) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro VLEx vr1, disp, index="%r0", base, m3, opc
+ VX_NUM v1, \vr1
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | (VX_R(v1) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEB vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro VLEH vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro VLEF vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro VLEG vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro VLEIx vr1, imm2, m3, opc
+ VX_NUM v1, \vr1
+ .word 0xE700 | (VX_R(v1) << 4)
+ .word \imm2
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEIB vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x40
+.endm
+.macro VLEIH vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x41
+.endm
+.macro VLEIF vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x43
+.endm
+.macro VLEIG vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro VLGV gr, vr, disp, base="%r0", m
+ GR_NUM r1, \gr /* target general-purpose register */
+ GR_NUM b2, \base /* base register for the element index */
+ VX_NUM v3, \vr /* source vector register (0..31) */
+ .word 0xE700 | (r1 << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x21, 0, v3 /* v3 is the second register field: pass it as second RXB operand */
+.endm
+.macro VLGVB gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 0
+.endm
+.macro VLGVH gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 1
+.endm
+.macro VLGVF gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 2
+.endm
+.macro VLGVG gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro VLM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro VSTM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro VPERM vr1, vr2, vr3, vr4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro VUPLL vr1, vr2, m3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word 0x0000
+ MRXBOPC \m3, 0xD4, v1, v2
+.endm
+.macro VUPLLB vr1, vr2
+ VUPLL \vr1, \vr2, 0
+.endm
+.macro VUPLLH vr1, vr2
+ VUPLL \vr1, \vr2, 1
+.endm
+.macro VUPLLF vr1, vr2
+ VUPLL \vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro VX vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC 0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro VGFM vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC \m4, 0xB4, v1, v2, v3
+.endm
+.macro VGFMB vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 0
+.endm
+.macro VGFMH vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 1
+.endm
+.macro VGFMF vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 2
+.endm
+.macro VGFMG vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro VGFMA vr1, vr2, vr3, vr4, m5
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12) | (\m5 << 8)
+ MRXBOPC VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro VGFMAB vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro VGFMAH vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro VGFMAF vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro VGFMAG vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro VSRLB vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC 0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 67878af..59d2bb4 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -204,9 +204,9 @@
#define __NR_statfs64 265
#define __NR_fstatfs64 266
#define __NR_remap_file_pages 267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind 268
+#define __NR_get_mempolicy 269
+#define __NR_set_mempolicy 270
#define __NR_mq_open 271
#define __NR_mq_unlink 272
#define __NR_mq_timedsend 273
@@ -223,7 +223,7 @@
#define __NR_inotify_init 284
#define __NR_inotify_add_watch 285
#define __NR_inotify_rm_watch 286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages 287
#define __NR_openat 288
#define __NR_mkdirat 289
#define __NR_mknodat 290
@@ -245,7 +245,7 @@
#define __NR_sync_file_range 307
#define __NR_tee 308
#define __NR_vmsplice 309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages 310
#define __NR_getcpu 311
#define __NR_epoll_pwait 312
#define __NR_utimes 313
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ffb8761..b756c63 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -28,6 +28,17 @@
CFLAGS_sysinfo.o += -w
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o += -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a2da259..48c9af7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,6 +28,9 @@
DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
BLANK();
DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+ DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
+ DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
+ DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index fe8d692..eb46642 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -153,33 +153,14 @@
/* Store registers needed to create the signal frame */
static void store_sigregs(void)
{
- int i;
-
save_access_regs(current->thread.acrs);
- save_fp_ctl(&current->thread.fp_regs.fpc);
- if (current->thread.vxrs) {
- save_vx_regs(current->thread.vxrs);
- for (i = 0; i < __NUM_FPRS; i++)
- current->thread.fp_regs.fprs[i] =
- *(freg_t *)(current->thread.vxrs + i);
- } else
- save_fp_regs(current->thread.fp_regs.fprs);
+ save_fpu_regs();
}
/* Load registers after signal return */
static void load_sigregs(void)
{
- int i;
-
restore_access_regs(current->thread.acrs);
- /* restore_fp_ctl is done in restore_sigregs */
- if (current->thread.vxrs) {
- for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(current->thread.vxrs + i) =
- current->thread.fp_regs.fprs[i];
- restore_vx_regs(current->thread.vxrs);
- } else
- restore_fp_regs(current->thread.fp_regs.fprs);
}
static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +177,7 @@
user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(user_sregs.fpregs));
+ fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
return -EFAULT;
return 0;
@@ -217,8 +197,8 @@
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
return -EINVAL;
- /* Loading the floating-point-control word can fail. Do that first. */
- if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
return -EINVAL;
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -235,9 +215,7 @@
regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
-
- memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(current->thread.fp_regs));
+ fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -258,13 +236,13 @@
return -EFAULT;
/* Save vector registers to signal stack */
- if (current->thread.vxrs) {
+ if (is_vx_task(current)) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.vxrs + __NUM_VXRS_LOW,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -286,15 +264,15 @@
*(__u32 *)&regs->gprs[i] = gprs_high[i];
/* Restore vector registers from signal stack */
- if (current->thread.vxrs) {
+ if (is_vx_task(current)) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
}
return 0;
}
@@ -308,6 +286,7 @@
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
set_current_blocked(&set);
+ save_fpu_regs();
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -330,6 +309,7 @@
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ save_fpu_regs();
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -472,7 +452,7 @@
*/
uc_flags = UC_GPRS_HIGH;
if (MACHINE_HAS_VX) {
- if (current->thread.vxrs)
+ if (is_vx_task(current))
uc_flags |= UC_VXRS;
} else
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 84062e7..247b7aa 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,6 +20,8 @@
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
+#include <asm/fpu-internal.h>
+#include <asm/vx-insn.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -46,10 +48,10 @@
_TIF_UPROBE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP)
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
.macro TRACE_IRQS_ON
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -73,38 +75,6 @@
#endif
.endm
- .macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
- tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
- jz .+8
- .insn s,0xb2800000,\newpp
-#endif
- .endm
-
- .macro HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
- tmhh %r8,0x0001 # interrupting from user ?
- jnz .+62
- lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_critical)
- clg \scratch,BASED(.Lsie_critical_length)
- .if \reason==1
- # Some program interrupts are suppressing (e.g. protection).
- # We must also check the instruction after SIE in that case.
- # do_protection_exception will rewind to .Lrewind_pad
- jh .+42
- .else
- jhe .+42
- .endif
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- larl %r9,sie_exit # skip forward to sie_exit
- mvi __SF_EMPTY+31(%r15),\reason # set exit reason
-#endif
- .endm
-
.macro CHECK_STACK stacksize,savearea
#ifdef CONFIG_CHECK_STACK
tml %r15,\stacksize - CONFIG_STACK_GUARD
@@ -113,7 +83,7 @@
#endif
.endm
- .macro SWITCH_ASYNC savearea,stack,shift
+ .macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
lgr %r14,%r9
@@ -124,26 +94,28 @@
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
jnz 1f
-0: lg %r14,\stack # are we already on the target stack?
+0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
slgr %r14,%r15
- srag %r14,%r14,\shift
- jnz 1f
- CHECK_STACK 1<<\shift,\savearea
+ srag %r14,%r14,STACK_SHIFT
+ jnz 2f
+ CHECK_STACK 1<<STACK_SHIFT,\savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: lg %r15,\stack # load target stack
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ j 3f
+1: LAST_BREAK %r14
+ UPDATE_VTIME %r14,%r15,\timer
+2: lg %r15,__LC_ASYNC_STACK # load async stack
+3: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
- .macro UPDATE_VTIME scratch,enter_timer
- lg \scratch,__LC_EXIT_TIMER
- slg \scratch,\enter_timer
- alg \scratch,__LC_USER_TIMER
- stg \scratch,__LC_USER_TIMER
- lg \scratch,__LC_LAST_UPDATE_TIMER
- slg \scratch,__LC_EXIT_TIMER
- alg \scratch,__LC_SYSTEM_TIMER
- stg \scratch,__LC_SYSTEM_TIMER
+ .macro UPDATE_VTIME w1,w2,enter_timer
+ lg \w1,__LC_EXIT_TIMER
+ lg \w2,__LC_LAST_UPDATE_TIMER
+ slg \w1,\enter_timer
+ slg \w2,__LC_EXIT_TIMER
+ alg \w1,__LC_USER_TIMER
+ alg \w2,__LC_SYSTEM_TIMER
+ stg \w1,__LC_USER_TIMER
+ stg \w2,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
@@ -197,6 +169,69 @@
br %r14
.L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ stg %r2,__SF_EMPTY(%r15) # save control block pointer
+ stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+ tm __LC_CPU_FLAGS+7,_CIF_FPU # load guest fp/vx registers ?
+ jno .Lsie_load_guest_gprs
+ brasl %r14,load_fpu_regs # load guest fp/vx regs
+.Lsie_load_guest_gprs:
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lg %r14,__LC_GMAP # get gmap pointer
+ ltgr %r14,%r14
+ jz .Lsie_gmap
+ lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
+.Lsie_gmap:
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),3 # last exit...
+ jnz .Lsie_skip
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ jo .Lsie_skip # exit if fp/vx regs changed
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
+ jz .Lsie_enter
+ .insn s,0xb2800000,__LC_CURRENT_PID # set guest id to pid
+.Lsie_enter:
+ sie 0(%r14)
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
+ jz .Lsie_skip
+ .insn s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+.Lsie_skip:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
+ lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
+ br %r14
+.Lsie_fault:
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
+
+ EX_TABLE(.Lrewind_pad,.Lsie_fault)
+ EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
/*
* SVC interrupt handler routine. System calls are synchronous events and
* are executed with interrupts enabled.
@@ -212,9 +247,9 @@
.Lsysc_per:
lg %r15,__LC_KERNEL_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-.Lsysc_vtime:
- UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
LAST_BREAK %r13
+.Lsysc_vtime:
+ UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -244,8 +279,6 @@
.Lsysc_return:
LOCKDEP_SYS_EXIT
.Lsysc_tif:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno .Lsysc_restore
tm __PT_FLAGS+7(%r11),_PIF_WORK
jnz .Lsysc_work
tm __TI_flags+7(%r12),_TIF_WORK
@@ -280,6 +313,8 @@
jo .Lsysc_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ jo .Lsysc_vxrs
tm __LC_CPU_FLAGS+7,_CIF_ASCE
jo .Lsysc_uaccess
j .Lsysc_return # beware of critical section cleanup
@@ -307,6 +342,13 @@
j .Lsysc_return
#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+ larl %r14,.Lsysc_return
+ jg load_fpu_regs
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
.Lsysc_sigpending:
@@ -405,28 +447,35 @@
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,1
tmhh %r8,0x0001 # test problem state bit
- jnz 1f # -> fault in user space
- tmhh %r8,0x4000 # PER bit set in old PSW ?
- jnz 0f # -> enabled, can't be a double fault
+ jnz 2f # -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+ # cleanup critical section for sie64a
+ lgr %r14,%r9
+ slg %r14,BASED(.Lsie_critical_start)
+ clg %r14,BASED(.Lsie_critical_length)
+ jhe 0f
+ brasl %r14,.Lcleanup_sie
+#endif
+0: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 1f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
- LAST_BREAK %r14
+ j 3f
+2: LAST_BREAK %r14
+ UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
lg %r15,__LC_KERNEL_STACK
lg %r14,__TI_task(%r12)
aghi %r14,__TASK_thread # pointer to thread_struct
lghi %r13,__LC_PGM_TDB
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
- jz 2f
+ jz 3f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+3: la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -435,24 +484,28 @@
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 0f
+ jz 4f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0: REENABLE_IRQS
+4: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
nill %r10,0x007f
sll %r10,2
- je .Lsysc_return
+ je .Lpgm_return
lgf %r1,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
basr %r14,%r1 # branch to interrupt-handler
- j .Lsysc_return
+.Lpgm_return:
+ LOCKDEP_SYS_EXIT
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lsysc_restore
+ j .Lsysc_tif
#
# PER event in supervisor state, must be kprobes
@@ -462,7 +515,7 @@
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_per_trap
- j .Lsysc_return
+ j .Lpgm_return
#
# single stepped system call
@@ -483,15 +536,9 @@
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,2
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user?
- jz .Lio_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-.Lio_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -587,6 +634,8 @@
jo .Lio_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ jo .Lio_vxrs
tm __LC_CPU_FLAGS+7,_CIF_ASCE
jo .Lio_uaccess
j .Lio_return # beware of critical section cleanup
@@ -609,6 +658,13 @@
j .Lio_return
#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+ larl %r14,.Lio_return
+ jg load_fpu_regs
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
@@ -652,15 +708,9 @@
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,3
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user ?
- jz .Lext_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-.Lext_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -690,6 +740,122 @@
br %r14
.Lpsw_idle_end:
+/* Store floating-point controls and either the floating-point registers or,
+ * for tasks using the vector extension, the vector registers. A critical
+ * section cleanup assures that the registers are stored even if interrupted
+ * for some other work. The function loads %r2 with the address of the current
+ * task's thread_struct; if its fpu state has no register save area, the
+ * register store is omitted (see also comments in arch_dup_task_struct()).
+ *
+ * The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore
+ * of the register contents at system call or io return.
+ */
+ENTRY(save_fpu_regs)
+ lg %r2,__LC_CURRENT
+ aghi %r2,__TASK_thread
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ bor %r14
+ stfpc __THREAD_FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+ lg %r3,__THREAD_FPU_regs(%r2)
+ ltgr %r3,%r3
+ jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU
+ tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX
+ jz .Lsave_fpu_regs_fp # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+ VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+ VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
+ j .Lsave_fpu_regs_done # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+ std 0,0(%r3)
+ std 1,8(%r3)
+ std 2,16(%r3)
+ std 3,24(%r3)
+ std 4,32(%r3)
+ std 5,40(%r3)
+ std 6,48(%r3)
+ std 7,56(%r3)
+ std 8,64(%r3)
+ std 9,72(%r3)
+ std 10,80(%r3)
+ std 11,88(%r3)
+ std 12,96(%r3)
+ std 13,104(%r3)
+ std 14,112(%r3)
+ std 15,120(%r3)
+.Lsave_fpu_regs_done:
+ oi __LC_CPU_FLAGS+7,_CIF_FPU
+ br %r14
+.Lsave_fpu_regs_end:
+
+/* Load floating-point controls and floating-point or vector extension
+ * registers. A critical section cleanup assures that the register contents
+ * are loaded even if interrupted for some other work. Depending on the saved
+ * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ * %r15: <kernel stack>
+ * The function requires:
+ * %r4 and __SF_EMPTY+32(%r15)
+ */
+load_fpu_regs:
+ lg %r4,__LC_CURRENT
+ aghi %r4,__TASK_thread
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ bnor %r14
+ lfpc __THREAD_FPU_fpc(%r4)
+ stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
+ tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
+ lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
+ jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs
+.Lload_fpu_regs_vx_ctl:
+ tm __SF_EMPTY+32+5(%r15),2 # test VX control
+ jo .Lload_fpu_regs_vx
+ oi __SF_EMPTY+32+5(%r15),2 # set VX control
+ lctlg %c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_vx:
+ VLM %v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+ VLM %v16,%v31,256,%r4
+ j .Lload_fpu_regs_done
+.Lload_fpu_regs_fp_ctl:
+ tm __SF_EMPTY+32+5(%r15),2 # test VX control
+ jz .Lload_fpu_regs_fp
+ ni __SF_EMPTY+32+5(%r15),253 # clear VX control
+ lctlg %c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_fp:
+ ld 0,0(%r4)
+ ld 1,8(%r4)
+ ld 2,16(%r4)
+ ld 3,24(%r4)
+ ld 4,32(%r4)
+ ld 5,40(%r4)
+ ld 6,48(%r4)
+ ld 7,56(%r4)
+ ld 8,64(%r4)
+ ld 9,72(%r4)
+ ld 10,80(%r4)
+ ld 11,88(%r4)
+ ld 12,96(%r4)
+ ld 13,104(%r4)
+ ld 14,112(%r4)
+ ld 15,120(%r4)
+.Lload_fpu_regs_done:
+ ni __LC_CPU_FLAGS+7,255-_CIF_FPU
+ br %r14
+.Lload_fpu_regs_end:
+
+/* Test and set the vector enablement control in CR0.46 */
+ENTRY(__ctl_set_vx)
+ stctg %c0,%c0,__SF_EMPTY(%r15)
+ tm __SF_EMPTY+5(%r15),2
+ bor %r14
+ oi __SF_EMPTY+5(%r15),2
+ lctlg %c0,%c0,__SF_EMPTY(%r15)
+ br %r14
+.L__ctl_set_vx_end:
+
.L__critical_end:
/*
@@ -702,9 +868,8 @@
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
jo .Lmcck_panic # yes -> rest of mcck code invalid
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
@@ -725,11 +890,7 @@
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
jno .Lmcck_panic # no -> skip cleanup critical
- SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
- tm %r8,0x0001 # interrupting from user ?
- jz .Lmcck_skip
- UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
- LAST_BREAK %r14
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
@@ -764,12 +925,8 @@
lpswe __LC_RETURN_MCCK_PSW
.Lmcck_panic:
- lg %r14,__LC_PANIC_STACK
- slgr %r14,%r15
- srag %r14,%r14,PAGE_SHIFT
- jz 0f
lg %r15,__LC_PANIC_STACK
-0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j .Lmcck_skip
#
@@ -819,20 +976,13 @@
jg kernel_stack_overflow
#endif
- .align 8
-.Lcleanup_table:
- .quad system_call
- .quad .Lsysc_do_svc
- .quad .Lsysc_tif
- .quad .Lsysc_restore
- .quad .Lsysc_done
- .quad .Lio_tif
- .quad .Lio_restore
- .quad .Lio_done
- .quad psw_idle
- .quad .Lpsw_idle_end
-
cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+ clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+ jl .Lcleanup_sie
+#endif
clg %r9,BASED(.Lcleanup_table) # system_call
jl 0f
clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
@@ -853,8 +1003,54 @@
jl 0f
clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
jl .Lcleanup_idle
+ clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
+ jl .Lcleanup_save_fpu_regs
+ clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
+ jl .Lcleanup_load_fpu_regs
+ clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end
+ jl .Lcleanup___ctl_set_vx
0: br %r14
+ .align 8
+.Lcleanup_table:
+ .quad system_call
+ .quad .Lsysc_do_svc
+ .quad .Lsysc_tif
+ .quad .Lsysc_restore
+ .quad .Lsysc_done
+ .quad .Lio_tif
+ .quad .Lio_restore
+ .quad .Lio_done
+ .quad psw_idle
+ .quad .Lpsw_idle_end
+ .quad save_fpu_regs
+ .quad .Lsave_fpu_regs_end
+ .quad load_fpu_regs
+ .quad .Lload_fpu_regs_end
+ .quad __ctl_set_vx
+ .quad .L__ctl_set_vx_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+ .quad .Lsie_gmap
+ .quad .Lsie_done
+
+/*
+ * Reached from cleanup_critical when the interrupted address in %r9 lies
+ * between .Lsie_gmap and .Lsie_done. Leaves SIE state and redirects the
+ * resume address in %r9 to sie_exit.
+ */
+.Lcleanup_sie:
+ lg %r9,__SF_EMPTY(%r15) # get control block pointer
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
+ jz 0f
+ .insn s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+0: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ br %r14
+#endif
.Lcleanup_system_call:
# check if stpt has been executed
@@ -915,7 +1111,7 @@
.quad system_call
.quad .Lsysc_stmg
.quad .Lsysc_per
- .quad .Lsysc_vtime+18
+ .quad .Lsysc_vtime+36
.quad .Lsysc_vtime+42
.Lcleanup_sysc_tif:
@@ -981,6 +1177,145 @@
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
+/*
+ * Reached from cleanup_critical when the interrupted address in %r9 lies
+ * inside save_fpu_regs. The compare chain walks the save_fpu_regs labels from
+ * last to first and enters the copy of the routine below at the local label
+ * that matches how far the interrupted code had progressed.
+ * NOTE(review): entry points 1: and later use %r2/%r3 without loading them
+ * here, i.e. they rely on the values set up by the interrupted code - confirm.
+ */
+.Lcleanup_save_fpu_regs:
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ bor %r14 # CIF_FPU already set -> nothing left to save
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
+ jhe 5f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
+ jhe 4f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+ jhe 3f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+ jhe 2f
+ clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
+ jhe 1f
+ lg %r2,__LC_CURRENT
+0: # Store floating-point controls
+ stfpc __THREAD_FPU_fpc(%r2)
+1: # Load register save area and check if VX is active
+ lg %r3,__THREAD_FPU_regs(%r2)
+ ltgr %r3,%r3
+ jz 5f # no save area -> set CIF_FPU
+ tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX
+ jz 4f # no VX -> store FP regs
+2: # Store vector registers (V0-V15)
+ VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
+3: # Store vector registers (V16-V31)
+ VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
+ j 5f # -> done, set CIF_FPU flag
+4: # Store floating-point registers
+ std 0,0(%r3)
+ std 1,8(%r3)
+ std 2,16(%r3)
+ std 3,24(%r3)
+ std 4,32(%r3)
+ std 5,40(%r3)
+ std 6,48(%r3)
+ std 7,56(%r3)
+ std 8,64(%r3)
+ std 9,72(%r3)
+ std 10,80(%r3)
+ std 11,88(%r3)
+ std 12,96(%r3)
+ std 13,104(%r3)
+ std 14,112(%r3)
+ std 15,120(%r3)
+5: # Set CIF_FPU flag
+ oi __LC_CPU_FLAGS+7,_CIF_FPU
+ lg %r9,48(%r11) # return from save_fpu_regs
+ br %r14
+/* Addresses of the save_fpu_regs labels used by the compare chain above */
+.Lcleanup_save_fpu_fpc_end:
+ .quad .Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+ .quad .Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+ .quad .Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+ .quad .Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+ .quad .Lsave_fpu_regs_done
+
+/*
+ * Reached from cleanup_critical when the interrupted address in %r9 lies
+ * inside load_fpu_regs. The compare chain walks the load_fpu_regs labels from
+ * last to first and enters the copy of the routine below at the local label
+ * that matches how far the interrupted code had progressed.
+ * NOTE(review): entry points 6: and later use %r4 without loading it here,
+ * i.e. they rely on the value set up by the interrupted code - confirm.
+ */
+.Lcleanup_load_fpu_regs:
+ tm __LC_CPU_FLAGS+7,_CIF_FPU
+ bnor %r14 # CIF_FPU not set -> nothing to load
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
+ jhe 1f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
+ jhe 2f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
+ jhe 3f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+ jhe 4f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
+ jhe 5f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
+ jhe 6f
+ lg %r4,__LC_CURRENT
+ lfpc __THREAD_FPU_fpc(%r4)
+ tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
+ lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
+ jz 3f # -> no VX, load FP regs
+6: # Set VX-enablement control
+ stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
+ tm __SF_EMPTY+32+5(%r15),2 # test VX control
+ jo 5f
+ oi __SF_EMPTY+32+5(%r15),2 # set VX control
+ lctlg %c0,%c0,__SF_EMPTY+32(%r15)
+5: # Load V0 ..V15 registers
+ VLM %v0,%v15,0,%r4
+4: # Load V16..V31 registers
+ VLM %v16,%v31,256,%r4
+ j 1f
+3: # Clear VX-enablement control for FP
+ stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
+ tm __SF_EMPTY+32+5(%r15),2 # test VX control
+ jz 2f
+ ni __SF_EMPTY+32+5(%r15),253 # clear VX control
+ lctlg %c0,%c0,__SF_EMPTY+32(%r15)
+2: # Load floating-point registers
+ ld 0,0(%r4)
+ ld 1,8(%r4)
+ ld 2,16(%r4)
+ ld 3,24(%r4)
+ ld 4,32(%r4)
+ ld 5,40(%r4)
+ ld 6,48(%r4)
+ ld 7,56(%r4)
+ ld 8,64(%r4)
+ ld 9,72(%r4)
+ ld 10,80(%r4)
+ ld 11,88(%r4)
+ ld 12,96(%r4)
+ ld 13,104(%r4)
+ ld 14,112(%r4)
+ ld 15,120(%r4)
+1: # Clear CIF_FPU bit
+ ni __LC_CPU_FLAGS+7,255-_CIF_FPU
+ lg %r9,48(%r11) # return from load_fpu_regs
+ br %r14
+/* Addresses of the load_fpu_regs labels used by the compare chain above */
+.Lcleanup_load_fpu_regs_vx_ctl:
+ .quad .Lload_fpu_regs_vx_ctl
+.Lcleanup_load_fpu_regs_vx:
+ .quad .Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+ .quad .Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp_ctl:
+ .quad .Lload_fpu_regs_fp_ctl
+.Lcleanup_load_fpu_regs_fp:
+ .quad .Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+ .quad .Lload_fpu_regs_done
+
+/*
+ * Reached from cleanup_critical when the interrupted address in %r9 lies
+ * inside __ctl_set_vx. Repeats the test-and-set of the VX control in CR0 from
+ * the start. If the control is already set, it returns with %r9 unchanged.
+ */
+.Lcleanup___ctl_set_vx:
+ stctg %c0,%c0,__SF_EMPTY(%r15) # store CR0
+ tm __SF_EMPTY+5(%r15),2 # test VX control
+ bor %r14 # already set -> nothing to redo
+ oi __SF_EMPTY+5(%r15),2 # set VX control
+ lctlg %c0,%c0,__SF_EMPTY(%r15) # load CR0 with VX control set
+ lg %r9,48(%r11) # return from __ctl_set_vx
+ br %r14
+
/*
* Integer constants
*/
@@ -989,62 +1324,11 @@
.quad .L__critical_start
.Lcritical_length:
.quad .L__critical_end - .L__critical_start
-
-
#if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
- stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- stg %r2,__SF_EMPTY(%r15) # save control block pointer
- stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
- tm __SIE_PROG20+3(%r14),3 # last exit...
- jnz .Lsie_done
- LPP __SF_EMPTY(%r15) # set guest id
- sie 0(%r14)
-.Lsie_done:
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
- nop 0
- .globl sie_exit
-sie_exit:
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
- br %r14
-.Lsie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
- j sie_exit
-
- .align 8
-.Lsie_critical:
+.Lsie_critical_start:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
-
- EX_TABLE(.Lrewind_pad,.Lsie_fault)
- EX_TABLE(sie_exit,.Lsie_fault)
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 59b7c64..1255c6c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -370,6 +370,7 @@
xc 0x200(256),0x200 # partially clear lowcore
xc 0x300(256),0x300
xc 0xe00(256),0xe00
+ lctlg %c0,%c15,0x200(%r0) # initialize control registers
stck __LC_LAST_UPDATE_CLOCK
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
@@ -413,9 +414,9 @@
# followed by the facility words.
#if defined(CONFIG_MARCH_Z13)
- .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+ .long 2, 0xc100eff2, 0xf46cc800
#elif defined(CONFIG_MARCH_ZEC12)
- .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+ .long 2, 0xc100eff2, 0xf46cc800
#elif defined(CONFIG_MARCH_Z196)
.long 2, 0xc100eff2, 0xf46c0000
#elif defined(CONFIG_MARCH_Z10)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a902996..c9dac21 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,12 +44,9 @@
unsigned char *ipn = (unsigned char *)new;
pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
- pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n",
- ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
- pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
- ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
- pr_emerg("New: %02x %02x %02x %02x %02x %02x\n",
- ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+ pr_emerg("Found: %6ph\n", ipc);
+ pr_emerg("Expected: %6ph\n", ipe);
+ pr_emerg("New: %6ph\n", ipn);
panic("Corrupted kernel text");
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 56b5508..0ae6f8e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,6 +21,7 @@
#include <asm/nmi.h>
#include <asm/crw.h>
#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
#include <asm/ctl_reg.h>
struct mcck_struct {
@@ -164,8 +165,12 @@
cr0.val = S390_lowcore.cregs_save_area[0];
cr0.afp = cr0.vx = 1;
__ctl_load(cr0.val, 0, 0);
- restore_vx_regs((__vector128 *)
- &S390_lowcore.vector_save_area);
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
+ : : "Q" (*(struct vx_array *)
+ &S390_lowcore.vector_save_area) : "1");
__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
/* Revalidate access registers */
@@ -358,4 +363,4 @@
ctl_set_bit(14, 24); /* enable warning MCH */
return 0;
}
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index afe05bf..b973972 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,12 +1019,9 @@
break;
}
- /* The host-program-parameter (hpp) contains the sie control
- * block that is set by sie64a() in entry64.S. Check if hpp
- * refers to a valid control block and set sde_regs flags
- * accordingly. This would allow to use hpp values for other
- * purposes too.
- * For now, simply use a non-zero value as guest indicator.
+ /* The host-program-parameter (hpp) contains the pid of
+ * the CPU thread as set by sie64a() in entry.S.
+ * If non-zero assume a guest sample.
*/
if (sfr->basic.hpp)
sde_regs->in_guest = 1;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 8f587d8..f2dac9f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -81,8 +81,38 @@
void arch_release_task_struct(struct task_struct *tsk)
{
- if (tsk->thread.vxrs)
- kfree(tsk->thread.vxrs);
+ /*
+ * Free either the floating-point or the vector register save area.
+ * kfree() is called unconditionally; it accepts a NULL pointer, so
+ * tasks without a save area need no special case.
+ */
+ kfree(tsk->thread.fpu.regs);
+}
+
+/*
+ * arch_dup_task_struct - copy a task structure and give it its own FP save area
+ * @dst: task structure being set up
+ * @src: task structure to copy from
+ *
+ * *src is copied to *dst, which also copies the save area pointer. That
+ * pointer is then replaced by a freshly zeroed floating-point save area so
+ * the two tasks never share one buffer. The register contents are taken from
+ * the current task, converted to the FP layout if current uses vector
+ * registers.
+ *
+ * Returns 0 on success, -ENOMEM if the save area cannot be allocated.
+ */
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+ *dst = *src;
+
+ /* Set up a new floating-point register save area */
+ dst->thread.fpu.fpc = 0;
+ dst->thread.fpu.flags = 0; /* Always start with VX disabled */
+ dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+ GFP_KERNEL|__GFP_REPEAT);
+ if (!dst->thread.fpu.fprs)
+ return -ENOMEM;
+
+ /*
+ * Save the floating-point or vector register state of the current
+ * task. The state is not saved for early kernel threads, for example,
+ * the init_task, which do not have an allocated save area.
+ * The CIF_FPU flag is set in any case to lazily clear or restore a
+ * saved state when switching to a different task or returning to
+ * user space.
+ */
+ save_fpu_regs();
+ dst->thread.fpu.fpc = current->thread.fpu.fpc;
+ if (is_vx_task(current))
+ convert_vx_to_fp(dst->thread.fpu.fprs,
+ current->thread.fpu.vxrs);
+ else if (current->thread.fpu.fprs)
+ /*
+ * Copy only if current has a save area; otherwise keep the
+ * zeroed registers instead of reading through a NULL pointer.
+ */
+ memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+ sizeof(freg_t) * __NUM_FPRS);
+ return 0;
}
int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -142,11 +172,6 @@
p->thread.ri_signum = 0;
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Save the fpu registers to new thread structure. */
- save_fp_ctl(&p->thread.fp_regs.fpc);
- save_fp_regs(p->thread.fp_regs.fprs);
- p->thread.fp_regs.pad = 0;
- p->thread.vxrs = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
unsigned long tls = frame->childregs.gprs[6];
@@ -162,7 +187,7 @@
asmlinkage void execve_tail(void)
{
- current->thread.fp_regs.fpc = 0;
+ current->thread.fpu.fpc = 0;
asm volatile("sfpc %0" : : "d" (0));
}
@@ -171,8 +196,15 @@
*/
int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
{
- save_fp_ctl(&fpregs->fpc);
- save_fp_regs(fpregs->fprs);
+ save_fpu_regs();
+ fpregs->fpc = current->thread.fpu.fpc;
+ fpregs->pad = 0;
+ if (is_vx_task(current))
+ convert_vx_to_fp((freg_t *)&fpregs->fprs,
+ current->thread.fpu.vxrs);
+ else
+ memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+ sizeof(fpregs->fprs));
return 1;
}
EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index dc488e1..e6e077a 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -41,6 +41,15 @@
}
/*
+ * cpu_have_feature - Test CPU features on module initialization
+ * @num: bit number of the feature in elf_hwcap
+ *
+ * Returns 1 if bit @num is set in elf_hwcap and 0 otherwise. The result is
+ * normalized to 0/1 so that a feature bit above bit 31 is not lost when the
+ * mask is narrowed to the int return type. Bit numbers beyond the width of
+ * elf_hwcap are reported as not present instead of shifting out of range.
+ */
+int cpu_have_feature(unsigned int num)
+{
+ if (num >= 8 * sizeof(elf_hwcap))
+ return 0;
+ return (elf_hwcap >> num) & 1;
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
+/*
* show_cpuinfo - Get information on one CPU for use by procfs.
*/
static int show_cpuinfo(struct seq_file *m, void *v)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index d363c9c..8b1c8e3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@
struct per_regs old, new;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+ if (MACHINE_HAS_TE) {
unsigned long cr, cr_new;
__ctl_store(cr, 0, 0);
- cr_new = cr;
- if (MACHINE_HAS_TE) {
- /* Set or clear transaction execution TXC bit 8. */
- cr_new |= (1UL << 55);
- if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
- }
- if (MACHINE_HAS_VX) {
- /* Enable/disable of vector extension */
- cr_new &= ~(1UL << 17);
- if (task->thread.vxrs)
- cr_new |= (1UL << 17);
- }
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new = cr | (1UL << 55);
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr_new &= ~(1UL << 55);
if (cr_new != cr)
__ctl_load(cr_new, 0, 0);
- if (MACHINE_HAS_TE) {
- /* Set/clear transaction execution TDC bits 62/63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
- if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
- if (task->thread.per_flags &
- PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
- else
- cr_new |= 2UL;
- }
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
}
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
}
/* Copy user specified PER registers */
new.control = thread->per_user.control;
@@ -242,21 +230,21 @@
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fp_regs.fpc;
+ tmp = child->thread.fpu.fpc;
tmp <<= BITS_PER_LONG - 32;
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * The floating point regs. are stored in the save area that
+ * child->thread.fpu.fprs (or, for a vector task,
+ * child->thread.fpu.vxrs) points to. Both members are
+ * pointers: index from the pointer value, not from the
+ * address of the member.
*/
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (is_vx_task(child))
tmp = *(addr_t *)
- ((addr_t) child->thread.vxrs + 2*offset);
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
else
tmp = *(addr_t *)
- ((addr_t) &child->thread.fp_regs.fprs + offset);
+ ((addr_t) child->thread.fpu.fprs + offset);
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -387,20 +375,20 @@
if ((unsigned int) data != 0 ||
test_fp_ctl(data >> (BITS_PER_LONG - 32)))
return -EINVAL;
- child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+ child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * The floating point regs. are stored in the save area that
+ * child->thread.fpu.fprs (or, for a vector task,
+ * child->thread.fpu.vxrs) points to. Both members are
+ * pointers: index from the pointer value, not from the
+ * address of the member, or the store lands in the thread
+ * structure instead of the save area.
*/
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (is_vx_task(child))
*(addr_t *)((addr_t)
- child->thread.vxrs + 2*offset) = data;
+ child->thread.fpu.vxrs + 2*offset) = data;
else
*(addr_t *)((addr_t)
- &child->thread.fp_regs.fprs + offset) = data;
+ child->thread.fpu.fprs + offset) = data;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -621,20 +609,20 @@
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fp_regs.fpc;
+ tmp = child->thread.fpu.fpc;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * The floating point regs. are stored in the save area that
+ * child->thread.fpu.fprs (or, for a vector task,
+ * child->thread.fpu.vxrs) points to. Both members are
+ * pointers: index from the pointer value, not from the
+ * address of the member.
+ * NOTE(review): 2*offset looks correct only for offsets that
+ * are a multiple of 8 - confirm for the second 4-byte word
+ * of a register.
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (is_vx_task(child))
tmp = *(__u32 *)
- ((addr_t) child->thread.vxrs + 2*offset);
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
else
tmp = *(__u32 *)
- ((addr_t) &child->thread.fp_regs.fprs + offset);
+ ((addr_t) child->thread.fpu.fprs + offset);
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
@@ -746,20 +734,20 @@
*/
if (test_fp_ctl(tmp))
return -EINVAL;
- child->thread.fp_regs.fpc = data;
+ child->thread.fpu.fpc = data;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * The floating point regs. are stored in the save area that
+ * child->thread.fpu.fprs (or, for a vector task,
+ * child->thread.fpu.vxrs) points to. Both members are
+ * pointers: index from the pointer value, not from the
+ * address of the member, or the store lands in the thread
+ * structure instead of the save area.
+ * NOTE(review): 2*offset looks correct only for offsets that
+ * are a multiple of 8 - confirm for the second 4-byte word
+ * of a register.
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (is_vx_task(child))
*(__u32 *)((addr_t)
- child->thread.vxrs + 2*offset) = tmp;
+ child->thread.fpu.vxrs + 2*offset) = tmp;
else
*(__u32 *)((addr_t)
- &child->thread.fp_regs.fprs + offset) = tmp;
+ child->thread.fpu.fprs + offset) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
@@ -952,18 +940,16 @@
const struct user_regset *regset, unsigned int pos,
unsigned int count, void *kbuf, void __user *ubuf)
{
- if (target == current) {
- save_fp_ctl(&target->thread.fp_regs.fpc);
- save_fp_regs(target->thread.fp_regs.fprs);
- } else if (target->thread.vxrs) {
- int i;
+ _s390_fp_regs fp_regs;
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- target->thread.fp_regs.fprs[i] =
- *(freg_t *)(target->thread.vxrs + i);
- }
+ if (target == current)
+ save_fpu_regs();
+
+ fp_regs.fpc = target->thread.fpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.fpu);
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_regs, 0, -1);
+ &fp_regs, 0, -1);
}
static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +958,33 @@
const void __user *ubuf)
{
int rc = 0;
+ /* Staging copy of the FPRs; written back to the target at the end */
+ freg_t fprs[__NUM_FPRS];
- if (target == current) {
- save_fp_ctl(&target->thread.fp_regs.fpc);
- save_fp_regs(target->thread.fp_regs.fprs);
- }
+ if (target == current)
+ save_fpu_regs();
+
+ /*
+ * user_regset_copyin() below fills only the part of fprs[] that the
+ * request covers (possibly nothing, if only the FPC is written).
+ * Start from the target's current register contents so the write-back
+ * at the end cannot store uninitialized stack data into registers the
+ * caller did not ask to change.
+ */
+ if (is_vx_task(target))
+ convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
+ else
+ memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
/* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+ u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
return -EINVAL;
- target->thread.fp_regs.fpc = ufpc[0];
+ target->thread.fpu.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.fp_regs.fprs,
- offsetof(s390_fp_regs, fprs), -1);
+ fprs, offsetof(s390_fp_regs, fprs), -1);
+ if (rc)
+ return rc;
- if (rc == 0) {
- if (target == current) {
- restore_fp_ctl(&target->thread.fp_regs.fpc);
- restore_fp_regs(target->thread.fp_regs.fprs);
- } else if (target->thread.vxrs) {
- int i;
-
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- *(freg_t *)(target->thread.vxrs + i) =
- target->thread.fp_regs.fprs[i];
- }
- }
+ /* Write the staged registers back in the layout the target uses */
+ if (is_vx_task(target))
+ convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+ else
+ memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
return rc;
}
@@ -1069,11 +1047,11 @@
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (target->thread.vxrs) {
+ if (is_vx_task(target)) {
if (target == current)
- save_vx_regs(target->thread.vxrs);
+ save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+ vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
} else
memset(vxrs, 0, sizeof(vxrs));
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1089,20 +1067,17 @@
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (!target->thread.vxrs) {
+ if (!is_vx_task(target)) {
rc = alloc_vector_registers(target);
if (rc)
return rc;
} else if (target == current)
- save_vx_regs(target->thread.vxrs);
+ save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
- if (rc == 0) {
+ if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
- if (target == current)
- restore_vx_regs(target->thread.vxrs);
- }
+ *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
return rc;
}
@@ -1116,10 +1091,10 @@
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (target->thread.vxrs) {
+ if (is_vx_task(target)) {
if (target == current)
- save_vx_regs(target->thread.vxrs);
- memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+ save_fpu_regs();
+ memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(vxrs));
} else
memset(vxrs, 0, sizeof(vxrs));
@@ -1135,18 +1110,15 @@
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (!target->thread.vxrs) {
+ if (!is_vx_task(target)) {
rc = alloc_vector_registers(target);
if (rc)
return rc;
} else if (target == current)
- save_vx_regs(target->thread.vxrs);
+ save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
- if (rc == 0 && target == current)
- restore_vx_regs(target->thread.vxrs);
-
+ target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
return rc;
}
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9f60467..5090d3d 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,5 +1,6 @@
#include <linux/module.h>
#include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
#include <asm/ftrace.h>
#ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(sie64a);
EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
#endif
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
deleted file mode 100644
index ada0c07..0000000
--- a/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
-LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
-LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter
-LC_EXT_INT_CODE = 0x86 # addr of ext int code
-LC_AR_MODE_ID = 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-# R2 = 0 on interrupt, 2 on timeout
-# R3 = external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
- stm %r6,%r15,24(%r15) # save registers
- basr %r13,0 # get base register
-.LbaseS1:
- ahi %r15,-96 # create stack frame
- la %r8,LC_EXT_NEW_PSW # register int handler
- la %r9,.LextpswS1-.LbaseS1(%r13)
- tm LC_AR_MODE_ID,1
- jno .Lesa1
- la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit
- la %r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
- mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8)
- mvc 0(16,%r8),0(%r9)
- epsw %r6,%r7 # set current addressing mode
- nill %r6,0x1 # in new psw (31 or 64 bit mode)
- nilh %r7,0x8000
- stm %r6,%r7,0(%r8)
- lhi %r6,0x0200 # cr mask for ext int (cr0.54)
- ltr %r2,%r2
- jz .LsetctS1
- ahi %r6,0x0800 # cr mask for clock int (cr0.52)
- stck .LtimeS1-.LbaseS1(%r13) # initiate timeout
- al %r2,.LtimeS1-.LbaseS1(%r13)
- st %r2,.LtimeS1-.LbaseS1(%r13)
- sckc .LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
- stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts
- l %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r1,~(0x200 | 0x800) # clear old values
- nr %r1,%r0
- or %r1,%r6 # set new value
- st %r1,.LctlS1-.LbaseS1(%r13)
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13)
- st %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r2,2 # return code for timeout
-.LloopS1:
- lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
-.LwaitS1:
- lh %r7,LC_EXT_INT_CODE
- chi %r7,EXT_IRQ_CLK_COMP # timeout?
- je .LtimeoutS1
- chi %r7,EXT_IRQ_SERVICE_SIG # service int?
- jne .LloopS1
- sr %r2,%r2
- l %r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting
- # restore old handler
- mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
- lm %r6,%r15,120(%r15) # restore registers
- br %r14 # return to caller
-
- .align 8
-.LoldpswS1:
- .long 0, 0, 0, 0 # old ext int PSW
-.LextpswS1:
- .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int
-.LextpswS1_64:
- .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit
-.LwaitpswS1:
- .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int
-.LtimeS1:
- .quad 0 # current time
-.LctlS1:
- .long 0 # CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-# R2 = command word
-# R3 = sccb address
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-# R3 = sccb response code if R2 = 0
-#
-
-_sclp_servc:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- lr %r6,%r2 # save command word
- lr %r7,%r3 # save sccb address
-.LretryS2:
- lhi %r2,1 # error return code
- .insn rre,0xb2200000,%r6,%r7 # servc
- brc 1,.LendS2 # exit if not operational
- brc 8,.LnotbusyS2 # go on if not busy
- sr %r2,%r2 # wait until no longer busy
- bras %r14,_sclp_wait_int
- j .LretryS2 # retry
-.LnotbusyS2:
- sr %r2,%r2 # wait until result
- bras %r14,_sclp_wait_int
- sr %r2,%r2
- lh %r3,6(%r7)
-.LendS2:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-# R2 = 0 to activate, non-zero to deactivate
-#
-# Returns:
-# R2 = 0 on success, non-zero on failure
-#
-
-_sclp_setup:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS3:
- l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb
- mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
- ltr %r2,%r2 # initialization?
- jz .LdoinitS3 # go ahead
- # clear masks
- xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
- l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
- lr %r3,%r6 # get sccb address
- bras %r14,_sclp_servc # issue service call
- ltr %r2,%r2 # servc successful?
- jnz .LerrorS3
- chi %r3,0x20 # write mask successful?
- jne .LerrorS3
- # check masks
- la %r2,.LinitmaskS3-.LinitsccbS3(%r6)
- l %r1,0(%r2) # receive mask ok?
- n %r1,12(%r2)
- cl %r1,0(%r2)
- jne .LerrorS3
- l %r1,4(%r2) # send mask ok?
- n %r1,8(%r2)
- cl %r1,4(%r2)
- sr %r2,%r2
- je .LendS3
-.LerrorS3:
- lhi %r2,1 # error return code
-.LendS3:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-.LwritemaskS3:
- .long 0x00780005 # SCLP command for write mask
-.LinitsccbS3:
- .word .LinitendS3-.LinitsccbS3
- .byte 0,0,0,0
- .word 0
- .word 0
- .word 4
-.LinitmaskS3:
- .long 0x80000000
- .long 0x40000000
- .long 0
- .long 0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-_sclp_print:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS4:
- l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb
- mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
- la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr
- sr %r0,%r0
- l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table
-.LinitmtoS4:
- # initialize mto
- mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
- lhi %r6,.LmtoendS4-.LmtoS4 # current mto length
-.LloopS4:
- ic %r0,0(%r2) # get character
- ahi %r2,1
- ltr %r0,%r0 # end of string?
- jz .LfinalizemtoS4
- chi %r0,0x0a # end of line (NL)?
- jz .LfinalizemtoS4
- stc %r0,0(%r6,%r7) # copy to mto
- la %r11,0(%r6,%r7)
- tr 0(1,%r11),0(%r10) # translate to EBCDIC
- ahi %r6,1
- j .LloopS4
-.LfinalizemtoS4:
- sth %r6,0(%r7) # update mto length
- lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length
- ar %r9,%r6
- sth %r9,.LmdbS4-.LwritesccbS4(%r8)
- lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
- ar %r9,%r6
- sth %r9,.LevbufS4-.LwritesccbS4(%r8)
- lh %r9,0(%r8) # update sccb length
- ar %r9,%r6
- sth %r9,0(%r8)
- ar %r7,%r6 # update current mto address
- ltr %r0,%r0 # more characters?
- jnz .LinitmtoS4
- l %r2,.LwritedataS4-.LbaseS4(%r13)# write data
- lr %r3,%r8
- bras %r14,_sclp_servc
- ltr %r2,%r2 # servc successful?
- jnz .LendS4
- chi %r3,0x20 # write data successful?
- je .LendS4
- lhi %r2,1 # error return code
-.LendS4:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- tm LC_AR_MODE_ID,1
- jno .Lesa2
- ahi %r15,-80
- stmh %r6,%r15,96(%r15) # store upper register halves
- basr %r13,0
- lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves
-.Lesa2:
- lr %r10,%r2 # save string pointer
- lhi %r2,0
- bras %r14,_sclp_setup # enable console
- ltr %r2,%r2
- jnz .LendS5
- lr %r2,%r10
- bras %r14,_sclp_print # print string
- ltr %r2,%r2
- jnz .LendS5
- lhi %r2,1
- bras %r14,_sclp_setup # disable console
-.LendS5:
- tm LC_AR_MODE_ID,1
- jno .Lesa3
- lgfr %r2,%r2 # sign extend return value
- lmh %r6,%r15,96(%r15) # restore upper register halves
- ahi %r15,80
-.Lesa3:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-.Lzeroes:
- .fill 64,4,0
-
-.LwritedataS4:
- .long 0x00760005 # SCLP command for write data
-.LwritesccbS4:
- # sccb
- .word .LmtoS4-.LwritesccbS4
- .byte 0
- .byte 0,0,0
- .word 0
-
- # evbuf
-.LevbufS4:
- .word .LmtoS4-.LevbufS4
- .byte 0x02
- .byte 0
- .word 0
-
-.LmdbS4:
- # mdb
- .word .LmtoS4-.LmdbS4
- .word 1
- .long 0xd4c4c240
- .long 1
-
- # go
-.LgoS4:
- .word .LmtoS4-.LgoS4
- .word 1
- .long 0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0
- .byte 0
- .byte 0,0,0,0,0,0,0
- .byte 0
- .word 0
- .byte 0,0,0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
-
-.LmtoS4:
- .word .LmtoendS4-.LmtoS4
- .word 4
- .word 0x1000
- .byte 0
- .byte 0,0,0
-.LmtoendS4:
-
- # Global constants
-.LsccbS0:
- .long _sclp_work_area
-.Lascebc:
- .long _ascebc
-
-.section .data,"aw",@progbits
- .balign 4096
-_sclp_work_area:
- .fill 4096
-.previous
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
new file mode 100644
index 0000000..fa0bdff
--- /dev/null
+++ b/arch/s390/kernel/sclp.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+ unsigned long cr0, cr0_new, psw_mask, addr;
+ psw_t psw_ext_save, psw_wait;
+
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 | 0x200;
+ __ctl_load(cr0_new, 0, 0);
+
+ psw_ext_save = S390_lowcore.external_new_psw;
+ psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+ S390_lowcore.external_new_psw.mask = psw_mask;
+ psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+ S390_lowcore.ext_int_code = 0;
+
+ do {
+ asm volatile(
+ " larl %[addr],0f\n"
+ " stg %[addr],%[psw_wait_addr]\n"
+ " stg %[addr],%[psw_ext_addr]\n"
+ " lpswe %[psw_wait]\n"
+ "0:\n"
+ : [addr] "=&d" (addr),
+ [psw_wait_addr] "=Q" (psw_wait.addr),
+ [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+ : [psw_wait] "Q" (psw_wait)
+ : "cc", "memory");
+ } while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+ __ctl_load(cr0, 0, 0);
+ S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+ unsigned int cc;
+
+ do {
+ asm volatile(
+ " .insn rre,0xb2200000,%1,%2\n"
+ " ipm %0\n"
+ : "=d" (cc) : "d" (cmd), "a" (sccb)
+ : "cc", "memory");
+ cc >>= 28;
+ if (cc == 3)
+ return -EINVAL;
+ _sclp_wait_int();
+ } while (cc != 0);
+ return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+ static unsigned char init_sccb[] = {
+ 0x00, 0x1c,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x04,
+ 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ };
+ unsigned int *masks;
+ int rc;
+
+ memcpy(_sclp_work_area, init_sccb, 28);
+ masks = (unsigned int *)(_sclp_work_area + 12);
+ if (disable)
+ memset(masks, 0, 16);
+ /* SCLP write mask */
+ rc = _sclp_servc(0x00780005, _sclp_work_area);
+ if (rc)
+ return rc;
+ if ((masks[0] & masks[3]) != masks[0] ||
+ (masks[1] & masks[2]) != masks[1])
+ return -EIO;
+ return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+ static unsigned char write_head[] = {
+ /* sccb header */
+ 0x00, 0x52, /* 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 2 */
+ /* evbuf */
+ 0x00, 0x4a, /* 8 */
+ 0x02, 0x00, 0x00, 0x00, /* 10 */
+ /* mdb */
+ 0x00, 0x44, /* 14 */
+ 0x00, 0x01, /* 16 */
+ 0xd4, 0xc4, 0xc2, 0x40, /* 18 */
+ 0x00, 0x00, 0x00, 0x01, /* 22 */
+ /* go */
+ 0x00, 0x38, /* 26 */
+ 0x00, 0x01, /* 28 */
+ 0x00, 0x00, 0x00, 0x00, /* 30 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 34 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 42 */
+ 0x00, 0x00, 0x00, 0x00, /* 50 */
+ 0x00, 0x00, /* 54 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 64 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 72 */
+ 0x00, 0x00, /* 80 */
+ };
+ static unsigned char write_mto[] = {
+ /* mto */
+ 0x00, 0x0a, /* 0 */
+ 0x00, 0x04, /* 2 */
+ 0x10, 0x00, /* 4 */
+ 0x00, 0x00, 0x00, 0x00 /* 6 */
+ };
+ unsigned char *ptr, ch;
+ unsigned int count;
+
+ memcpy(_sclp_work_area, write_head, sizeof(write_head));
+ ptr = _sclp_work_area + sizeof(write_head);
+ do {
+ memcpy(ptr, write_mto, sizeof(write_mto));
+ for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+ if (ch == 0x0a)
+ break;
+ ptr[count] = _ascebc[ch];
+ }
+ /* Update length fields in mto, mdb, evbuf and sccb */
+ *(unsigned short *) ptr = count;
+ *(unsigned short *)(_sclp_work_area + 14) += count;
+ *(unsigned short *)(_sclp_work_area + 8) += count;
+ *(unsigned short *)(_sclp_work_area + 0) += count;
+ ptr += count;
+ } while (ch != 0);
+
+ /* SCLP write data */
+ return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+ int rc;
+
+ rc = _sclp_setup(0);
+ if (rc)
+ return rc;
+ rc = _sclp_print(str);
+ if (rc)
+ return rc;
+ return _sclp_setup(1);
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ca070d2..ce0cbd6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -62,6 +62,7 @@
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/sysinfo.h>
+#include <asm/numa.h>
#include "entry.h"
/*
@@ -76,7 +77,7 @@
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];
int __initdata memory_end_set;
@@ -688,7 +689,7 @@
/*
* Setup hardware capabilities.
*/
-static void __init setup_hwcaps(void)
+static int __init setup_hwcaps(void)
{
static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
struct cpuid cpu_id;
@@ -754,9 +755,11 @@
elf_hwcap |= HWCAP_S390_TE;
/*
- * Vector extension HWCAP_S390_VXRS is bit 11.
+ * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+ * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+ * instead of facility bit 129.
*/
- if (test_facility(129))
+ if (MACHINE_HAS_VX)
elf_hwcap |= HWCAP_S390_VXRS;
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
@@ -793,7 +796,9 @@
strcpy(elf_platform, "z13");
break;
}
+ return 0;
}
+arch_initcall(setup_hwcaps);
/*
* Add system information as device randomness
@@ -879,11 +884,7 @@
setup_lowcore();
smp_fill_possible_mask();
cpu_init();
-
- /*
- * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
- */
- setup_hwcaps();
+ numa_setup();
/*
* Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c551f22..9549af1 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -105,32 +105,13 @@
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fp_ctl(&current->thread.fp_regs.fpc);
- if (current->thread.vxrs) {
- int i;
-
- save_vx_regs(current->thread.vxrs);
- for (i = 0; i < __NUM_FPRS; i++)
- current->thread.fp_regs.fprs[i] =
- *(freg_t *)(current->thread.vxrs + i);
- } else
- save_fp_regs(current->thread.fp_regs.fprs);
+ save_fpu_regs();
}
/* Load registers after signal return */
static void load_sigregs(void)
{
restore_access_regs(current->thread.acrs);
- /* restore_fp_ctl is done in restore_sigregs */
- if (current->thread.vxrs) {
- int i;
-
- for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(current->thread.vxrs + i) =
- current->thread.fp_regs.fprs[i];
- restore_vx_regs(current->thread.vxrs);
- } else
- restore_fp_regs(current->thread.fp_regs.fprs);
}
/* Returns non-zero on fault. */
@@ -146,8 +127,7 @@
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(user_sregs.fpregs));
+ fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
return -EFAULT;
return 0;
@@ -166,8 +146,8 @@
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
return -EINVAL;
- /* Loading the floating-point-control word can fail. Do that first. */
- if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
return -EINVAL;
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -185,8 +165,7 @@
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(current->thread.fp_regs));
+ fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -200,13 +179,13 @@
int i;
/* Save vector registers to signal stack */
- if (current->thread.vxrs) {
+ if (is_vx_task(current)) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.vxrs + __NUM_VXRS_LOW,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -220,15 +199,15 @@
int i;
/* Restore vector registers from signal stack */
- if (current->thread.vxrs) {
+ if (is_vx_task(current)) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
}
return 0;
}
@@ -243,6 +222,7 @@
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
+ save_fpu_regs();
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -266,6 +246,7 @@
set_current_blocked(&set);
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ save_fpu_regs();
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -400,7 +381,7 @@
uc_flags = 0;
if (MACHINE_HAS_VX) {
frame_size += sizeof(_sigregs_ext);
- if (current->thread.vxrs)
+ if (is_vx_task(current))
uc_flags |= UC_VXRS;
}
frame = get_sigframe(&ksig->ka, regs, frame_size);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6f54c17..c6355e6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -532,8 +532,8 @@
#ifdef CONFIG_CRASH_DUMP
-static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
- int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+ u16 address, int is_boot_cpu)
{
void *lc = (void *)(unsigned long) store_prefix();
unsigned long vx_sa;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 1acad02..f3f4a13 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -276,9 +276,9 @@
SYSCALL(sys_statfs64,compat_sys_statfs64)
SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL /* 268 sys_mbind */
-NI_SYSCALL /* 269 sys_get_mempolicy */
-NI_SYSCALL /* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
SYSCALL(sys_mq_open,compat_sys_mq_open)
SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -295,7 +295,7 @@
SYSCALL(sys_inotify_init,sys_inotify_init)
SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL /* 287 sys_migrate_pages */
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
SYSCALL(sys_openat,compat_sys_openat)
SYSCALL(sys_mkdirat,compat_sys_mkdirat)
SYSCALL(sys_mknodat,compat_sys_mknodat) /* 290 */
@@ -318,7 +318,7 @@
SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
SYSCALL(sys_tee,compat_sys_tee)
SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL /* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
SYSCALL(sys_getcpu,compat_sys_getcpu)
SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
SYSCALL(sys_utimes,compat_sys_utimes)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5728c5b..bf05e7f 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -18,7 +18,10 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
#include <asm/sysinfo.h>
+#include <asm/numa.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -37,8 +40,10 @@
static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
static struct mask_info socket_info;
static struct mask_info book_info;
@@ -188,7 +193,6 @@
{
struct cpuid cpu_id;
- spin_lock_irq(&topology_lock);
get_cpu_id(&cpu_id);
clear_masks();
switch (cpu_id.machine) {
@@ -199,7 +203,6 @@
default:
__tl_to_masks_generic(info);
}
- spin_unlock_irq(&topology_lock);
}
static void topology_update_polarization_simple(void)
@@ -244,10 +247,8 @@
static void update_cpu_masks(void)
{
- unsigned long flags;
int cpu;
- spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
@@ -259,7 +260,7 @@
per_cpu(cpu_topology, cpu).book_id = cpu;
}
}
- spin_unlock_irqrestore(&topology_lock, flags);
+ numa_update_cpu_topology();
}
void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +275,21 @@
{
struct sysinfo_15_1_x *info = tl_info;
struct device *dev;
- int cpu;
+ int cpu, rc = 0;
- if (!MACHINE_HAS_TOPOLOGY) {
- update_cpu_masks();
- topology_update_polarization_simple();
- return 0;
+ if (MACHINE_HAS_TOPOLOGY) {
+ rc = 1;
+ store_topology(info);
+ tl_to_masks(info);
}
- store_topology(info);
- tl_to_masks(info);
update_cpu_masks();
+ if (!MACHINE_HAS_TOPOLOGY)
+ topology_update_polarization_simple();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
- return 1;
+ return rc;
}
static void topology_work_fn(struct work_struct *work)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 7bea81d..9861613 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
#include "entry.h"
int show_unhandled_signals = 1;
@@ -151,7 +151,7 @@
DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
"transaction constraint exception")
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
/* FPC[2] is Data Exception Code */
@@ -227,7 +227,7 @@
int alloc_vector_registers(struct task_struct *tsk)
{
__vector128 *vxrs;
- int i;
+ freg_t *fprs;
/* Allocate vector register save area. */
vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -236,15 +236,13 @@
return -ENOMEM;
preempt_disable();
if (tsk == current)
- save_fp_regs(tsk->thread.fp_regs.fprs);
+ save_fpu_regs();
/* Copy the 16 floating point registers */
- for (i = 0; i < 16; i++)
- *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
- tsk->thread.vxrs = vxrs;
- if (tsk == current) {
- __ctl_set_bit(0, 17);
- restore_vx_regs(vxrs);
- }
+ convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+ fprs = tsk->thread.fpu.fprs;
+ tsk->thread.fpu.vxrs = vxrs;
+ tsk->thread.fpu.flags |= FPU_USE_VX;
+ kfree(fprs);
preempt_enable();
return 0;
}
@@ -259,8 +257,8 @@
}
/* get vector interrupt code from fpc */
- asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
- vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+ save_fpu_regs();
+ vic = (current->thread.fpu.fpc & 0xf00) >> 8;
switch (vic) {
case 1: /* invalid vector operation */
si_code = FPE_FLTINV;
@@ -297,22 +295,22 @@
location = get_trap_ip(regs);
- asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
+ save_fpu_regs();
/* Check for vector register enablement */
- if (MACHINE_HAS_VX && !current->thread.vxrs &&
- (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+ if (MACHINE_HAS_VX && !is_vx_task(current) &&
+ (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
alloc_vector_registers(current);
/* Vector data exception is suppressing, rewind psw. */
regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
clear_pt_regs_flag(regs, PIF_PER_TRAP);
return;
}
- if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+ if (current->thread.fpu.fpc & FPC_DXC_MASK)
signal = SIGFPE;
else
signal = SIGILL;
if (signal == SIGFPE)
- do_fp_trap(regs, current->thread.fp_regs.fpc);
+ do_fp_trap(regs, current->thread.fpu.fpc);
else if (signal)
do_trap(regs, signal, ILL_ILLOPN, "data exception");
}
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 8ad2b34..ee8a18e 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 2a8ddfd..c4b03f9 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e53d359..b9ce650 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -28,6 +28,7 @@
static DEFINE_PER_CPU(u64, mt_cycles[32]);
static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
static inline u64 get_vtimer(void)
{
@@ -85,7 +86,8 @@
S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
/* Do MT utilization calculation */
- if (smp_cpu_mtid) {
+ if (smp_cpu_mtid &&
+ time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
u64 cycles_new[32], *cycles_old;
u64 delta, mult, div;
@@ -105,6 +107,7 @@
sizeof(u64) * (smp_cpu_mtid + 1));
}
}
+ __this_cpu_write(mt_scaling_jiffies, jiffies_64);
}
user = S390_lowcore.user_timer - ti->user_timer;
@@ -376,4 +379,11 @@
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
+ /* Setup initial MT scaling values */
+ if (smp_cpu_mtid) {
+ __this_cpu_write(mt_scaling_jiffies, jiffies);
+ __this_cpu_write(mt_scaling_mult, 1);
+ __this_cpu_write(mt_scaling_div, 1);
+ stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+ }
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6861b74..98df53c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1283,21 +1283,54 @@
return 0;
}
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination. Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+ dst->fpc = current->thread.fpu.fpc;
+ dst->flags = current->thread.fpu.flags;
+ dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+ current->thread.fpu.fpc = from->fpc;
+ current->thread.fpu.flags = from->flags;
+ current->thread.fpu.regs = from->regs;
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- if (test_kvm_facility(vcpu->kvm, 129))
- save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
- else
- save_fp_regs(vcpu->arch.host_fpregs.fprs);
- save_access_regs(vcpu->arch.host_acrs);
+ /* Save host register state */
+ save_fpu_regs();
+ save_fpu_to(&vcpu->arch.host_fpregs);
+
if (test_kvm_facility(vcpu->kvm, 129)) {
- restore_fp_ctl(&vcpu->run->s.regs.fpc);
- restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- } else {
- restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
- }
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ current->thread.fpu.flags = FPU_USE_VX;
+ /*
+ * Use the register save area in the SIE-control block
+ * for register restore and save in kvm_arch_vcpu_put()
+ */
+ current->thread.fpu.vxrs =
+ (__vector128 *)&vcpu->run->s.regs.vrs;
+ /* Always enable the vector extension for KVM */
+ __ctl_set_vx();
+ } else
+ load_fpu_from(&vcpu->arch.guest_fpregs);
+
+ if (test_fp_ctl(current->thread.fpu.fpc))
+ /* User space provided an invalid FPC, let's clear it */
+ current->thread.fpu.fpc = 0;
+
+ save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1307,19 +1340,22 @@
{
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
- if (test_kvm_facility(vcpu->kvm, 129)) {
- save_fp_ctl(&vcpu->run->s.regs.fpc);
- save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- } else {
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
- }
- save_access_regs(vcpu->run->s.regs.acrs);
- restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+ save_fpu_regs();
+
if (test_kvm_facility(vcpu->kvm, 129))
- restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+ /*
+ * kvm_arch_vcpu_load() set up the register save area to
+ * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+ * are already saved. Only the floating-point control must be
+ * copied.
+ */
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
else
- restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+ save_fpu_to(&vcpu->arch.guest_fpregs);
+ load_fpu_from(&vcpu->arch.host_fpregs);
+
+ save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
}
@@ -1464,7 +1500,6 @@
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
- vcpu->arch.host_vregs = &sie_page->vregs;
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
@@ -1486,6 +1521,19 @@
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ /*
+ * Allocate a save area for floating-point registers. If the vector
+ * extension is available, register contents are saved in the SIE
+ * control block. The allocated save area is still required in
+ * particular places, for example, in kvm_s390_vcpu_store_status().
+ */
+ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+ GFP_KERNEL);
+ if (!vcpu->arch.guest_fpregs.fprs) {
+ rc = -ENOMEM;
+ goto out_free_sie_block;
+ }
+
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
@@ -1708,16 +1756,16 @@
{
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+ memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_fpu_regs();
+ load_fpu_from(&vcpu->arch.guest_fpregs);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+ memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->arch.guest_fpregs.fpc;
return 0;
}
@@ -2268,8 +2316,21 @@
* copying in vcpu load/put. Lets update our copies before we save
* it into the save area
*/
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_fpu_regs();
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ /*
+ * If the vector extension is available, the vector registers
+ * which overlaps with floating-point registers are saved in
+ * the SIE-control block. Hence, extract the floating-point
+ * registers and the FPC value and store them in the
+ * guest_fpregs structure.
+ */
+ WARN_ON(!is_vx_task(current)); /* XXX remove later */
+ vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+ convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+ current->thread.fpu.vxrs);
+ } else
+ save_fpu_to(&vcpu->arch.guest_fpregs);
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2296,10 +2357,13 @@
/*
* The guest VXRS are in the host VXRs due to the lazy
- * copying in vcpu load/put. Let's update our copies before we save
- * it into the save area.
+ * copying in vcpu load/put. We can simply call save_fpu_regs()
+ * to save the current register state because we are in the
+ * middle of a load/put cycle.
+ *
+ * Let's update our copies before we save it into the save area.
*/
- save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+ save_fpu_regs();
return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 16dc42d..246a7eb 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -26,6 +26,7 @@
*/
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
+EXPORT_SYMBOL(__delay);
static void __udelay_disabled(unsigned long long usecs)
{
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 4614d41..0d002a7 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -370,22 +370,9 @@
}
EXPORT_SYMBOL(__strncpy_from_user);
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
- uaccess_primary = 1;
- return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
static int __init uaccess_init(void)
{
- if (!uaccess_primary && test_facility(27))
+ if (test_facility(27))
static_key_slow_inc(&have_mvcos);
return 0;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4c8f5d7..f985856 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -646,7 +646,7 @@
return;
inc_irq_stat(IRQEXT_PFL);
/* Get the token (= pid of the affected task). */
- pid = sizeof(void *) == 4 ? param32 : param64;
+ pid = param64;
rcu_read_lock();
tsk = find_task_by_pid_ns(pid, &init_pid_ns);
if (tsk)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1eb41bb..12bbf0e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@
do {
pte = *ptep;
barrier();
+ /* Similar to the PMD case, NUMA hinting must take slow path */
+ if (pte_protnone(pte))
+ return 0;
if ((pte_val(pte) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_protnone(pmd))
+ return 0;
if (!gup_huge_pmd(pmdp, pmd, addr, next,
write, pages, nr))
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76e8737..2963b56 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
#include <linux/initrd.h>
#include <linux/export.h>
#include <linux/gfp.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -138,7 +139,7 @@
cpumask_set_cpu(0, mm_cpumask(&init_mm));
atomic_set(&init_mm.context.attach_count, 1);
- max_mapnr = max_low_pfn;
+ set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* Setup guest page hinting */
@@ -170,37 +171,36 @@
#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
- unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+ unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
- struct zone *zone;
- int rc;
+ unsigned long nr_pages;
+ int rc, zone_enum;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- for_each_zone(zone) {
- if (zone_idx(zone) != ZONE_MOVABLE) {
- /* Add range within existing zone limits */
- zone_start_pfn = zone->zone_start_pfn;
- zone_end_pfn = zone->zone_start_pfn +
- zone->spanned_pages;
+
+ while (size_pages > 0) {
+ if (start_pfn < dma_end_pfn) {
+ nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+ dma_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_DMA;
+ } else if (start_pfn < normal_end_pfn) {
+ nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+ normal_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_NORMAL;
} else {
- /* Add remaining range to ZONE_MOVABLE */
- zone_start_pfn = start_pfn;
- zone_end_pfn = start_pfn + size_pages;
+ nr_pages = size_pages;
+ zone_enum = ZONE_MOVABLE;
}
- if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
- continue;
- nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
- zone_end_pfn - start_pfn : size_pages;
- rc = __add_pages(nid, zone, start_pfn, nr_pages);
+ rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+ start_pfn, nr_pages); /* this zone's share only */
if (rc)
break;
start_pfn += nr_pages;
size_pages -= nr_pages;
- if (!size_pages)
- break;
}
if (rc)
vmem_remove_mapping(start, size);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b33f661..54ef3bc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
@@ -28,12 +24,9 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#define ALLOC_ORDER 2
-#define FRAG_MASK 0x03
-
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return NULL;
@@ -42,7 +35,7 @@
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- free_pages((unsigned long) table, ALLOC_ORDER);
+ free_pages((unsigned long) table, 2);
}
static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
goto out_free;
page->index = 0;
@@ -247,7 +240,7 @@
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@
}
spin_unlock(&gmap->mm->page_table_lock);
if (page)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
return 0;
}
@@ -795,40 +788,6 @@
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
-static inline int page_table_with_pgste(struct page *page)
-{
- return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- struct page *page;
- unsigned long *table;
-
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- atomic_set(&page->_mapcount, 0);
- table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
- struct page *page;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq)
{
@@ -957,20 +916,6 @@
#else /* CONFIG_PGSTE */
-static inline int page_table_with_pgste(struct page *page)
-{
- return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
unsigned long vmaddr)
{
@@ -994,44 +939,55 @@
*/
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *uninitialized_var(table);
- struct page *uninitialized_var(page);
+ unsigned long *table;
+ struct page *page;
unsigned int mask, bit;
- if (mm_alloc_pgste(mm))
- return page_table_alloc_pgste(mm);
- /* Allocate fragments of a 4K page as 1K/2K page table */
- spin_lock_bh(&mm->context.list_lock);
- mask = FRAG_MASK;
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- table = (unsigned long *) page_to_phys(page);
- mask = atomic_read(&page->_mapcount);
- mask = mask | (mask >> 4);
- }
- if ((mask & FRAG_MASK) == FRAG_MASK) {
- spin_unlock_bh(&mm->context.list_lock);
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3; /* a half counts as busy if its bit is set in bits 0-1 or in bits 4-5 */
+ if (mask != 3) { /* at least one 2K half is available */
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit); /* flip the bit of the half handed out */
+ list_del(&page->lru);
+ }
}
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3); /* both low bits set, the page is never put on pgtable_list */
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
atomic_set(&page->_mapcount, 1);
- table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
- } else {
- for (bit = 1; mask & bit; bit <<= 1)
- table += PTRS_PER_PTE;
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if ((mask & FRAG_MASK) == FRAG_MASK)
- list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
}
- spin_unlock_bh(&mm->context.list_lock);
return table;
}
@@ -1041,37 +997,23 @@
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
- spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if (mask & FRAG_MASK)
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
}
-}
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
- struct page *page;
-
- if (bit == FRAG_MASK)
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | FRAG_MASK);
+ table = (unsigned long *) (__pa(table) | 3);
tlb_remove_table(tlb, table);
return;
}
- bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
- if (mask & FRAG_MASK)
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (bit << 4));
+ table = (unsigned long *) (__pa(table) | (1U << bit));
tlb_remove_table(tlb, table);
}
static void __tlb_remove_table(void *_table)
{
- const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
- void *table = (void *)((unsigned long) _table & ~mask);
- unsigned type = (unsigned long) _table & mask;
+ unsigned int mask = (unsigned long) _table & 3; /* type tag kept in the two low pointer bits */
+ void *table = (void *)((unsigned long) _table ^ mask); /* pointer with the tag bits cleared again */
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (type)
- __page_table_free_rcu(table, type);
- else
- free_pages((unsigned long) table, ALLOC_ORDER);
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) /* flip the matching bit in bits 4-5; non-zero result keeps the page */
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
}
static void tlb_remove_table_smp_sync(void *arg)
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
new file mode 100644
index 0000000..f94ecaf
--- /dev/null
+++ b/arch/s390/numa/Makefile
@@ -0,0 +1,3 @@
+obj-y += numa.o
+obj-y += toptree.o
+obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
new file mode 100644
index 0000000..7de4e2f
--- /dev/null
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,530 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid to separate
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY 0
+#define DIST_CORE 1
+#define DIST_MC 2
+#define DIST_BOOK 3
+#define DIST_MAX 4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST 10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE -1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ *
+ * The structure is allocated by create_core_to_node_map(), which also
+ * marks every to_node_id[] entry as NODE_ID_FREE.
+ */
+static struct {
+ s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */
+ int total; /* Total number of pinned cores */
+ int per_node_target; /* Cores per node without extra cores */
+ int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Record that core "core_id" belongs to node "node_id".
+ *
+ * A core that is still free gets pinned and the counters are updated.
+ * For a core that is already pinned nothing changes; a warning is issued
+ * if it is pinned to a different node than requested.
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+	s32 *slot = &emu_cores->to_node_id[core_id];
+
+	if (*slot != NODE_ID_FREE) {
+		WARN_ON(*slot != node_id);
+		return;
+	}
+	emu_cores->per_node[node_id]++;
+	*slot = node_id;
+	emu_cores->total++;
+}
+
+/*
+ * How many cores are currently pinned to the given node
+ */
+static int cores_pinned(struct toptree *node)
+{
+	int nid = node->id;
+
+	return emu_cores->per_node[nid];
+}
+
+/*
+ * Look up the node ID a core is pinned to; NODE_ID_FREE means the core
+ * has not been pinned yet.
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+	int cid = core->id;
+
+	return emu_cores->to_node_id[cid];
+}
+
+/*
+ * Count the cores below "tree" that are still marked NODE_ID_FREE
+ */
+static int cores_free(struct toptree *tree)
+{
+	struct toptree *cur;
+	int unpinned = 0;
+
+	toptree_for_each(cur, tree, CORE) {
+		if (core_pinned_to_node_id(cur) != NODE_ID_FREE)
+			continue;
+		unpinned++;
+	}
+	return unpinned;
+}
+
+/*
+ * Return node of core, i.e. the ancestor three levels above it
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+	struct toptree *mc = core->parent;
+	struct toptree *book = mc->parent;
+
+	return book->parent;
+}
+
+/*
+ * Return book of core, i.e. the ancestor two levels above it
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+	struct toptree *mc = core->parent;
+
+	return mc->parent;
+}
+
+/*
+ * Return mc of core, which is the direct parent of the core in the tree
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+ return core->parent;
+}
+
+/*
+ * Distance between two cores
+ *
+ * Cores on books with different IDs are DIST_BOOK apart. On books with
+ * equal IDs, differing MC IDs yield DIST_MC. Otherwise (same core or a
+ * sibling on the same MC) the distance is DIST_CORE.
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+	bool same_book = core_book(core1)->id == core_book(core2)->id;
+	bool same_mc = core_mc(core1)->id == core_mc(core2)->id;
+
+	if (!same_book)
+		return DIST_BOOK;
+	return same_mc ? DIST_CORE : DIST_MC;
+}
+
+/*
+ * Distance of a node to a core
+ *
+ * This is the smallest core to core distance between "core" and any core
+ * already located in "node". A node without cores reports DIST_EMPTY.
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+	struct toptree *member;
+	int nearest = DIST_MAX;
+	int dist;
+
+	toptree_for_each(member, node, CORE) {
+		dist = dist_core_to_core(member, core);
+		if (dist < nearest)
+			nearest = dist;
+	}
+	if (nearest == DIST_MAX)
+		return DIST_EMPTY;
+	return nearest;
+}
+
+/*
+ * toptree_unify() removes nodes that ended up empty, so look up all
+ * "emu_nodes" node entries afterwards to have the missing ones recreated.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+	int nid = 0;
+
+	toptree_unify(tree);
+	while (nid < emu_nodes) {
+		toptree_get_child(tree, nid);
+		nid++;
+	}
+}
+
+/*
+ * Pick the node a core should go to.
+ *
+ * A core that is already pinned always gets its node back. Otherwise the
+ * nearest node that holds fewer than "emu_cores->per_node_target + extra"
+ * pinned cores is chosen; on equal distance the node found first wins.
+ * Returns NULL if no node qualifies.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+				     int extra)
+{
+	int limit = emu_cores->per_node_target + extra;
+	struct toptree *candidate = NULL;
+	struct toptree *node;
+	int nearest = DIST_MAX;
+	int dist;
+
+	toptree_for_each(node, numa, NODE) {
+		/* A pinned core stays where it is */
+		if (core_pinned_to_node_id(core) == node->id)
+			return node;
+		/* Nodes that reached the limit are no candidates */
+		if (cores_pinned(node) >= limit)
+			continue;
+		dist = dist_node_to_core(node, core);
+		if (dist >= nearest)
+			continue;
+		nearest = dist;
+		candidate = node;
+	}
+	return candidate;
+}
+
+/*
+ * Walk all cores left in "phys" and move each one to the node selected by
+ * node_for_core() for the given "extra" core count. The walk stops at the
+ * first core for which no node is available.
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+				   int extra)
+{
+	struct toptree *core, *next, *target;
+
+	toptree_for_each_safe(core, next, phys, CORE) {
+		target = node_for_core(numa, core, extra);
+		if (target == NULL)
+			break;
+		toptree_move(core, target);
+		pin_core_to_node(core->id, target->id);
+	}
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ *
+ * A structure is moved if its core count fits into the room that is left
+ * in the node with respect to "emu_cores->per_node_target". With "perfect"
+ * set it has to fill the remaining room exactly.
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+				    enum toptree_level level, bool perfect)
+{
+	int target = emu_cores->per_node_target;
+	struct toptree *cur, *next;
+	int room, size;
+	bool fits;
+
+	toptree_for_each_safe(cur, next, phys, level) {
+		room = target - toptree_count(node, CORE);
+		size = toptree_count(cur, CORE);
+		fits = perfect ? (room == size) : (room >= size);
+		if (fits)
+			toptree_move(cur, node);
+	}
+}
+
+/*
+ * Run move_level_to_numa_node() for every node of the NUMA tree. With
+ * "perfect" set only exactly fitting structures of the given level are
+ * moved, otherwise smaller ones are accepted as well.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+			       enum toptree_level level, bool perfect)
+{
+	struct toptree *target;
+
+	toptree_for_each(target, numa, NODE) {
+		move_level_to_numa_node(target, phys, level, perfect);
+	}
+}
+
+/*
+ * First run: move the big structures. Books are handled before MCs and
+ * for each level perfectly fitting structures are tried before smaller
+ * ones. All cores that arrived in the NUMA tree are pinned afterwards.
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+	static const enum toptree_level levels[] = { BOOK, MC };
+	struct toptree *core;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(levels); i++) {
+		move_level_to_numa(numa, phys, levels[i], true);
+		move_level_to_numa(numa, phys, levels[i], false);
+	}
+	toptree_for_each(core, numa, CORE) {
+		pin_core_to_node(core->id, core_node(core)->id);
+	}
+}
+
+/*
+ * Allocate a topology root with the given ID and create child nodes with
+ * the IDs 0 to "nodes" - 1 below it. Any allocation failure ends in a
+ * panic, so callers always get a valid tree.
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+	struct toptree *root;
+	int nid = 0;
+
+	root = toptree_alloc(TOPOLOGY, id);
+	if (root == NULL)
+		panic("NUMA emulation could not allocate topology");
+	while (nid < nodes) {
+		if (toptree_get_child(root, nid) == NULL)
+			panic("NUMA emulation could not allocate topology");
+		nid++;
+	}
+	return root;
+}
+
+/*
+ * Allocate the zeroed "emu_cores" structure and mark every core as not
+ * yet pinned. Panics if the allocation fails.
+ */
+static void create_core_to_node_map(void)
+{
+	s32 *slot, *end;
+
+	emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+	if (!emu_cores)
+		panic("Could not allocate cores to node memory");
+	end = emu_cores->to_node_id + ARRAY_SIZE(emu_cores->to_node_id);
+	for (slot = emu_cores->to_node_id; slot < end; slot++)
+		*slot = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ *
+ * A new tree with "emu_nodes" nodes is created and returned. The per node
+ * target is the number of already pinned cores plus the still free cores
+ * of "phys", divided by the number of nodes. Only the very first call
+ * moves whole books and MCs. Every call then distributes single cores,
+ * first without and then with one extra core per node. A warning is
+ * issued if "phys" still contains CPUs at the end.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+ static int first = 1;
+ struct toptree *numa;
+ int cores_total;
+
+ cores_total = emu_cores->total + cores_free(phys);
+ emu_cores->per_node_target = cores_total / emu_nodes;
+ numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+ if (first) {
+ toptree_to_numa_first(numa, phys);
+ first = 0;
+ }
+ toptree_to_numa_single(numa, phys, 0);
+ toptree_to_numa_single(numa, phys, 1);
+ toptree_unify_tree(numa);
+
+ WARN_ON(cpumask_weight(&phys->mask));
+ return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ *
+ * For every online CPU the book, MC and core entries are looked up (and
+ * created on first use) below the single node of the new tree, using the
+ * IDs found in the per-CPU topology data. The CPU is then added to the
+ * mask of its core and the masks of all ancestors are refreshed.
+ *
+ * toptree_get_child() returns NULL if a new entry cannot be allocated and
+ * dereferences the parent it is given. Each level is therefore only looked
+ * up if the level above exists; a missing entry ends in a panic.
+ */
+static struct toptree *toptree_from_topology(void)
+{
+	struct toptree *phys, *node, *book, *mc, *core;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+	for_each_online_cpu(cpu) {
+		top = &per_cpu(cpu_topology, cpu);
+		node = toptree_get_child(phys, 0);
+		book = node ? toptree_get_child(node, top->book_id) : NULL;
+		mc = book ? toptree_get_child(book, top->socket_id) : NULL;
+		core = mc ? toptree_get_child(mc, top->core_id) : NULL;
+		if (!core)
+			panic("NUMA emulation could not allocate memory");
+		cpumask_set_cpu(cpu, &core->mask);
+		toptree_update_mask(mc);
+	}
+	return phys;
+}
+
+/*
+ * Apply one core of the NUMA tree to the per-CPU topology data: every CPU
+ * of the core gets the thread, core and book masks of its core, MC and
+ * book, is added to the cpumask of its node and gets the node ID assigned.
+ */
+static void topology_add_core(struct toptree *core)
+{
+	struct toptree *mc = core_mc(core);
+	struct toptree *book = core_book(core);
+	int nid = core_node(core)->id;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	for_each_cpu(cpu, &core->mask) {
+		top = &per_cpu(cpu_topology, cpu);
+		cpumask_copy(&top->thread_mask, &core->mask);
+		cpumask_copy(&top->core_mask, &mc->mask);
+		cpumask_copy(&top->book_mask, &book->mask);
+		cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+		top->node_id = nid;
+	}
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ *
+ * All node cpumasks are emptied first and then rebuilt core by core.
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+	struct toptree *core;
+	int nid = 0;
+
+	/* Start with empty node masks */
+	while (nid < MAX_NUMNODES) {
+		cpumask_clear(node_to_cpumask_map[nid]);
+		nid++;
+	}
+
+	/* Let every core fill in its CPUs */
+	toptree_for_each(core, numa, CORE) {
+		topology_add_core(core);
+	}
+}
+
+/*
+ * Show the node to core mapping
+ *
+ * Does nothing unless "numa_debug_enabled" is set. Otherwise one debug
+ * line per emulated node is printed, listing the IDs of all cores that
+ * are pinned to that node.
+ */
+static void print_node_to_core_map(void)
+{
+ int nid, cid;
+
+ if (!numa_debug_enabled)
+ return;
+ printk(KERN_DEBUG "NUMA node to core mapping\n");
+ for (nid = 0; nid < emu_nodes; nid++) {
+ printk(KERN_DEBUG " node %3d: ", nid);
+ for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+ if (emu_cores->to_node_id[cid] == nid)
+ printk(KERN_CONT "%d ", cid);
+ }
+ printk(KERN_CONT "\n");
+ }
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * The core to node map is allocated on the first call. Both trees are
+ * temporary: the physical tree is freed once its cores were moved and the
+ * NUMA tree is freed after it has been applied to the CPU masks.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+ struct toptree *phys, *numa;
+
+ if (emu_cores == NULL)
+ create_core_to_node_map();
+ phys = toptree_from_topology();
+ numa = toptree_to_numa(phys);
+ toptree_free(phys);
+ toptree_to_topology(numa);
+ toptree_free(numa);
+ print_node_to_core_map();
+}
+
+/*
+ * Determine the stripe size to use: a size of zero selects CONFIG_EMU_SIZE,
+ * and the result is rounded up to a multiple of the memory block size
+ * (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+	if (!size)
+		size = CONFIG_EMU_SIZE;
+	return roundup(size, memory_block_size_bytes());
+}
+
+/*
+ * Limit the node count to the number of "emu_size" sized stripes that fit
+ * into the available memory, but never go below one node. A warning is
+ * printed if the requested count had to be reduced.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+	int limit = memblock.memory.total_size / emu_size;
+
+	if (limit < 1)
+		limit = 1;
+	if (nodes <= limit)
+		return nodes;
+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+	return limit;
+}
+
+/*
+ * Early emu setup
+ *
+ * The stripe size has to be adjusted first because the node count
+ * adjustment divides the available memory by "emu_size".
+ */
+static void emu_setup(void)
+{
+ emu_size = emu_setup_size_adjust(emu_size);
+ emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+ pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+ emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ *
+ * Memory is cut into stripes of "emu_size" bytes which are assigned to
+ * the nodes round robin.
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+	unsigned long pages_per_stripe = emu_size >> PAGE_SHIFT;
+	unsigned long stripe = pfn / pages_per_stripe;
+
+	return stripe % emu_nodes;
+}
+
+/*
+ * Return stripe size ("emu_size"), used as the ".align" callback of
+ * numa_mode_emu
+ */
+static unsigned long emu_align(void)
+{
+ return emu_size;
+}
+
+/*
+ * Return distance between two nodes: 0 for a node to itself and
+ * EMU_NODE_DIST for any two different nodes.
+ */
+static int emu_distance(int node1, int node2)
+{
+	return node1 == node2 ? 0 : EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ *
+ * The mode is selected by its name "emu"; all callbacks of struct
+ * numa_mode are provided.
+ */
+const struct numa_mode numa_mode_emu = {
+ .name = "emu",
+ .setup = emu_setup,
+ .update_cpu_topology = emu_update_cpu_topology,
+ .__pfn_to_nid = emu_pfn_to_nid,
+ .align = emu_align,
+ .distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ *
+ * Values that cannot be parsed as an integer or that are not positive are
+ * ignored and leave the current node count untouched. Larger values are
+ * capped at MAX_NUMNODES. Always returns 0.
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+	int count;
+
+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+		return 0;
+	emu_nodes = min(count, MAX_NUMNODES);
+	return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ *
+ * The parsed value is stored as is. emu_setup_size_adjust() later replaces
+ * a size of zero with CONFIG_EMU_SIZE and rounds the size up.
+ */
+static int __init early_parse_emu_size(char *p)
+{
+ emu_size = memparse(p, NULL);
+ return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
new file mode 100644
index 0000000..09b1d23
--- /dev/null
+++ b/arch/s390/numa/numa.c
@@ -0,0 +1,184 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+ .name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+/*
+ * numa_pfn_to_nid() - Node ID for a page frame number
+ *
+ * The active mode is asked for the node; a mode without "__pfn_to_nid"
+ * callback maps every page frame number to node 0.
+ */
+int numa_pfn_to_nid(unsigned long pfn)
+{
+	if (!mode->__pfn_to_nid)
+		return 0;
+	return mode->__pfn_to_nid(pfn);
+}
+
+/*
+ * numa_update_cpu_topology() - Forward a topology update to the mode
+ *
+ * Nothing happens if the active mode has no "update_cpu_topology" callback.
+ */
+void numa_update_cpu_topology(void)
+{
+	if (!mode->update_cpu_topology)
+		return;
+	mode->update_cpu_topology();
+}
+
+/*
+ * __node_distance() - Distance between two nodes
+ *
+ * The active mode is asked for the distance; a mode without "distance"
+ * callback reports 0 for every pair of nodes.
+ */
+int __node_distance(int a, int b)
+{
+	if (!mode->distance)
+		return 0;
+	return mode->distance(a, b);
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ *
+ * Returns a zeroed pg_data_t obtained from memblock and panics if no
+ * memory could be allocated.
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+	pg_data_t *pgdat = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+
+	if (pgdat == NULL)
+		panic("Could not allocate memory for node data!\n");
+	memset(pgdat, 0, sizeof(*pgdat));
+	return pgdat;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ *
+ * The memory is walked in steps of the alignment reported by the mode. A
+ * mode without "align" callback gets a step of ULONG_MAX, so the loop body
+ * runs only once. Node data is allocated for all MAX_NUMNODES node IDs,
+ * but only the entries of online nodes are filled in.
+ */
+static void __init numa_setup_memory(void)
+{
+ unsigned long cur_base, align, end_of_dram;
+ int nid = 0;
+
+ end_of_dram = memblock_end_of_DRAM();
+ align = mode->align ? mode->align() : ULONG_MAX;
+
+ /*
+ * Step through all available memory and assign it to the nodes
+ * indicated by the mode implementation.
+ * All nodes which are seen here will be set online.
+ */
+ cur_base = 0;
+ do {
+ nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+ node_set_online(nid);
+ memblock_set_node(cur_base, align, &memblock.memory, nid);
+ cur_base += align;
+ } while (cur_base < end_of_dram);
+
+ /* Allocate and fill out node_data */
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = alloc_node_data();
+
+ for_each_online_node(nid) {
+ unsigned long start_pfn, end_pfn;
+ unsigned long t_start, t_end;
+ int i;
+
+ start_pfn = ULONG_MAX;
+ end_pfn = 0;
+ for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) { /* find lowest start and highest end of the node */
+ if (t_start < start_pfn)
+ start_pfn = t_start;
+ if (t_end > end_pfn)
+ end_pfn = t_end;
+ }
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+ NODE_DATA(nid)->node_id = nid;
+ }
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Report the selected mode and call the mode's setup routine if it has
+ * one. Then assign the memory to the nodes and dump the resulting
+ * memblock configuration.
+ */
+void __init numa_setup(void)
+{
+ pr_info("NUMA mode: %s\n", mode->name);
+ if (mode->setup)
+ mode->setup();
+ numa_setup_memory();
+ memblock_dump_all();
+}
+
+
+/*
+ * numa_init_early() - Initialization initcall
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler.
+ *
+ * Registered as early_initcall, always returns 0.
+ */
+static int __init numa_init_early(void)
+{
+ /* Attach all possible CPUs to node 0 for now. */
+ cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+ return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ *
+ * register_one_node() is called for every online node and its result is
+ * not evaluated. Registered as device_initcall, always returns 0.
+ */
+static int __init numa_init_late(void)
+{
+ int nid;
+
+ for_each_online_node(nid)
+ register_one_node(nid);
+ return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm) /* kernel parameter "numa_debug", the value in parm is not looked at */
+{
+ numa_debug_enabled = 1; /* checked by print_node_to_core_map() in mode_emu.c */
+ return 0;
+}
+early_param("numa_debug", parse_debug);
+
+/*
+ * Kernel parameter: numa=<mode name>
+ *
+ * Select the NUMA mode whose name matches the parameter value. The
+ * emulation mode is only available with CONFIG_NUMA_EMU. A value that
+ * matches no mode leaves the current mode untouched. Always returns 0.
+ */
+static int __init parse_numa(char *parm)
+{
+	if (!strcmp(parm, numa_mode_plain.name))
+		mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+	if (!strcmp(parm, numa_mode_emu.name))
+		mode = &numa_mode_emu;
+#endif
+	return 0;
+}
+early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
new file mode 100644
index 0000000..08953b0b
--- /dev/null
+++ b/arch/s390/numa/numa_mode.h
@@ -0,0 +1,24 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+ char *name; /* Name of mode */
+ void (*setup)(void); /* Initialize mode (may be NULL) */
+ void (*update_cpu_topology)(void); /* Called by topology code (may be NULL) */
+ int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID (may be NULL) */
+ unsigned long (*align)(void); /* Minimum node alignment (may be NULL) */
+ int (*distance)(int a, int b); /* Distance between two nodes (may be NULL) */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
new file mode 100644
index 0000000..902d350
--- /dev/null
+++ b/arch/s390/numa/toptree.c
@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Create a fresh, unattached tree node
+ * @level: Vertical level of the node; leaves live on level 0
+ * @id: ID of the node, explicitly not unique beyond the node's siblings
+ *
+ * The node is zero-allocated with GFP_KERNEL and starts with empty child
+ * and sibling lists and an empty CPU mask.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL if the allocation failed
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+	struct toptree *node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		return NULL;
+
+	node->level = level;
+	node->id = id;
+	cpumask_clear(&node->mask);
+	INIT_LIST_HEAD(&node->sibling);
+	INIT_LIST_HEAD(&node->children);
+	return node;
+}
+
+/**
+ * toptree_remove - Detach a tree node from its parent
+ * @cand: Pointer to the node to detach, must have a parent
+ *
+ * @cand is taken off its parent's list of children and loses its parent
+ * pointer. The masks of the former parent and of all its ancestors are
+ * recalculated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+	struct toptree *former = cand->parent;
+
+	list_del_init(&cand->sibling);
+	cand->parent = NULL;
+	toptree_update_mask(former);
+}
+
+/**
+ * toptree_free - Discard a tree node and everything below it
+ * @cand: Pointer to the tree node to discard
+ *
+ * A node that still has a parent is detached with toptree_remove first.
+ * Then all children are freed recursively and finally @cand itself.
+ */
+void toptree_free(struct toptree *cand)
+{
+	struct toptree *cur, *next;
+
+	if (cand->parent != NULL)
+		toptree_remove(cand);
+	list_for_each_entry_safe(cur, next, &cand->children, sibling) {
+		toptree_free(cur);
+	}
+	kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Recalculate node bitmasks
+ * @cand: Pointer to the tree node to start with
+ *
+ * Starting with @cand and walking up to the root, the cpumask of each
+ * node is rebuilt as the union of the masks of its children.
+ *
+ * NOTE:
+ * This must not be called on leaves. A leaf has no children, so its CPU
+ * mask would be cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+	struct toptree *node = cand;
+	struct toptree *child;
+
+	do {
+		cpumask_clear(&node->mask);
+		list_for_each_entry(child, &node->children, sibling)
+			cpumask_or(&node->mask, &node->mask, &child->mask);
+		node = node->parent;
+	} while (node != NULL);
+}
+
+/**
+ * toptree_insert - Attach a tree node to a new parent
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node that becomes the parent of @cand
+ *
+ * @cand is appended to the children of @target, which has to be exactly
+ * one level above @cand. The masks of @target and its ancestors are
+ * updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+	if (cand == NULL || target == NULL ||
+	    target->level != cand->level + 1)
+		return -1;
+
+	cand->parent = target;
+	list_add_tail(&cand->sibling, &target->children);
+	toptree_update_mask(target);
+	return 0;
+}
+
+/**
+ * toptree_move_children - Hand all children of a node over to another node
+ * @cand: Pointer to the node that gives its children away
+ * @target: Pointer to the node that receives the children
+ *
+ * Every child of @cand is passed to toptree_move with @target as the
+ * destination.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &cand->children, sibling) {
+		toptree_move(cur, target);
+	}
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ *
+ * The merge is done with the help of a temporary copy of @cand: for each
+ * child with a non-empty mask the grandchildren are moved below a child of
+ * the copy that has the same ID. All original children are freed, so
+ * children with an empty mask vanish. The unified children are then moved
+ * back to @cand and are unified recursively.
+ *
+ * NOTE(review): the results of toptree_alloc and toptree_get_child are
+ * used without a NULL check; confirm that an allocation failure cannot
+ * happen here or needs handling.
+ */
+void toptree_unify(struct toptree *cand)
+{
+ struct toptree *child, *tmp, *cand_copy;
+
+ /* Threads cannot be split, cores are not split */
+ if (cand->level < 2)
+ return;
+
+ cand_copy = toptree_alloc(cand->level, 0);
+ toptree_for_each_child_safe(child, tmp, cand) {
+ struct toptree *tmpchild;
+
+ if (!cpumask_empty(&child->mask)) {
+ tmpchild = toptree_get_child(cand_copy, child->id);
+ toptree_move_children(child, tmpchild);
+ }
+ toptree_free(child);
+ }
+ toptree_move_children(cand_copy, cand);
+ toptree_free(cand_copy);
+
+ toptree_for_each_child(child, cand)
+ toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ *
+ * NOTE(review): the result of toptree_alloc is not checked before it is
+ * dereferenced in the loop condition; confirm whether an allocation
+ * failure needs handling here.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+ struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+ if (cand->level + 1 == target->level) {
+ toptree_remove(cand);
+ toptree_insert(cand, target);
+ return;
+ }
+
+ real_insert_point = NULL;
+ ptr = cand;
+ stack_target = NULL;
+
+ do {
+ tmp = stack_target;
+ stack_target = toptree_alloc(ptr->level + 1,
+ ptr->parent->id);
+ toptree_insert(tmp, stack_target); /* returns -1 without effect in the first round, tmp is NULL then */
+ if (!real_insert_point)
+ real_insert_point = stack_target; /* lowest stacking node, becomes the new parent of cand */
+ ptr = ptr->parent;
+ } while (stack_target->level < (target->level - 1));
+
+ toptree_remove(cand);
+ toptree_insert(cand, real_insert_point);
+ toptree_insert(stack_target, target); /* attach the top of the stack to the target */
+}
+
+/**
+ * toptree_get_child - Look up or create the child with a given ID
+ * @cand: Pointer to the tree node whose children are searched
+ * @id: The desired child's ID
+ *
+ * The first child of @cand that carries the requested ID is returned.
+ * If there is none, a new child with that ID is allocated one level
+ * below @cand and appended to @cand's children.
+ *
+ * RETURNS:
+ * The existing or new child, or NULL if a new child could not be allocated
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+	struct toptree *cur;
+
+	list_for_each_entry(cur, &cand->children, sibling) {
+		if (cur->id == id)
+			return cur;
+	}
+	cur = toptree_alloc(cand->level - 1, id);
+	toptree_insert(cur, cand);
+	return cur;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * The children are searched depth first in list order. A @context that
+ * is on the requested level itself is its own result.
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+	struct toptree *child, *hit;
+
+	if (context->level == level)
+		return context;
+
+	/* An empty list of children simply skips the loop */
+	list_for_each_entry(child, &context->children, sibling) {
+		hit = toptree_first(child, level);
+		if (hit != NULL)
+			return hit;
+	}
+	return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * The node that follows @cur in the children list of its parent, or NULL
+ * if @cur has no parent or is the last child of its parent.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+	struct toptree *parent = cur->parent;
+
+	if (parent == NULL || parent->children.prev == &cur->sibling)
+		return NULL;
+	return list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * The next sibling of @cur is taken if there is one. Otherwise the
+ * ancestors of @cur are climbed, up to the children of @context, until
+ * one of them has a next sibling with a descendant on level @level.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+ int level)
+{
+ struct toptree *cur_context, *tmp;
+
+ if (!cur)
+ return NULL;
+
+ if (context->level == level) /* context itself is the only node on that level */
+ return NULL;
+
+ tmp = toptree_next_sibling(cur);
+ if (tmp != NULL)
+ return tmp;
+
+ cur_context = cur;
+ while (cur_context->level < context->level - 1) { /* do not climb beyond the children of context */
+ /* Step up */
+ cur_context = cur_context->parent;
+ /* Step aside */
+ tmp = toptree_next_sibling(cur_context);
+ if (tmp != NULL) {
+ /* Step down */
+ tmp = toptree_first(tmp, level);
+ if (tmp != NULL)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * The count is obtained by walking the level with toptree_first and
+ * toptree_next.
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+	struct toptree *cur = toptree_first(context, level);
+	int cnt = 0;
+
+	while (cur != NULL) {
+		cnt++;
+		cur = toptree_next(cur, context, level);
+	}
+	return cnt;
+}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
new file mode 100644
index 0000000..bdf5020
--- /dev/null
+++ b/arch/s390/numa/toptree.h
@@ -0,0 +1,70 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+/*
+ * One node of the topology tree; @sibling links it into the
+ * @children list of its @parent.
+ */
+struct toptree {
+	int level;
+	int id;
+	cpumask_t mask;
+	struct toptree *parent;
+	struct list_head sibling;
+	struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level);
+
+/* Iterate over the direct children of @ptree. */
+#define toptree_for_each_child(child, ptree)				\
+	list_for_each_entry(child, &(ptree)->children, sibling)
+
+/* Same, using list_for_each_entry_safe() with @ptmp as its temporary. */
+#define toptree_for_each_child_safe(child, ptmp, ptree)			\
+	list_for_each_entry_safe(child, ptmp, &(ptree)->children, sibling)
+
+/* True if @ptree has no parent or is the tail of its parent's child list. */
+#define toptree_is_last(ptree)						\
+	(((ptree)->parent == NULL) ||					\
+	 ((ptree)->parent->children.prev == &(ptree)->sibling))
+
+/* Visit each node toptree_first()/toptree_next() yield for @cont, @ttype. */
+#define toptree_for_each(ptree, cont, ttype)				\
+	for (ptree = toptree_first(cont, ttype);			\
+	     ptree != NULL;						\
+	     ptree = toptree_next(ptree, cont, ttype))
+
+/* As toptree_for_each(), but the successor is fetched into @tmp in advance. */
+#define toptree_for_each_safe(ptree, tmp, cont, ttype)			\
+	for (ptree = toptree_first(cont, ttype),			\
+	     tmp = toptree_next(ptree, cont, ttype);			\
+	     ptree != NULL;						\
+	     ptree = tmp,						\
+	     tmp = toptree_next(ptree, cont, ttype))
+
+/* Run toptree_for_each() over @start's parent on @start's own level. */
+#define toptree_for_each_sibling(ptree, start)				\
+	toptree_for_each(ptree, (start)->parent, (start)->level)
+
+#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 598f023..17c04c7 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -76,11 +76,6 @@
static struct kmem_cache *zdev_fmb_cache;
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
- return (struct zpci_dev *) pdev->sysdata;
-}
-
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@
unsigned long offset,
unsigned long max)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
u64 addr;
int idx;
@@ -385,7 +380,7 @@
int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
unsigned int hwirq, msi_vecs;
unsigned long aisb;
struct msi_desc *msi;
@@ -460,7 +455,7 @@
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
int rc;
@@ -637,7 +632,7 @@
int i;
for (i = 0; i < PCI_BAR_COUNT; i++) {
- if (!zdev->bars[i].size)
+ if (!zdev->bars[i].size || !zdev->bars[i].res)
continue;
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -648,7 +643,7 @@
int pcibios_add_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
@@ -673,7 +668,7 @@
int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@
void pcibios_disable_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@
static int zpci_restore(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@
static int zpci_freeze(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
if (zdev->state != ZPCI_FN_STATE_ONLINE)
return 0;
@@ -777,17 +772,22 @@
ret = zpci_setup_bus_resources(zdev, &resources);
if (ret)
- return ret;
+ goto error;
zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
zdev, &resources);
if (!zdev->bus) {
- zpci_cleanup_bus_resources(zdev);
- return -EIO;
+ ret = -EIO;
+ goto error;
}
zdev->bus->max_bus_speed = zdev->max_bus_speed;
pci_bus_add_devices(zdev->bus);
return 0;
+
+error:
+ zpci_cleanup_bus_resources(zdev);
+ pci_free_resource_list(&resources);
+ return ret;
}
int zpci_enable_device(struct zpci_dev *zdev)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6fd8d58..42b7658 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -277,7 +277,7 @@
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long nr_pages, iommu_page_index;
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
@@ -316,7 +316,7 @@
size_t size, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long iommu_page_index;
int npages;
@@ -337,7 +337,7 @@
dma_addr_t *dma_handle, gfp_t flag,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
struct page *page;
unsigned long pa;
dma_addr_t map;
@@ -367,7 +367,7 @@
void *pa, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
size = PAGE_ALIGN(size);
atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ed2394d..369a3e0 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,15 +46,13 @@
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
- if (!zdev)
- return;
-
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
- pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
}
void zpci_event_error(void *data)
@@ -89,7 +87,9 @@
ret = zpci_enable_device(zdev);
if (ret)
break;
+ pci_lock_rescan_remove();
pci_rescan_bus(zdev->bus);
+ pci_unlock_rescan_remove();
break;
case 0x0302: /* Reserved -> Standby */
if (!zdev)
@@ -97,7 +97,7 @@
break;
case 0x0303: /* Deconfiguration requested */
if (pdev)
- pci_stop_and_remove_bus_device(pdev);
+ pci_stop_and_remove_bus_device_locked(pdev);
ret = zpci_disable_device(zdev);
if (ret)
@@ -114,7 +114,7 @@
/* Give the driver a hint that the function is
* already unusable. */
pdev->error_state = pci_channel_io_perm_failure;
- pci_stop_and_remove_bus_device(pdev);
+ pci_stop_and_remove_bus_device_locked(pdev);
}
zdev->fh = ccdf->fh;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 85267c0..dcc2634 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,10 +8,23 @@
#include <linux/errno.h>
#include <linux/delay.h>
#include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
#include <asm/processor.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
+/*
+ * Hand the outcome of a failed PCI instruction to zpci_err_hex().
+ *
+ * @cc:     condition code of the instruction
+ * @status: status byte reported alongside @cc
+ * @req:    first 64 bit operand as passed by the caller
+ * @offset: second 64 bit operand as passed by the caller
+ *
+ * All sizeof(data) bytes are dumped, so the record is laid out
+ * widest-member-first and packed: it then holds the four values only and
+ * no compiler-inserted padding with indeterminate content.
+ */
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+	struct {
+		u64 req;
+		u64 offset;
+		u8 cc;
+		u8 status;
+	} __packed data = {req, offset, cc, status};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
/* Modify PCI Function Controls */
static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
{
@@ -38,8 +51,8 @@
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d\n",
- __func__, cc, status);
+ zpci_err_insn(cc, status, req, 0);
+
return (cc) ? -EIO : 0;
}
@@ -72,8 +85,8 @@
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n",
- __func__, cc, status, addr, range);
+ zpci_err_insn(cc, status, addr, range);
+
return (cc) ? -EIO : 0;
}
@@ -121,8 +134,8 @@
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index fa3ce89..f37a580 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -16,7 +16,7 @@
static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); \
\
return sprintf(buf, fmt, zdev->member); \
} \
@@ -38,23 +38,30 @@
const char *buf, size_t count)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
int ret;
if (!device_remove_file_self(dev, attr))
return count;
+ pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
ret = zpci_disable_device(zdev);
if (ret)
- return ret;
+ goto error;
ret = zpci_enable_device(zdev);
if (ret)
- return ret;
+ goto error;
pci_rescan_bus(zdev->bus);
+ pci_unlock_rescan_remove();
+
return count;
+
+error:
+ pci_unlock_rescan_remove();
+ return ret;
}
static DEVICE_ATTR_WO(recover);
@@ -64,7 +71,7 @@
{
struct device *dev = kobj_to_dev(kobj);
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index ee3a6fa..fe07f31 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -58,7 +58,7 @@
&& !strncmp(pos->uid.serial, uid->serial,
sizeof(uid->serial)))
return pos;
- };
+ }
return NULL;
}
@@ -69,7 +69,7 @@
list_for_each_entry(pos, &server->lculist, lcu) {
if (pos->uid.ssid == uid->ssid)
return pos;
- };
+ }
return NULL;
}
@@ -97,7 +97,7 @@
if (pos->uid.base_unit_addr == search_unit_addr &&
!strncmp(pos->uid.vduit, uid->vduit, sizeof(uid->vduit)))
return pos;
- };
+ }
return NULL;
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 6215f64..62a3235 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1036,7 +1036,7 @@
{
void *conf_data;
int conf_len, conf_data_saved;
- int rc, path_err;
+ int rc, path_err, pos;
__u8 lpm, opm;
struct dasd_eckd_private *private, path_private;
struct dasd_path *path_data;
@@ -1068,6 +1068,17 @@
path_data->opm |= lpm;
continue; /* no error */
}
+ /* translate path mask to position in mask */
+ pos = 8 - ffs(lpm);
+ kfree(private->path_conf_data[pos]);
+ if ((__u8 *)private->path_conf_data[pos] ==
+ private->conf_data) {
+ private->conf_data = NULL;
+ private->conf_len = 0;
+ conf_data_saved = 0;
+ }
+ private->path_conf_data[pos] =
+ (struct dasd_conf_data *) conf_data;
/* save first valid configuration data */
if (!conf_data_saved) {
kfree(private->conf_data);
@@ -1095,7 +1106,6 @@
kfree(conf_data);
continue;
}
-
if (dasd_eckd_compare_path_uid(
device, &path_private)) {
uid = &path_private.uid;
@@ -1157,9 +1167,6 @@
path_data->cablepm &= ~lpm;
path_data->hpfpm &= ~lpm;
path_data->cuirpm &= ~lpm;
-
- if (conf_data != private->conf_data)
- kfree(conf_data);
}
return path_err;
@@ -1259,7 +1266,11 @@
schedule_work(work);
return;
}
-
+ /* check if path verification already running and delay if so */
+ if (test_and_set_bit(DASD_FLAG_PATH_VERIFY, &device->flags)) {
+ schedule_work(work);
+ return;
+ }
opm = 0;
npm = 0;
ppm = 0;
@@ -1402,7 +1413,7 @@
device->path_data.hpfpm |= hpfpm;
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
}
-
+ clear_bit(DASD_FLAG_PATH_VERIFY, &device->flags);
dasd_put_device(device);
if (data->isglobal)
mutex_unlock(&dasd_path_verification_mutex);
@@ -1810,6 +1821,7 @@
static void dasd_eckd_uncheck_device(struct dasd_device *device)
{
struct dasd_eckd_private *private;
+ int i;
private = (struct dasd_eckd_private *) device->private;
dasd_alias_disconnect_device_from_lcu(device);
@@ -1818,6 +1830,15 @@
private->vdsneq = NULL;
private->gneq = NULL;
private->conf_len = 0;
+ for (i = 0; i < 8; i++) {
+ kfree(private->path_conf_data[i]);
+ if ((__u8 *)private->path_conf_data[i] ==
+ private->conf_data) {
+ private->conf_data = NULL;
+ private->conf_len = 0;
+ }
+ private->path_conf_data[i] = NULL;
+ }
kfree(private->conf_data);
private->conf_data = NULL;
}
@@ -3968,7 +3989,7 @@
rc = -EFAULT;
if (copy_from_user(&usrparm, argp, sizeof(usrparm)))
goto out;
- if (is_compat_task() || sizeof(long) == 4) {
+ if (is_compat_task()) {
/* Make sure pointers are sane even on 31 bit. */
rc = -EINVAL;
if ((usrparm.psf_data >> 32) != 0)
@@ -4525,12 +4546,13 @@
cqr->startdev = device;
cqr->memdev = device;
cqr->block = NULL;
- cqr->retries = 256;
cqr->expires = 10 * HZ;
-
- /* we need to check for messages on exactly this path */
set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags);
- cqr->lpm = lpum;
+ /* dasd_sleep_on_immediatly does not do complex error
+ * recovery so clear erp flag and set retry counter to
+ * do basic erp */
+ clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+ cqr->retries = 256;
/* Prepare for Read Subsystem Data */
prssdp = (struct dasd_psf_prssd_data *) cqr->data;
@@ -4605,10 +4627,10 @@
psf_cuir->message_id = message_id;
psf_cuir->cssid = sch_id.cssid;
psf_cuir->ssid = sch_id.ssid;
-
ccw = cqr->cpaddr;
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->cda = (__u32)(addr_t)psf_cuir;
+ ccw->flags = CCW_FLAG_SLI;
ccw->count = sizeof(struct dasd_psf_cuir_response);
cqr->startdev = device;
@@ -4618,6 +4640,7 @@
cqr->expires = 10*HZ;
cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;
+ set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags);
rc = dasd_sleep_on(cqr);
@@ -4625,118 +4648,252 @@
return rc;
}
-static int dasd_eckd_cuir_change_state(struct dasd_device *device, __u8 lpum)
+/*
+ * return configuration data that is referenced by record selector
+ * if a record selector is specified or per default return the
+ * conf_data pointer for the path specified by lpum
+ */
+static struct dasd_conf_data *dasd_eckd_get_ref_conf(struct dasd_device *device,
+ __u8 lpum,
+ struct dasd_cuir_message *cuir)
{
- unsigned long flags;
- __u8 tbcpm;
+ struct dasd_eckd_private *private;
+ struct dasd_conf_data *conf_data;
+ int path, pos;
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
- tbcpm = device->path_data.opm & ~lpum;
- if (tbcpm) {
- device->path_data.opm = tbcpm;
- device->path_data.cuirpm |= lpum;
+ private = (struct dasd_eckd_private *) device->private;
+ if (cuir->record_selector == 0)
+ goto out;
+ for (path = 0x80, pos = 0; path; path >>= 1, pos++) {
+ conf_data = private->path_conf_data[pos];
+ if (conf_data->gneq.record_selector ==
+ cuir->record_selector)
+ return conf_data;
}
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
- return tbcpm ? 0 : PSF_CUIR_LAST_PATH;
+out:
+ return private->path_conf_data[8 - ffs(lpum)];
}
/*
- * walk through all devices and quiesce them
- * if it is the last path return error
+ * This function determines the scope of a reconfiguration request by
+ * analysing the path and device selection data provided in the CUIR request.
+ * Returns a path mask containing CUIR affected paths for the given device.
+ *
+ * If the CUIR request does not contain the required information return the
+ * path mask of the path the attention message for the CUIR request was received
+ * on.
+ */
+static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum,
+				struct dasd_cuir_message *cuir)
+{
+	struct dasd_conf_data *ref_conf_data;
+	unsigned long bitmask = 0, mask = 0;
+	struct dasd_eckd_private *private;
+	struct dasd_conf_data *conf_data;
+	struct dasd_ned *ref_ned, *ned;
+	unsigned int pos, ned_pos, path;
+	char *ref_gneq, *gneq;
+	int tbcpm = 0;
+
+	/* if CUIR request does not specify the scope use the path
+	   the attention message was presented on */
+	if (!cuir->ned_map ||
+	    !(cuir->neq_map[0] | cuir->neq_map[1] | cuir->neq_map[2]))
+		return lpum;
+
+	private = (struct dasd_eckd_private *) device->private;
+	/* get reference conf data */
+	ref_conf_data = dasd_eckd_get_ref_conf(device, lpum, cuir);
+	/* no reference data available: treat like a request without scope */
+	if (!ref_conf_data)
+		return lpum;
+	/* reference ned is determined by ned_map field */
+	ned_pos = 8 - ffs(cuir->ned_map);
+	ref_ned = &ref_conf_data->neds[ned_pos];
+	ref_gneq = (char *)&ref_conf_data->gneq;
+	/* transfer 24 bit neq_map to mask */
+	mask = cuir->neq_map[2];
+	mask |= cuir->neq_map[1] << 8;
+	mask |= cuir->neq_map[0] << 16;
+
+	for (path = 0x80; path; path >>= 1) {
+		/* initialise data per path */
+		bitmask = mask;
+		pos = 8 - ffs(path);
+		conf_data = private->path_conf_data[pos];
+		/* a path without stored conf data cannot match */
+		if (!conf_data)
+			continue;
+		ned = &conf_data->neds[ned_pos];
+		/* compare the complete reference ned with the per path ned;
+		   ned is a struct pointer so sizeof(*ned) covers the whole
+		   struct dasd_ned and not just its first byte */
+		if (memcmp(ref_ned, ned, sizeof(*ned)) != 0)
+			continue;
+		gneq = (char *)&conf_data->gneq;
+		/* compare reference gneq and per path gneq under the 24 bit
+		   mask: mask bit n, counted from the least significant bit,
+		   selects byte 31 - n of the gneq */
+		while (bitmask) {
+			pos = ffs(bitmask) - 1;
+			if (ref_gneq[31 - pos] != gneq[31 - pos])
+				break;
+			clear_bit(pos, &bitmask);
+		}
+		if (bitmask)
+			continue;
+		/* device and path match the reference values
+		   add path to CUIR scope */
+		tbcpm |= path;
+	}
+	return tbcpm;
+}
+
+/*
+ * Emit one message per bit set in @paths: a warning if @action is
+ * CUIR_QUIESCE, an informational message if it is CUIR_RESUME and
+ * nothing for any other @action.
+ */
+static void dasd_eckd_cuir_notify_user(struct dasd_device *device,
+				       unsigned long paths,
+				       struct subchannel_id sch_id, int action)
+{
+	struct channel_path_desc *chp_desc;
+	int bit;
+
+	while (paths) {
+		/* lowest bit still set in the mask */
+		bit = ffs(paths) - 1;
+		/* channel path descriptor for index 7 - bit, may be NULL */
+		chp_desc = ccw_device_get_chp_desc(device->cdev, 7 - bit);
+		if (action == CUIR_QUIESCE)
+			pr_warn("Service on the storage server caused path "
+				"%x.%02x to go offline", sch_id.cssid,
+				chp_desc ? chp_desc->chpid : 0);
+		else if (action == CUIR_RESUME)
+			pr_info("Path %x.%02x is back online after service "
+				"on the storage server", sch_id.cssid,
+				chp_desc ? chp_desc->chpid : 0);
+		kfree(chp_desc);
+		/* this path is done, drop it from the local copy of the mask */
+		paths &= ~(1UL << bit);
+	}
+}
+
+/*
+ * Take the paths in the CUIR scope of @device out of its operational
+ * path mask and record them in cuirpm.
+ *
+ * Returns 0 if none of the scoped paths is operational, -EINVAL if the
+ * device would be left without an operational path (nothing is changed
+ * then), otherwise the path mask that was removed.
+ */
+static int dasd_eckd_cuir_remove_path(struct dasd_device *device, __u8 lpum,
+				      struct dasd_cuir_message *cuir)
+{
+	unsigned long scope = dasd_eckd_cuir_scope(device, lpum, cuir);
+
+	/* none of the affected paths is in use */
+	if ((device->path_data.opm & scope) == 0)
+		return 0;
+	/* the CUIR action would take away the last path */
+	if ((device->path_data.opm & ~scope) == 0)
+		return -EINVAL;
+
+	device->path_data.cuirpm |= scope;
+	device->path_data.opm &= ~scope;
+	return scope;
+}
+
+/*
+ * walk through all devices and build a path mask to quiesce them
+ * return an error if the last path to a device would be removed
*
* if only part of the devices are quiesced and an error
* occurs no onlining necessary, the storage server will
* notify the already set offline devices again
*/
static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum,
- struct channel_path_desc *desc,
- struct subchannel_id sch_id)
+ struct subchannel_id sch_id,
+ struct dasd_cuir_message *cuir)
{
struct alias_pav_group *pavgroup, *tempgroup;
struct dasd_eckd_private *private;
struct dasd_device *dev, *n;
- int rc;
+ unsigned long paths = 0;
+ unsigned long flags;
+ int tbcpm;
private = (struct dasd_eckd_private *) device->private;
- rc = 0;
-
/* active devices */
- list_for_each_entry_safe(dev, n,
- &private->lcu->active_devices,
+ list_for_each_entry_safe(dev, n, &private->lcu->active_devices,
alias_list) {
- rc = dasd_eckd_cuir_change_state(dev, lpum);
- if (rc)
- goto out;
+ spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+ tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+ spin_unlock_irqrestore(get_ccwdev_lock(dev->cdev), flags);
+ if (tbcpm < 0)
+ goto out_err;
+ paths |= tbcpm;
}
-
/* inactive devices */
- list_for_each_entry_safe(dev, n,
- &private->lcu->inactive_devices,
+ list_for_each_entry_safe(dev, n, &private->lcu->inactive_devices,
alias_list) {
- rc = dasd_eckd_cuir_change_state(dev, lpum);
- if (rc)
- goto out;
+ spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+ tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+ spin_unlock_irqrestore(get_ccwdev_lock(dev->cdev), flags);
+ if (tbcpm < 0)
+ goto out_err;
+ paths |= tbcpm;
}
-
/* devices in PAV groups */
list_for_each_entry_safe(pavgroup, tempgroup,
&private->lcu->grouplist, group) {
list_for_each_entry_safe(dev, n, &pavgroup->baselist,
alias_list) {
- rc = dasd_eckd_cuir_change_state(dev, lpum);
- if (rc)
- goto out;
+ spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+ tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+ spin_unlock_irqrestore(
+ get_ccwdev_lock(dev->cdev), flags);
+ if (tbcpm < 0)
+ goto out_err;
+ paths |= tbcpm;
}
list_for_each_entry_safe(dev, n, &pavgroup->aliaslist,
alias_list) {
- rc = dasd_eckd_cuir_change_state(dev, lpum);
- if (rc)
- goto out;
+ spin_lock_irqsave(get_ccwdev_lock(dev->cdev), flags);
+ tbcpm = dasd_eckd_cuir_remove_path(dev, lpum, cuir);
+ spin_unlock_irqrestore(
+ get_ccwdev_lock(dev->cdev), flags);
+ if (tbcpm < 0)
+ goto out_err;
+ paths |= tbcpm;
}
}
-
- pr_warn("Service on the storage server caused path %x.%02x to go offline",
- sch_id.cssid, desc ? desc->chpid : 0);
- rc = PSF_CUIR_COMPLETED;
-out:
- return rc;
+ /* notify user about all paths affected by CUIR action */
+ dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_QUIESCE);
+ return 0;
+out_err:
+ return tbcpm;
}
static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum,
- struct channel_path_desc *desc,
- struct subchannel_id sch_id)
+ struct subchannel_id sch_id,
+ struct dasd_cuir_message *cuir)
{
struct alias_pav_group *pavgroup, *tempgroup;
struct dasd_eckd_private *private;
struct dasd_device *dev, *n;
+ unsigned long paths = 0;
+ int tbcpm;
- pr_info("Path %x.%02x is back online after service on the storage server",
- sch_id.cssid, desc ? desc->chpid : 0);
private = (struct dasd_eckd_private *) device->private;
-
/*
* the path may have been added through a generic path event before
* only trigger path verification if the path is not already in use
*/
-
list_for_each_entry_safe(dev, n,
&private->lcu->active_devices,
alias_list) {
- if (!(dev->path_data.opm & lpum)) {
- dev->path_data.tbvpm |= lpum;
+ tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+ paths |= tbcpm;
+ if (!(dev->path_data.opm & tbcpm)) {
+ dev->path_data.tbvpm |= tbcpm;
dasd_schedule_device_bh(dev);
}
}
-
list_for_each_entry_safe(dev, n,
&private->lcu->inactive_devices,
alias_list) {
- if (!(dev->path_data.opm & lpum)) {
- dev->path_data.tbvpm |= lpum;
+ tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+ paths |= tbcpm;
+ if (!(dev->path_data.opm & tbcpm)) {
+ dev->path_data.tbvpm |= tbcpm;
dasd_schedule_device_bh(dev);
}
}
-
/* devices in PAV groups */
list_for_each_entry_safe(pavgroup, tempgroup,
&private->lcu->grouplist,
@@ -4744,21 +4901,27 @@
list_for_each_entry_safe(dev, n,
&pavgroup->baselist,
alias_list) {
- if (!(dev->path_data.opm & lpum)) {
- dev->path_data.tbvpm |= lpum;
+ tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+ paths |= tbcpm;
+ if (!(dev->path_data.opm & tbcpm)) {
+ dev->path_data.tbvpm |= tbcpm;
dasd_schedule_device_bh(dev);
}
}
list_for_each_entry_safe(dev, n,
&pavgroup->aliaslist,
alias_list) {
- if (!(dev->path_data.opm & lpum)) {
- dev->path_data.tbvpm |= lpum;
+ tbcpm = dasd_eckd_cuir_scope(dev, lpum, cuir);
+ paths |= tbcpm;
+ if (!(dev->path_data.opm & tbcpm)) {
+ dev->path_data.tbvpm |= tbcpm;
dasd_schedule_device_bh(dev);
}
}
}
- return PSF_CUIR_COMPLETED;
+ /* notify user about all paths affected by CUIR action */
+ dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_RESUME);
+ return 0;
}
static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages,
@@ -4768,8 +4931,12 @@
struct channel_path_desc *desc;
struct subchannel_id sch_id;
int pos, response;
- ccw_device_get_schid(device->cdev, &sch_id);
+ DBF_DEV_EVENT(DBF_WARNING, device,
+ "CUIR request: %016llx %016llx %016llx %08x",
+ ((u64 *)cuir)[0], ((u64 *)cuir)[1], ((u64 *)cuir)[2],
+ ((u32 *)cuir)[3]);
+ ccw_device_get_schid(device->cdev, &sch_id);
/* get position of path in mask */
pos = 8 - ffs(lpum);
/* get channel path descriptor from this position */
@@ -4777,18 +4944,26 @@
if (cuir->code == CUIR_QUIESCE) {
/* quiesce */
- response = dasd_eckd_cuir_quiesce(device, lpum, desc, sch_id);
+ if (dasd_eckd_cuir_quiesce(device, lpum, sch_id, cuir))
+ response = PSF_CUIR_LAST_PATH;
+ else
+ response = PSF_CUIR_COMPLETED;
} else if (cuir->code == CUIR_RESUME) {
/* resume */
- response = dasd_eckd_cuir_resume(device, lpum, desc, sch_id);
+ dasd_eckd_cuir_resume(device, lpum, sch_id, cuir);
+ response = PSF_CUIR_COMPLETED;
} else
response = PSF_CUIR_NOT_SUPPORTED;
- dasd_eckd_psf_cuir_response(device, response, cuir->message_id,
- desc, sch_id);
-
+ dasd_eckd_psf_cuir_response(device, response,
+ cuir->message_id, desc, sch_id);
+ DBF_DEV_EVENT(DBF_WARNING, device,
+ "CUIR response: %d on message ID %08x", response,
+ cuir->message_id);
/* free descriptor copy */
kfree(desc);
+ /* to make sure there is no attention left schedule work again */
+ device->discipline->check_attention(device, lpum);
}
static void dasd_eckd_check_attention_work(struct work_struct *work)
@@ -4800,22 +4975,18 @@
data = container_of(work, struct check_attention_work_data, worker);
device = data->device;
-
messages = kzalloc(sizeof(*messages), GFP_KERNEL);
if (!messages) {
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"Could not allocate attention message buffer");
goto out;
}
-
rc = dasd_eckd_read_message_buffer(device, messages, data->lpum);
if (rc)
goto out;
-
if (messages->length == ATTENTION_LENGTH_CUIR &&
messages->format == ATTENTION_FORMAT_CUIR)
dasd_eckd_handle_cuir(device, messages, data->lpum);
-
out:
dasd_put_device(device);
kfree(messages);
diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index ddab7df..f8f91ee 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h
@@ -355,7 +355,8 @@
__u8 identifier:2;
__u8 reserved:6;
} __attribute__ ((packed)) flags;
- __u8 reserved[5];
+ __u8 record_selector;
+ __u8 reserved[4];
struct {
__u8 value:2;
__u8 number:6;
@@ -492,10 +493,18 @@
struct dasd_device *next;
};
+/*
+ * Layout used to access the configuration data kept per path
+ * (see path_conf_data in struct dasd_eckd_private): five NEDs,
+ * 64 reserved bytes and the general NEQ, without padding.
+ */
+struct dasd_conf_data {
+ struct dasd_ned neds[5];
+ u8 reserved[64];
+ struct dasd_gneq gneq;
+} __packed;
+
struct dasd_eckd_private {
struct dasd_eckd_characteristics rdc_data;
u8 *conf_data;
int conf_len;
+ /* per path configuration data */
+ struct dasd_conf_data *path_conf_data[8];
/* pointers to specific parts in the conf_data */
struct dasd_ned *ned;
struct dasd_sneq *sneq;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 227e3de..4aed5ed 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -534,6 +534,7 @@
#define DASD_FLAG_SAFE_OFFLINE 10 /* safe offline processing requested*/
#define DASD_FLAG_SAFE_OFFLINE_RUNNING 11 /* safe offline running */
#define DASD_FLAG_ABORTALL 12 /* Abort all noretry requests */
+#define DASD_FLAG_PATH_VERIFY 13 /* Path verification worker running */
#define DASD_SLEEPON_START_TAG ((void *) 1)
#define DASD_SLEEPON_END_TAG ((void *) 2)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index da21281..dff3fcb 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -548,10 +548,10 @@
*/
num_of_segments = 0;
for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
- for (j = i; (buf[j] != ':') &&
+ for (j = i; j < count &&
+ (buf[j] != ':') &&
(buf[j] != '\0') &&
- (buf[j] != '\n') &&
- j < count; j++) {
+ (buf[j] != '\n'); j++) {
local_buf[j-i] = toupper(buf[j]);
}
local_buf[j-i] = '\0';
@@ -723,7 +723,7 @@
/*
* parse input
*/
- for (i = 0; ((*(buf+i)!='\0') && (*(buf+i)!='\n') && i < count); i++) {
+ for (i = 0; (i < count && (*(buf+i)!='\0') && (*(buf+i)!='\n')); i++) {
local_buf[i] = toupper(buf[i]);
}
local_buf[i] = '\0';
@@ -904,10 +904,10 @@
for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0');
i++) {
- for (j = i; (dcssblk_segments[j] != ',') &&
+ for (j = i; (j < DCSSBLK_PARM_LEN) &&
+ (dcssblk_segments[j] != ',') &&
(dcssblk_segments[j] != '\0') &&
- (dcssblk_segments[j] != '(') &&
- (j < DCSSBLK_PARM_LEN); j++)
+ (dcssblk_segments[j] != '('); j++)
{
buf[j-i] = dcssblk_segments[j];
}
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 75ffe99..7c511ad 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -413,6 +413,10 @@
else
/* Normal end. Copy residual count. */
rq->rescnt = irb->scsw.cmd.count;
+ } else if (irb->scsw.cmd.dstat & DEV_STAT_DEV_END) {
+ /* Interrupt without an outstanding request -> update all */
+ cp->update_flags = CON_UPDATE_ALL;
+ con3270_set_timer(cp, 1);
}
return RAW3270_IO_DONE;
}
diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c
index 8de2deb..f7d9258 100644
--- a/drivers/s390/char/ctrlchar.c
+++ b/drivers/s390/char/ctrlchar.c
@@ -14,15 +14,21 @@
#include "ctrlchar.h"
#ifdef CONFIG_MAGIC_SYSRQ
-static int ctrlchar_sysrq_key;
+static struct sysrq_work ctrlchar_sysrq;
static void
ctrlchar_handle_sysrq(struct work_struct *work)
{
- handle_sysrq(ctrlchar_sysrq_key);
+ struct sysrq_work *sysrq = container_of(work, struct sysrq_work, work);
+
+ handle_sysrq(sysrq->key);
}
-static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq);
+/*
+ * Set up @sw's work item with ctrlchar_handle_sysrq() as its handler and
+ * pass it to schedule_work(); the handler then calls handle_sysrq() with
+ * sw->key, which the caller must have filled in beforehand.
+ * NOTE(review): INIT_WORK() is repeated on every call - presumably callers
+ * must not reuse @sw while an earlier request is still pending; confirm.
+ */
+void schedule_sysrq_work(struct sysrq_work *sw)
+{
+ INIT_WORK(&sw->work, ctrlchar_handle_sysrq);
+ schedule_work(&sw->work);
+}
#endif
@@ -51,8 +57,8 @@
#ifdef CONFIG_MAGIC_SYSRQ
/* racy */
if (len == 3 && buf[1] == '-') {
- ctrlchar_sysrq_key = buf[2];
- schedule_work(&ctrlchar_work);
+ ctrlchar_sysrq.key = buf[2];
+ schedule_sysrq_work(&ctrlchar_sysrq);
return CTRLCHAR_SYSRQ;
}
#endif
diff --git a/drivers/s390/char/ctrlchar.h b/drivers/s390/char/ctrlchar.h
index 1a53552..59c2d6e 100644
--- a/drivers/s390/char/ctrlchar.h
+++ b/drivers/s390/char/ctrlchar.h
@@ -7,6 +7,8 @@
*/
#include <linux/tty.h>
+#include <linux/sysrq.h>
+#include <linux/workqueue.h>
extern unsigned int
ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty);
@@ -17,3 +19,13 @@
#define CTRLCHAR_SYSRQ (3 << 8)
#define CTRLCHAR_MASK (~0xffu)
+
+
+#ifdef CONFIG_MAGIC_SYSRQ
+/* A sysrq request handed to schedule_sysrq_work(): key plus its work item. */
+struct sysrq_work {
+ int key;
+ struct work_struct work;
+};
+
+void schedule_sysrq_work(struct sysrq_work *sw);
+#endif
diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c
index 9388963..12db8db 100644
--- a/drivers/s390/char/diag_ftp.c
+++ b/drivers/s390/char/diag_ftp.c
@@ -223,7 +223,7 @@
if (rc)
return rc;
- ctl_set_bit(0, 63 - 22);
+ irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
return 0;
}
@@ -232,6 +232,6 @@
*/
void diag_ftp_shutdown(void)
{
- ctl_clear_bit(0, 63 - 22);
+ irq_subclass_unregister(IRQ_SUBCLASS_SERVICE_SIGNAL);
unregister_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler);
}
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 0da3ae3..b7d6030 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -95,7 +95,7 @@
if (ascii_name[i] == '\0')
break;
ebcdic_name[i] = toupper(ascii_name[i]);
- };
+ }
for (; i < 8; i++)
ebcdic_name[i] = ' ';
ASCEBC(ebcdic_name, 8);
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 5e20513..f58bf4c 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -53,7 +53,7 @@
/* Number of console pages to allocate, used by sclp_con.c and sclp_vt220.c */
int sclp_console_pages = SCLP_CONSOLE_PAGES;
/* Flag to indicate if buffer pages are dropped on buffer full condition */
-int sclp_console_drop = 0;
+int sclp_console_drop = 1;
/* Number of times the console dropped buffer pages */
unsigned long sclp_console_full;
@@ -79,8 +79,8 @@
int drop, rc;
rc = kstrtoint(str, 0, &drop);
- if (!rc && drop)
- sclp_console_drop = 1;
+ if (!rc)
+ sclp_console_drop = drop;
return 1;
}
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index e9485fb..806239c 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -25,6 +25,7 @@
#include <asm/setup.h>
#include <asm/page.h>
#include <asm/sclp.h>
+#include <asm/numa.h>
#include "sclp.h"
@@ -388,11 +389,11 @@
};
static void __init align_to_block_size(unsigned long long *start,
- unsigned long long *size)
+ unsigned long long *size,
+ unsigned long long alignment)
{
- unsigned long long start_align, size_align, alignment;
+ unsigned long long start_align, size_align;
- alignment = memory_block_size_bytes();
start_align = roundup(*start, alignment);
size_align = rounddown(*start + *size, alignment) - start_align;
@@ -404,8 +405,8 @@
static void __init add_memory_merged(u16 rn)
{
+ unsigned long long start, size, addr, block_size;
static u16 first_rn, num;
- unsigned long long start, size;
if (rn && first_rn && (first_rn + num == rn)) {
num++;
@@ -423,9 +424,12 @@
goto skip_add;
if (memory_end_set && (start + size > memory_end))
size = memory_end - start;
- align_to_block_size(&start, &size);
- if (size)
- add_memory(0, start, size);
+ block_size = memory_block_size_bytes();
+ align_to_block_size(&start, &size, block_size);
+ if (!size)
+ goto skip_add;
+ for (addr = start; addr < start + size; addr += block_size)
+ add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size);
skip_add:
first_rn = rn;
num = 1;
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index ae67386..68d6ee7 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -12,6 +12,7 @@
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/kernel.h>
+#include <linux/sysrq.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/tty_flip.h>
@@ -27,6 +28,7 @@
#include <asm/uaccess.h>
#include "sclp.h"
+#include "ctrlchar.h"
#define SCLP_VT220_MAJOR TTY_MAJOR
#define SCLP_VT220_MINOR 65
@@ -477,6 +479,53 @@
#define SCLP_VT220_SESSION_STARTED 0x80
#define SCLP_VT220_SESSION_DATA 0x00
+#ifdef CONFIG_MAGIC_SYSRQ
+
+static int sysrq_pressed;
+static struct sysrq_work sysrq;
+
+static void sclp_vt220_reset_session(void)
+{
+ sysrq_pressed = 0;
+}
+
+static void sclp_vt220_handle_input(const char *buffer, unsigned int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ /* Handle magic sys request */
+ if (buffer[i] == ('O' ^ 0100)) { /* CTRL-O */
+ /*
+ * If pressed again, reset sysrq_pressed and pass
+ * the literal CTRL-O character on to the tty
+ */
+ sysrq_pressed = !sysrq_pressed;
+ if (sysrq_pressed)
+ continue;
+ } else if (sysrq_pressed) {
+ sysrq.key = buffer[i];
+ schedule_sysrq_work(&sysrq);
+ sysrq_pressed = 0;
+ continue;
+ }
+ tty_insert_flip_char(&sclp_vt220_port, buffer[i], 0);
+ }
+}
+
+#else
+
+static void sclp_vt220_reset_session(void)
+{
+}
+
+static void sclp_vt220_handle_input(const char *buffer, unsigned int count)
+{
+ tty_insert_flip_string(&sclp_vt220_port, buffer, count);
+}
+
+#endif
+
/*
* Called by the SCLP to report incoming event buffers.
*/
@@ -492,12 +541,13 @@
switch (*buffer) {
case SCLP_VT220_SESSION_ENDED:
case SCLP_VT220_SESSION_STARTED:
+ sclp_vt220_reset_session();
break;
case SCLP_VT220_SESSION_DATA:
/* Send input to line discipline */
buffer++;
count--;
- tty_insert_flip_string(&sclp_vt220_port, buffer, count);
+ sclp_vt220_handle_input(buffer, count);
tty_flip_buffer_push(&sclp_vt220_port);
break;
}
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index e91b89d..e96fc7f 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -659,6 +659,10 @@
else
/* Normal end. Copy residual count. */
rq->rescnt = irb->scsw.cmd.count;
+ } else if (irb->scsw.cmd.dstat & DEV_STAT_DEV_END) {
+ /* Interrupt without an outstanding request -> update all */
+ tp->update_flags = TTY_UPDATE_ALL;
+ tty3270_set_timer(tp, 1);
}
return RAW3270_IO_DONE;
}
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index e3bf885..548a189 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -21,6 +21,7 @@
#include <asm/chsc.h>
#include <asm/crw.h>
#include <asm/isc.h>
+#include <asm/ebcdic.h>
#include "css.h"
#include "cio.h"
@@ -272,36 +273,6 @@
css_schedule_reprobe();
}
-static int
-__get_chpid_from_lir(void *data)
-{
- struct lir {
- u8 iq;
- u8 ic;
- u16 sci;
- /* incident-node descriptor */
- u32 indesc[28];
- /* attached-node descriptor */
- u32 andesc[28];
- /* incident-specific information */
- u32 isinfo[28];
- } __attribute__ ((packed)) *lir;
-
- lir = data;
- if (!(lir->iq&0x80))
- /* NULL link incident record */
- return -EINVAL;
- if (!(lir->indesc[0]&0xc0000000))
- /* node descriptor not valid */
- return -EINVAL;
- if (!(lir->indesc[0]&0x10000000))
- /* don't handle device-type nodes - FIXME */
- return -EINVAL;
- /* Byte 3 contains the chpid. Could also be CTCA, but we don't care */
-
- return (u16) (lir->indesc[0]&0x000000ff);
-}
-
struct chsc_sei_nt0_area {
u8 flags;
u8 vf; /* validity flags */
@@ -341,22 +312,132 @@
} u;
} __packed;
+/*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID 0
+#define ND_VALIDITY_OUTDATED 1
+#define ND_VALIDITY_INVALID 2
+
+struct node_descriptor {
+ /* Flags. */
+ union {
+ struct {
+ u32 validity:3;
+ u32 reserved:5;
+ } __packed;
+ u8 byte0;
+ } __packed;
+
+ /* Node parameters. */
+ u32 params:24;
+
+ /* Node ID. */
+ char type[6];
+ char model[3];
+ char manufacturer[3];
+ char plant[2];
+ char seq[12];
+ u16 tag;
+} __packed;
+
+/*
+ * Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
+ */
+
+#define LIR_IQ_CLASS_INFO 0
+#define LIR_IQ_CLASS_DEGRADED 1
+#define LIR_IQ_CLASS_NOT_OPERATIONAL 2
+
+struct lir {
+ struct {
+ u32 null:1;
+ u32 reserved:3;
+ u32 class:2;
+ u32 reserved2:2;
+ } __packed iq;
+ u32 ic:8;
+ u32 reserved:16;
+ struct node_descriptor incident_node;
+ struct node_descriptor attached_node;
+ u8 reserved2[32];
+} __packed;
+
+#define PARAMS_LEN 10 /* PARAMS=xx,xxxxxx */
+#define NODEID_LEN 35 /* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
+
+/* Copy EBCDIC text, convert to ASCII and optionally add delimiter. */
+static char *store_ebcdic(char *dest, const char *src, unsigned long len,
+ char delim)
+{
+ memcpy(dest, src, len);
+ EBCASC(dest, len);
+
+ if (delim)
+ dest[len++] = delim;
+
+ return dest + len;
+}
+
+/* Format node ID and parameters for output in LIR log message. */
+static void format_node_data(char *params, char *id, struct node_descriptor *nd)
+{
+ memset(params, 0, PARAMS_LEN);
+ memset(id, 0, NODEID_LEN);
+
+ if (nd->validity != ND_VALIDITY_VALID) {
+ strncpy(params, "n/a", PARAMS_LEN - 1);
+ strncpy(id, "n/a", NODEID_LEN - 1);
+ return;
+ }
+
+ /* PARAMS=xx,xxxxxx */
+ snprintf(params, PARAMS_LEN, "%02x,%06x", nd->byte0, nd->params);
+ /* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
+ id = store_ebcdic(id, nd->type, sizeof(nd->type), '/');
+ id = store_ebcdic(id, nd->model, sizeof(nd->model), ',');
+ id = store_ebcdic(id, nd->manufacturer, sizeof(nd->manufacturer), '.');
+ id = store_ebcdic(id, nd->plant, sizeof(nd->plant), 0);
+ id = store_ebcdic(id, nd->seq, sizeof(nd->seq), ',');
+ sprintf(id, "%04X", nd->tag);
+}
+
static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area)
{
- struct chp_id chpid;
- int id;
+ struct lir *lir = (struct lir *) &sei_area->ccdf;
+ char iuparams[PARAMS_LEN], iunodeid[NODEID_LEN], auparams[PARAMS_LEN],
+ aunodeid[NODEID_LEN];
- CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n",
- sei_area->rs, sei_area->rsid);
- if (sei_area->rs != 4)
+ CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x, iq=%02x)\n",
+ sei_area->rs, sei_area->rsid, sei_area->ccdf[0]);
+
+ /* Ignore NULL Link Incident Records. */
+ if (lir->iq.null)
return;
- id = __get_chpid_from_lir(sei_area->ccdf);
- if (id < 0)
- CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n");
- else {
- chp_id_init(&chpid);
- chpid.id = id;
- chsc_chp_offline(chpid);
+
+ /* Inform user that a link requires maintenance actions because it has
+ * become degraded or not operational. Note that this log message is
+ * the primary intention behind a Link Incident Record. */
+
+ format_node_data(iuparams, iunodeid, &lir->incident_node);
+ format_node_data(auparams, aunodeid, &lir->attached_node);
+
+ switch (lir->iq.class) {
+ case LIR_IQ_CLASS_DEGRADED:
+ pr_warn("Link degraded: RS=%02x RSID=%04x IC=%02x "
+ "IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
+ sei_area->rs, sei_area->rsid, lir->ic, iuparams,
+ iunodeid, auparams, aunodeid);
+ break;
+ case LIR_IQ_CLASS_NOT_OPERATIONAL:
+ pr_err("Link stopped: RS=%02x RSID=%04x IC=%02x "
+ "IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
+ sei_area->rs, sei_area->rsid, lir->ic, iuparams,
+ iunodeid, auparams, aunodeid);
+ break;
+ default:
+ break;
}
}
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index f3c4179..6acd0b5 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -540,7 +540,7 @@
if (rc)
goto out_unlock;
/* Perform operation. */
- cdev->private->state = DEV_STATE_STEAL_LOCK,
+ cdev->private->state = DEV_STATE_STEAL_LOCK;
ccw_device_stlck_start(cdev, &data, &buffer[0], &buffer[32]);
spin_unlock_irq(sch->lock);
/* Wait for operation to finish. */
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index bee8c11..b3f44bc 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -336,7 +336,6 @@
{
struct eadm_private *private;
unsigned long flags;
- int ret = 0;
spin_lock_irqsave(sch->lock, flags);
if (!device_is_registered(&sch->dev))
@@ -356,7 +355,7 @@
out_unlock:
spin_unlock_irqrestore(sch->lock, flags);
- return ret;
+ return 0;
}
static struct css_device_id eadm_subchannel_ids[] = {
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 559a9dc..d78b3d6 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -1372,7 +1372,7 @@
/* Wait for the test message to complete. */
for (i = 0; i < 6; i++) {
- mdelay(300);
+ msleep(300);
status = __ap_recv(ap_dev->qid, &psmid, reply, 4096);
if (status.response_code == AP_RESPONSE_NORMAL &&
psmid == 0x0102030405060708ULL)
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
index 899ffa1..f418527 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.c
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -182,7 +182,7 @@
/* Wait for the test message to complete. */
for (i = 0; i < 6; i++) {
- mdelay(300);
+ msleep(300);
rc = ap_recv(ap_dev->qid, &psmid, reply, 4096);
if (rc == 0 && psmid == 0x0102030405060708ULL)
break;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2e65b98..a855669 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -390,10 +390,8 @@
return rc;
}
-static int qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
+static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
{
- int rc = 0;
-
QETH_DBF_TEXT(SETUP , 2, "stopcard");
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -427,7 +425,6 @@
qeth_clear_cmd_buffers(&card->read);
qeth_clear_cmd_buffers(&card->write);
}
- return rc;
}
static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 70eb2f61..a1aaa36 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2158,10 +2158,8 @@
return card ;
}
-static int qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
+static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
{
- int rc = 0;
-
QETH_DBF_TEXT(SETUP, 2, "stopcard");
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -2196,7 +2194,6 @@
qeth_clear_cmd_buffers(&card->read);
qeth_clear_cmd_buffers(&card->write);
}
- return rc;
}
/*
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 21ec5e2..4ac73e0 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -204,7 +204,7 @@
break;
case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE:
zfcp_fsf_link_down_info_eval(req, NULL);
- };
+ }
}
static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)
diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h
index c4d4eb8..986c06c 100644
--- a/include/linux/cpufeature.h
+++ b/include/linux/cpufeature.h
@@ -11,6 +11,7 @@
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+#include <linux/init.h>
#include <linux/mod_devicetable.h>
#include <asm/cpufeature.h>
@@ -43,16 +44,16 @@
* For a list of legal values for 'feature', please consult the file
* 'asm/cpufeature.h' of your favorite architecture.
*/
-#define module_cpu_feature_match(x, __init) \
+#define module_cpu_feature_match(x, __initfunc) \
static struct cpu_feature const cpu_feature_match_ ## x[] = \
{ { .feature = cpu_feature(x) }, { } }; \
MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \
\
-static int cpu_feature_match_ ## x ## _init(void) \
+static int __init cpu_feature_match_ ## x ## _init(void) \
{ \
if (!cpu_have_feature(cpu_feature(x))) \
return -ENODEV; \
- return __init(); \
+ return __initfunc(); \
} \
module_init(cpu_feature_match_ ## x ## _init)