Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:
 - Errata workarounds for Qualcomm's Falkor CPU
 - Qualcomm L2 Cache PMU driver
 - Qualcomm SMCCC firmware quirk
 - Support for DEBUG_VIRTUAL
 - CPU feature detection for userspace via MRS emulation
 - Preliminary work for the Statistical Profiling Extension
 - Misc cleanups and non-critical fixes

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (74 commits)
  arm64/kprobes: consistently handle MRS/MSR with XZR
  arm64: cpufeature: correctly handle MRS to XZR
  arm64: traps: correctly handle MRS/MSR with XZR
  arm64: ptrace: add XZR-safe regs accessors
  arm64: include asm/assembler.h in entry-ftrace.S
  arm64: fix warning about swapper_pg_dir overflow
  arm64: Work around Falkor erratum 1003
  arm64: head.S: Enable EL1 (host) access to SPE when entered at EL2
  arm64: arch_timer: document Hisilicon erratum 161010101
  arm64: use is_vmalloc_addr
  arm64: use linux/sizes.h for constants
  arm64: uaccess: consistently check object sizes
  perf: add qcom l2 cache perf events driver
  arm64: remove wrong CONFIG_PROC_SYSCTL ifdef
  ARM: smccc: Update HVC comment to describe new quirk parameter
  arm64: do not trace atomic operations
  ACPI/IORT: Fix the error return code in iort_add_smmu_platform_device()
  ACPI/IORT: Fix iort_node_get_id() mapping entries indexing
  arm64: mm: enable CONFIG_HOLES_IN_ZONE for NUMA
  perf: xgene: Include module.h
  ...
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
new file mode 100644
index 0000000..61ca21e
--- /dev/null
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -0,0 +1,240 @@
+		ARM64 CPU Feature Registers
+		===========================
+
+Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+
+This file describes the ABI for exporting the AArch64 CPU ID/feature
+registers to userspace. The availability of this ABI is advertised
+via the HWCAP_CPUID bit in HWCAPs.
+
+1. Motivation
+---------------
+
+The ARM architecture defines a set of feature registers, which describe
+the capabilities of the CPU/system. Access to these system registers is
+restricted from EL0 and there is no reliable way for an application to
+extract this information to make better decisions at runtime. There is
+limited information available to the application via HWCAPs, however
+there are some issues with their usage.
+
+ a) Any change to the HWCAPs requires an update to userspace (e.g. libc)
+    to detect the new changes, which can take a long time to appear in
+    distributions. Exposing the registers allows applications to get the
+    information without requiring updates to the toolchains.
+
+ b) Access to HWCAPs is sometimes limited (e.g. prior to libc, or
+    when ld is initialised at startup time).
+
+ c) HWCAPs cannot represent non-boolean information effectively. The
+    architecture defines a canonical format for representing features
+    in the ID registers; this is well defined and is capable of
+    representing all valid architecture variations.
+
+
+2. Requirements
+-----------------
+
+ a) Safety :
+    Applications should be able to use the information provided by the
+    infrastructure to run safely across the system. This has greater
+    implications on a system with heterogeneous CPUs.
+    The infrastructure exports a value that is safe across all the
+    available CPUs on the system.
+
+    e.g., if at least one CPU doesn't implement CRC32 instructions while
+    others do, we should report that CRC32 is not implemented.
+    Otherwise an application could crash when scheduled on a CPU
+    which doesn't support CRC32.
+
+ b) Security :
+    Applications should only be able to receive information that is
+    relevant to normal operation in userspace. Hence, some of the
+    fields are masked out (i.e., made invisible) and their values are
+    set to indicate the feature is 'not supported'. See Section 4 for
+    the list of visible features. Also, the kernel may manipulate the
+    fields based on what it supports. e.g., if FP is not supported by
+    the kernel, the values could indicate that FP is not available
+    (even when the CPU provides it).
+
+ c) Implementation Defined Features :
+    The infrastructure doesn't expose any register which is
+    IMPLEMENTATION DEFINED as per the ARMv8-A architecture.
+
+ d) CPU Identification :
+    MIDR_EL1 is exposed to help identify the processor. On a
+    heterogeneous system, this could be racy (just like getcpu()). The
+    process could be migrated to another CPU by the time it uses the
+    register value, unless the CPU affinity is set. Hence, there is no
+    guarantee that the value reflects the processor that it is
+    currently executing on. The REVIDR is not exposed due to this
+    constraint, as REVIDR makes sense only in conjunction with the
+    MIDR. Alternatively, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
+    at:
+
+	/sys/devices/system/cpu/cpu$ID/regs/identification/
+	                                              \- midr
+	                                              \- revidr
+
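+    For example, the values for CPU0 can be read from a shell (a usage
+    sketch; the values printed are implementation specific):
+
+	$ cat /sys/devices/system/cpu/cpu0/regs/identification/midr
+	$ cat /sys/devices/system/cpu/cpu0/regs/identification/revidr
+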
+3. Implementation
+--------------------
+
+The infrastructure is built on the emulation of the 'MRS' instruction.
+Accessing a restricted system register from an application generates an
+exception and ends up in SIGILL being delivered to the process.
+The infrastructure hooks into the exception handler and emulates the
+operation if the source belongs to the supported system register space.
+
+The infrastructure emulates only the following system register space:
+	Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
+
+(See Table C5-6 'System instruction encodings for non-Debug System
+register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
+registers).
+
+The following rules are applied to the value returned by the
+infrastructure:
+
+ a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0.
+ b) The value of a reserved field is populated with the reserved
+    value as defined by the architecture.
+ c) The value of a 'visible' field holds the system wide safe value
+    for the particular feature (except for MIDR_EL1, see section 4).
+ d) All other fields (i.e., invisible fields) are set to indicate
+    the feature is missing (as defined by the architecture).
+
+4. List of registers with visible features
+-------------------------------------------
+
+  1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+     x--------------------------------------------------x
+     | Name                         |  bits   | visible |
+     |--------------------------------------------------|
+     | RES0                         | [63-32] |    n    |
+     |--------------------------------------------------|
+     | RDM                          | [31-28] |    y    |
+     |--------------------------------------------------|
+     | RES0                         | [27-24] |    n    |
+     |--------------------------------------------------|
+     | ATOMICS                      | [23-20] |    y    |
+     |--------------------------------------------------|
+     | CRC32                        | [19-16] |    y    |
+     |--------------------------------------------------|
+     | SHA2                         | [15-12] |    y    |
+     |--------------------------------------------------|
+     | SHA1                         | [11-8]  |    y    |
+     |--------------------------------------------------|
+     | AES                          | [7-4]   |    y    |
+     |--------------------------------------------------|
+     | RES0                         | [3-0]   |    n    |
+     x--------------------------------------------------x
+
+
+  2) ID_AA64PFR0_EL1 - Processor Feature Register 0
+     x--------------------------------------------------x
+     | Name                         |  bits   | visible |
+     |--------------------------------------------------|
+     | RES0                         | [63-28] |    n    |
+     |--------------------------------------------------|
+     | GIC                          | [27-24] |    n    |
+     |--------------------------------------------------|
+     | AdvSIMD                      | [23-20] |    y    |
+     |--------------------------------------------------|
+     | FP                           | [19-16] |    y    |
+     |--------------------------------------------------|
+     | EL3                          | [15-12] |    n    |
+     |--------------------------------------------------|
+     | EL2                          | [11-8]  |    n    |
+     |--------------------------------------------------|
+     | EL1                          | [7-4]   |    n    |
+     |--------------------------------------------------|
+     | EL0                          | [3-0]   |    n    |
+     x--------------------------------------------------x
+
+
+  3) MIDR_EL1 - Main ID Register
+     x--------------------------------------------------x
+     | Name                         |  bits   | visible |
+     |--------------------------------------------------|
+     | Implementer                  | [31-24] |    y    |
+     |--------------------------------------------------|
+     | Variant                      | [23-20] |    y    |
+     |--------------------------------------------------|
+     | Architecture                 | [19-16] |    y    |
+     |--------------------------------------------------|
+     | PartNum                      | [15-4]  |    y    |
+     |--------------------------------------------------|
+     | Revision                     | [3-0]   |    y    |
+     x--------------------------------------------------x
+
+   NOTE: The 'visible' fields of MIDR_EL1 contain the value as read
+   on the CPU where it is fetched; they are not a system-wide safe
+   value.
+
+Appendix I: Example
+---------------------------
+
+/*
+ * Sample program to demonstrate the MRS emulation ABI.
+ *
+ * Copyright (C) 2015-2016, ARM Ltd
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/hwcap.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+
+#define get_cpu_ftr(id) ({					\
+		unsigned long __val;				\
+		asm("mrs %0, "#id : "=r" (__val));		\
+		printf("%-20s: 0x%016lx\n", #id, __val);	\
+	})
+
+int main(void)
+{
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+		fputs("CPUID registers unavailable\n", stderr);
+		return 1;
+	}
+
+	get_cpu_ftr(ID_AA64ISAR0_EL1);
+	get_cpu_ftr(ID_AA64ISAR1_EL1);
+	get_cpu_ftr(ID_AA64MMFR0_EL1);
+	get_cpu_ftr(ID_AA64MMFR1_EL1);
+	get_cpu_ftr(ID_AA64PFR0_EL1);
+	get_cpu_ftr(ID_AA64PFR1_EL1);
+	get_cpu_ftr(ID_AA64DFR0_EL1);
+	get_cpu_ftr(ID_AA64DFR1_EL1);
+
+	get_cpu_ftr(MIDR_EL1);
+	get_cpu_ftr(MPIDR_EL1);
+	get_cpu_ftr(REVIDR_EL1);
+
+#if 0
+	/* Unexposed register access causes SIGILL */
+	get_cpu_ftr(ID_MMFR0_EL1);
+#endif
+
+	return 0;
+}
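+
+The program above can be built and run as follows (a usage sketch,
+assuming an AArch64 host with a sufficiently recent toolchain and that
+the listing is saved as the hypothetical file cpu-feature-test.c):
+
+	$ gcc -o cpu-feature-test cpu-feature-test.c
+	$ ./cpu-feature-test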
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 405da11..a71b809 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -42,24 +42,29 @@
 will be updated when new workarounds are committed and backported to
 stable kernels.
 
-| Implementor    | Component       | Erratum ID      | Kconfig                 |
-+----------------+-----------------+-----------------+-------------------------+
-| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319    |
-| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319    |
-| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069    |
-| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472    |
-| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719    |
-| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419    |
-| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
-| ARM            | Cortex-A57      | #852523         | N/A                     |
-| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
-| ARM            | Cortex-A72      | #853709         | N/A                     |
-| ARM            | MMU-500         | #841119,#826419 | N/A                     |
-|                |                 |                 |                         |
-| Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
-| Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144    |
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |
-| Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456    |
-| Cavium         | ThunderX SMMUv2 | #27704          | N/A		       |
-|                |                 |                 |                         |
-| Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585     |
+| Implementor    | Component       | Erratum ID      | Kconfig                     |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
+| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319        |
+| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069        |
+| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472        |
+| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719        |
+| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419        |
+| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075        |
+| ARM            | Cortex-A57      | #852523         | N/A                         |
+| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
+| ARM            | Cortex-A72      | #853709         | N/A                         |
+| ARM            | MMU-500         | #841119,#826419 | N/A                         |
+|                |                 |                 |                             |
+| Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375        |
+| Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
+| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
+|                |                 |                 |                             |
+| Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
+|                |                 |                 |                             |
+| Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
+|                |                 |                 |                             |
+| Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
+| Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
diff --git a/Documentation/perf/qcom_l2_pmu.txt b/Documentation/perf/qcom_l2_pmu.txt
new file mode 100644
index 0000000..b25b976
--- /dev/null
+++ b/Documentation/perf/qcom_l2_pmu.txt
@@ -0,0 +1,38 @@
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with its
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7. For
+example, config=0x042 selects event code 0x04 in group 2.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
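+
+For example, the handling CPUs can be inspected with (a usage sketch):
+
+  cat /sys/devices/l2cache_0/cpumask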
+
+Examples for use with perf:
+
+  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 7e45f69..8e8d20c 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -178,6 +178,6 @@
 #endif
 
 #ifdef CONFIG_HAVE_ARM_SMCCC
-EXPORT_SYMBOL(arm_smccc_smc);
-EXPORT_SYMBOL(arm_smccc_hvc);
+EXPORT_SYMBOL(__arm_smccc_smc);
+EXPORT_SYMBOL(__arm_smccc_hvc);
 #endif
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 09413e7..96b7a47 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -581,9 +581,5 @@
 	.probe		= armv6_pmu_device_probe,
 };
 
-static int __init register_armv6_pmu_driver(void)
-{
-	return platform_driver_register(&armv6_pmu_driver);
-}
-device_initcall(register_armv6_pmu_driver);
+builtin_platform_driver(armv6_pmu_driver);
 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index b942349..ab6522b 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -2034,9 +2034,5 @@
 	.probe		= armv7_pmu_device_probe,
 };
 
-static int __init register_armv7_pmu_driver(void)
-{
-	return platform_driver_register(&armv7_pmu_driver);
-}
-device_initcall(register_armv7_pmu_driver);
+builtin_platform_driver(armv7_pmu_driver);
 #endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index aa0499e..0e51f5e 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -767,9 +767,5 @@
 	.probe		= xscale_pmu_device_probe,
 };
 
-static int __init register_xscale_pmu_driver(void)
-{
-	return platform_driver_register(&xscale_pmu_driver);
-}
-device_initcall(register_xscale_pmu_driver);
+builtin_platform_driver(xscale_pmu_driver);
 #endif	/* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S
index 2e48b67..e5d4306 100644
--- a/arch/arm/kernel/smccc-call.S
+++ b/arch/arm/kernel/smccc-call.S
@@ -46,17 +46,19 @@
 /*
  * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
  *		  unsigned long a3, unsigned long a4, unsigned long a5,
- *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ *		  struct arm_smccc_quirk *quirk)
  */
-ENTRY(arm_smccc_smc)
+ENTRY(__arm_smccc_smc)
 	SMCCC SMCCC_SMC
-ENDPROC(arm_smccc_smc)
+ENDPROC(__arm_smccc_smc)
 
 /*
  * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
  *		  unsigned long a3, unsigned long a4, unsigned long a5,
- *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ *		  struct arm_smccc_quirk *quirk)
  */
-ENTRY(arm_smccc_hvc)
+ENTRY(__arm_smccc_hvc)
 	SMCCC SMCCC_HVC
-ENDPROC(arm_smccc_hvc)
+ENDPROC(__arm_smccc_hvc)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 916024d..a39029b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -6,6 +6,7 @@
 	select ACPI_MCFG if ACPI
 	select ACPI_SPCR_TABLE if ACPI
 	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -479,6 +480,34 @@
 
 	  If unsure, say Y.
 
+config QCOM_FALKOR_ERRATUM_1003
+	bool "Falkor E1003: Incorrect translation due to ASID change"
+	default y
+	select ARM64_PAN if ARM64_SW_TTBR0_PAN
+	help
+	  On Falkor v1, an incorrect ASID may be cached in the TLB when ASID
+	  and BADDR are changed together in TTBRx_EL1. The workaround for this
+	  issue is to use a reserved ASID in cpu_do_switch_mm() before
+	  switching to the new ASID. Saying Y here selects ARM64_PAN if
+	  ARM64_SW_TTBR0_PAN is selected. This is done because implementing and
+	  maintaining the E1003 workaround in the software PAN emulation code
+	  would be an unnecessary complication. The affected Falkor v1 CPU
+	  implements ARMv8.1 hardware PAN support, and hardware PAN and
+	  software PAN emulation are mutually exclusive at runtime.
+
+	  If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_1009
+	bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
+	default y
+	help
+	  On Falkor v1, the CPU may prematurely complete a DSB following a
+	  TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation
+	  one more time to fix the issue.
+
+	  If unsure, say Y.
+
 endmenu
 
 
@@ -614,6 +643,10 @@
 	def_bool y
 	depends on NUMA
 
+config HOLES_IN_ZONE
+	def_bool y
+	depends on NUMA
+
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
@@ -1010,7 +1043,7 @@
 config COMPAT
 	bool "Kernel support for 32-bit EL0"
 	depends on ARM64_4K_PAGES || EXPERT
-	select COMPAT_BINFMT_ELF
+	select COMPAT_BINFMT_ELF if BINFMT_ELF
 	select HAVE_UID16
 	select OLD_SIGSUSPEND3
 	select COMPAT_OLD_SIGACTION
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 560a8d8..fca2f02 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -84,6 +84,14 @@
 
 	  If in doubt, say N.
 
+config DEBUG_EFI
+	depends on EFI && DEBUG_INFO
+	bool "UEFI debugging"
+	help
+	  Enable this option to include EFI specific debugging features into
+	  the kernel that are only useful when using a debug build of the
+	  UEFI firmware.
+
 source "drivers/hwtracing/coresight/Kconfig"
 
 endmenu
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3a43011..1b67c37 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,6 +25,7 @@
 
 #include <asm/asm-offsets.h>
 #include <asm/cpufeature.h>
+#include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
@@ -441,6 +442,28 @@
 	.endm
 
 /*
+ * Errata workaround prior to TTBR0_EL1 update
+ *
+ * 	val:	TTBR value with new BADDR, preserved
+ * 	tmp0:	temporary register, clobbered
+ * 	tmp1:	other temporary register, clobbered
+ */
+	.macro	pre_ttbr0_update_workaround, val, tmp0, tmp1
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
+	mrs	\tmp0, ttbr0_el1
+	mov	\tmp1, #FALKOR_RESERVED_ASID
+	bfi	\tmp0, \tmp1, #48, #16		// reserved ASID + old BADDR
+	msr	ttbr0_el1, \tmp0
+	isb
+	bfi	\tmp0, \val, #0, #48		// reserved ASID + new BADDR
+	msr	ttbr0_el1, \tmp0
+	isb
+alternative_else_nop_endif
+#endif
+	.endm
+
+/*
  * Errata workaround post TTBR0_EL1 update.
  */
 	.macro	post_ttbr0_update_workaround
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 4174f09..fb78a5d 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -35,7 +35,9 @@
 #define ARM64_HYP_OFFSET_LOW			14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
 #define ARM64_HAS_NO_FPSIMD			16
+#define ARM64_WORKAROUND_REPEAT_TLBI		17
+#define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
 
-#define ARM64_NCAPS				17
+#define ARM64_NCAPS				19
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b4989df..4ce82ed 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -29,7 +29,20 @@
 #include <linux/jump_label.h>
 #include <linux/kernel.h>
 
-/* CPU feature register tracking */
+/*
+ * CPU feature register tracking
+ *
+ * The safe value of a CPUID feature field is dependent on the implications
+ * of the values assigned to it by the architecture. Based on the relationship
+ * between the values, the features are classified into 3 types - LOWER_SAFE,
+ * HIGHER_SAFE and EXACT.
+ *
+ * The lowest value of all the CPUs is chosen for LOWER_SAFE and highest
+ * for HIGHER_SAFE. It is expected that all CPUs have the same value for
+ * a field when EXACT is specified, failing which, the safe value specified
+ * in the table is chosen.
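+ *
+ * For example (illustrative only): for a 4-bit FTR_LOWER_SAFE field,
+ * if one CPU reports 2 and another reports 1, the system-wide safe
+ * value is 1.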
+ */
+
 enum ftr_type {
 	FTR_EXACT,	/* Use a predefined safe value */
 	FTR_LOWER_SAFE,	/* Smaller value is safe */
@@ -42,8 +55,12 @@
 #define FTR_SIGNED	true	/* Value should be treated as signed */
 #define FTR_UNSIGNED	false	/* Value should be treated as unsigned */
 
+#define FTR_VISIBLE	true	/* Feature visible to the user space */
+#define FTR_HIDDEN	false	/* Feature is hidden from the user */
+
 struct arm64_ftr_bits {
 	bool		sign;	/* Value is signed ? */
+	bool		visible;
 	bool		strict;	/* CPU Sanity check: strict matching required ? */
 	enum ftr_type	type;
 	u8		shift;
@@ -59,7 +76,9 @@
 struct arm64_ftr_reg {
 	const char			*name;
 	u64				strict_mask;
+	u64				user_mask;
 	u64				sys_val;
+	u64				user_val;
 	const struct arm64_ftr_bits	*ftr_bits;
 };
 
@@ -159,6 +178,11 @@
 	return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
 }
 
+static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
+{
+	return (reg->user_val | (reg->sys_val & reg->user_mask));
+}
+
 static inline int __attribute_const__
 cpuid_feature_extract_field(u64 features, int field, bool sign)
 {
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 26a68dd..fc50271 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -56,6 +56,9 @@
 	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
 	((partnum)		<< MIDR_PARTNUM_SHIFT))
 
+#define MIDR_CPU_VAR_REV(var, rev) \
+	(((var)	<< MIDR_VARIANT_SHIFT) | (rev))
+
 #define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
 			     MIDR_ARCHITECTURE_MASK)
 
@@ -71,6 +74,7 @@
 #define ARM_CPU_IMP_APM			0x50
 #define ARM_CPU_IMP_CAVIUM		0x43
 #define ARM_CPU_IMP_BRCM		0x42
+#define ARM_CPU_IMP_QCOM		0x51
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
@@ -84,10 +88,13 @@
 
 #define BRCM_CPU_PART_VULCAN		0x516
 
+#define QCOM_CPU_PART_FALKOR_V1		0x800
+
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index bc85366..aecc07e 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -332,6 +332,8 @@
 u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
 u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 				  u32 insn, u64 imm);
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+					 u32 insn);
 u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
 				enum aarch64_insn_branch_type type);
 u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2a2752b..6e99978 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -188,6 +188,9 @@
 #define CPTR_EL2_DEFAULT	0x000033ff
 
 /* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TPMS		(1 << 14)
+#define MDCR_EL2_E2PB_MASK	(UL(0x3))
+#define MDCR_EL2_E2PB_SHIFT	(UL(12))
 #define MDCR_EL2_TDRA		(1 << 11)
 #define MDCR_EL2_TDOSA		(1 << 10)
 #define MDCR_EL2_TDA		(1 << 9)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e505038..443b387 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -229,7 +229,12 @@
 
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
-	struct kvm_guest_debug_arch host_debug_state;
+	struct {
+		/* {Break,watch}point registers */
+		struct kvm_guest_debug_arch regs;
+		/* Statistical profiling extension */
+		u64 pmscr_el1;
+	} host_debug_state;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6f72fe8..55772c1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -47,7 +47,7 @@
  * If the page is in the bottom half, we have to use the top half. If
  * the page is in the top half, we have to use the bottom half:
  *
- * T = __virt_to_phys(__hyp_idmap_text_start)
+ * T = __pa_symbol(__hyp_idmap_text_start)
  * if (T & BIT(VA_BITS - 1))
  *	HYP_VA_MIN = 0  //idmap in upper half
  * else
@@ -271,7 +271,7 @@
 	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
 }
 
-#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
+#define kvm_virt_to_phys(x)		__pa_symbol(x)
 
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index fc756e2..606b209 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -19,7 +19,7 @@
 __asm__(".arch_extension	lse");
 
 /* Move the ll/sc atomics out-of-line */
-#define __LL_SC_INLINE
+#define __LL_SC_INLINE		notrace
 #define __LL_SC_PREFIX(x)	__ll_sc_##x
 #define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 90c39a6..32f827233 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -102,25 +102,6 @@
 #endif
 
 /*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#define __virt_to_phys(x) ({						\
-	phys_addr_t __x = (phys_addr_t)(x);				\
-	__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :	\
-				 (__x - kimage_voffset); })
-
-#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
-#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
-
-/*
- * Convert a page to/from a physical address
- */
-#define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
-#define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
-
-/*
  * Memory types available.
  */
 #define MT_DEVICE_nGnRnE	0
@@ -187,6 +168,48 @@
 #define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
 
 /*
+ * Physical vs virtual RAM address space conversion.  These are
+ * private definitions which should NOT be used outside memory.h
+ * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+
+
+/*
+ * The linear kernel range starts in the middle of the virtual address
+ * space. Testing the top bit for the start of the region is a
+ * sufficient check.
+ */
+#define __is_lm_address(addr)	(!!((addr) & BIT(VA_BITS - 1)))
+
+#define __lm_to_phys(addr)	(((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
+#define __kimg_to_phys(addr)	((addr) - kimage_voffset)
+
+#define __virt_to_phys_nodebug(x) ({					\
+	phys_addr_t __x = (phys_addr_t)(x);				\
+	__is_lm_address(__x) ? __lm_to_phys(__x) :			\
+			       __kimg_to_phys(__x);			\
+})
+
+#define __pa_symbol_nodebug(x)	__kimg_to_phys((phys_addr_t)(x))
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x)	__virt_to_phys_nodebug(x)
+#define __phys_addr_symbol(x)	__pa_symbol_nodebug(x)
+#endif
+
+#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
+
+/*
+ * Convert a page to/from a physical address
+ */
+#define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
+#define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
+
+/*
  * Note: Drivers should NOT use these.  They are the wrong
  * translation for translating DMA addresses.  Use the driver
  * DMA support - see dma-mapping.h.
@@ -207,9 +230,12 @@
  * Drivers should NOT use these either.
  */
 #define __pa(x)			__virt_to_phys((unsigned long)(x))
+#define __pa_symbol(x)		__phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa_nodebug(x)		__virt_to_phys_nodebug((unsigned long)(x))
 #define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
-#define virt_to_pfn(x)      __phys_to_pfn(__virt_to_phys(x))
+#define virt_to_pfn(x)      __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
+#define sym_to_pfn(x)	    __phys_to_pfn(__pa_symbol(x))
 
 /*
  *  virt_to_page(k)	convert a _valid_ virtual address to struct page *
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0363fe8..1ef40d8 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -19,6 +19,10 @@
 #ifndef __ASM_MMU_CONTEXT_H
 #define __ASM_MMU_CONTEXT_H
 
+#define FALKOR_RESERVED_ASID	1
+
+#ifndef __ASSEMBLY__
+
 #include <linux/compiler.h>
 #include <linux/sched.h>
 
@@ -45,7 +49,7 @@
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-	unsigned long ttbr = virt_to_phys(empty_zero_page);
+	unsigned long ttbr = __pa_symbol(empty_zero_page);
 
 	write_sysreg(ttbr, ttbr0_el1);
 	isb();
@@ -114,7 +118,7 @@
 	local_flush_tlb_all();
 	cpu_set_idmap_tcr_t0sz();
 
-	cpu_switch_mm(idmap_pg_dir, &init_mm);
+	cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
 }
 
 /*
@@ -129,7 +133,7 @@
 
 	phys_addr_t pgd_phys = virt_to_phys(pgd);
 
-	replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+	replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 
 	cpu_install_idmap();
 	replace_phys(pgd_phys);
@@ -220,4 +224,6 @@
 
 void verify_cpu_asid_bits(void);
 
-#endif
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !__ASM_MMU_CONTEXT_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ffbb9a5..0eef606 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -52,7 +52,7 @@
  * for zero-mapped memory areas etc..
  */
 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr)	pfn_to_page(PHYS_PFN(__pa(empty_zero_page)))
+#define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
 
@@ -71,9 +71,8 @@
 #define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
-#define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
+#define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
-#define pte_ng(pte)		(!!(pte_val(pte) & PTE_NG))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -84,8 +83,12 @@
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_global(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID)
+/*
+ * Execute-only user mappings do not have the PTE_USER bit set. All valid
+ * kernel mappings have the PTE_UXN bit set.
+ */
+#define pte_valid_not_user(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
 #define pte_valid_young(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
 
@@ -178,7 +181,7 @@
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
 	 * or update_mmu_cache() have the necessary barriers.
 	 */
-	if (pte_valid_global(pte)) {
+	if (pte_valid_not_user(pte)) {
 		dsb(ishst);
 		isb();
 	}
@@ -212,7 +215,7 @@
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
-		if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte))
+		if (pte_user_exec(pte) && !pte_special(pte))
 			__sync_icache_dcache(pte, addr);
 	}
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 747c65a..c97b8bd 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -187,7 +187,6 @@
 #endif
 
 int cpu_enable_pan(void *__unused);
-int cpu_enable_uao(void *__unused);
 int cpu_enable_cache_maint_trap(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 513daf0..11403fd 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -194,6 +194,26 @@
 	return val;
 }
 
+/*
+ * Read a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline unsigned long pt_regs_read_reg(const struct pt_regs *regs, int r)
+{
+	return (r == 31) ? 0 : regs->regs[r];
+}
+
+/*
+ * Write a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
+				     unsigned long val)
+{
+	if (r != 31)
+		regs->regs[r] = val;
+}
+
 /* Valid only for Kernel mode traps. */
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 98ae03f..ac24b6e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -32,8 +32,27 @@
  *	[11-8]  : CRm
  *	[7-5]   : Op2
  */
+#define Op0_shift	19
+#define Op0_mask	0x3
+#define Op1_shift	16
+#define Op1_mask	0x7
+#define CRn_shift	12
+#define CRn_mask	0xf
+#define CRm_shift	8
+#define CRm_mask	0xf
+#define Op2_shift	5
+#define Op2_mask	0x7
+
 #define sys_reg(op0, op1, crn, crm, op2) \
-	((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+	(((op0) << Op0_shift) | ((op1) << Op1_shift) | \
+	 ((crn) << CRn_shift) | ((crm) << CRm_shift) | \
+	 ((op2) << Op2_shift))
+
+#define sys_reg_Op0(id)	(((id) >> Op0_shift) & Op0_mask)
+#define sys_reg_Op1(id)	(((id) >> Op1_shift) & Op1_mask)
+#define sys_reg_CRn(id)	(((id) >> CRn_shift) & CRn_mask)
+#define sys_reg_CRm(id)	(((id) >> CRm_shift) & CRm_mask)
+#define sys_reg_Op2(id)	(((id) >> Op2_shift) & Op2_mask)
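+
+/*
+ * For example, MIDR_EL1 (Op0=3, Op1=0, CRn=0, CRm=0, Op2=0) is encoded
+ * as sys_reg(3, 0, 0, 0, 0).
+ */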
 
 #ifndef CONFIG_BROKEN_GAS_INST
 
@@ -190,6 +209,7 @@
 #define ID_AA64MMFR2_CNP_SHIFT		0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_PMSVER_SHIFT	32
 #define ID_AA64DFR0_CTX_CMPS_SHIFT	28
 #define ID_AA64DFR0_WRPS_SHIFT		20
 #define ID_AA64DFR0_BRPS_SHIFT		12
@@ -245,6 +265,10 @@
 #define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN64_SUPPORTED
 #endif
 
+
+/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
+#define SYS_MPIDR_SAFE_VAL		(1UL << 31)
+
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index deab523..af1c769 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -36,9 +36,21 @@
  * not. The macros handle invoking the asm with or without the
  * register argument as appropriate.
  */
-#define __TLBI_0(op, arg)		asm ("tlbi " #op)
-#define __TLBI_1(op, arg)		asm ("tlbi " #op ", %0" : : "r" (arg))
-#define __TLBI_N(op, arg, n, ...)	__TLBI_##n(op, arg)
+#define __TLBI_0(op, arg) asm ("tlbi " #op "\n"				       \
+		   ALTERNATIVE("nop\n			nop",		       \
+			       "dsb ish\n		tlbi " #op,	       \
+			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
+			       CONFIG_QCOM_FALKOR_ERRATUM_1009)		       \
+			    : : )
+
+#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n"			       \
+		   ALTERNATIVE("nop\n			nop",		       \
+			       "dsb ish\n		tlbi " #op ", %0",     \
+			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
+			       CONFIG_QCOM_FALKOR_ERRATUM_1009)		       \
+			    : : "r" (arg))
+
+#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
 
 #define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 46da3ea..5308d69 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -379,9 +379,9 @@
 {
 	unsigned long res = n;
 	kasan_check_write(to, n);
+	check_object_size(to, n, false);
 
 	if (access_ok(VERIFY_READ, from, n)) {
-		check_object_size(to, n, false);
 		res = __arch_copy_from_user(to, from, n);
 	}
 	if (unlikely(res))
@@ -392,9 +392,9 @@
 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	kasan_check_read(from, n);
+	check_object_size(from, n, true);
 
 	if (access_ok(VERIFY_WRITE, to, n)) {
-		check_object_size(from, n, true);
 		n = __arch_copy_to_user(to, from, n);
 	}
 	return n;
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a739287..61c263c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -30,5 +30,7 @@
 #define HWCAP_ATOMICS		(1 << 8)
 #define HWCAP_FPHP		(1 << 9)
 #define HWCAP_ASIMDHP		(1 << 10)
+#define HWCAP_CPUID		(1 << 11)
+#define HWCAP_ASIMDRDM		(1 << 12)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d66bba..1606c6b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,3 +55,7 @@
 obj-m					+= $(arm64-obj-m)
 head-y					:= head.o
 extra-y					+= $(head-y) vmlinux.lds
+
+ifeq ($(CONFIG_DEBUG_EFI),y)
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+endif
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index a32b401..1f5655c 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/acpi.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 
 #include <asm/cpu_ops.h>
@@ -109,7 +110,7 @@
 	 * that read this address need to convert this address to the
 	 * Boot-Loader's endianness before jumping.
 	 */
-	writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+	writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point);
 	writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
 
 	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 78f3680..e9c4dc9 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -73,5 +73,5 @@
 #endif
 
 	/* arm-smccc */
-EXPORT_SYMBOL(arm_smccc_smc);
-EXPORT_SYMBOL(arm_smccc_hvc);
+EXPORT_SYMBOL(__arm_smccc_smc);
+EXPORT_SYMBOL(__arm_smccc_hvc);
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index ecf9298..86032a0 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -636,7 +636,7 @@
 		if (system_supports_mixed_endian_el0())
 			register_insn_emulation(&setend_ops);
 		else
-			pr_info("setend instruction emulation is not supported on the system");
+			pr_info("setend instruction emulation is not supported on this system\n");
 	}
 
 	cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index bc049af..b3bb7ef 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -143,8 +143,11 @@
   DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS,	offsetof(struct sleep_stack_data, system_regs));
   DEFINE(SLEEP_STACK_DATA_CALLEE_REGS,	offsetof(struct sleep_stack_data, callee_saved_regs));
 #endif
-  DEFINE(ARM_SMCCC_RES_X0_OFFS,	offsetof(struct arm_smccc_res, a0));
-  DEFINE(ARM_SMCCC_RES_X2_OFFS,	offsetof(struct arm_smccc_res, a2));
+  DEFINE(ARM_SMCCC_RES_X0_OFFS,		offsetof(struct arm_smccc_res, a0));
+  DEFINE(ARM_SMCCC_RES_X2_OFFS,		offsetof(struct arm_smccc_res, a2));
+  DEFINE(ARM_SMCCC_QUIRK_ID_OFFS,	offsetof(struct arm_smccc_quirk, id));
+  DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS,	offsetof(struct arm_smccc_quirk, state));
+
   BLANK();
   DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
   DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 9617301..3f2250f 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -84,7 +84,7 @@
 
 static int __init_cache_level(unsigned int cpu)
 {
-	unsigned int ctype, level, leaves;
+	unsigned int ctype, level, leaves, of_level;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
 	for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -97,6 +97,17 @@
 		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
 	}
 
+	of_level = of_find_last_cache_level(cpu);
+	if (level < of_level) {
+		/*
+		 * Some external caches are not specified in CLIDR_EL1;
+		 * the information may be available in the device tree.
+		 * Only unified external caches are considered here.
+		 */
+		leaves += (of_level - level);
+		level = of_level;
+	}
+
 	this_cpu_ci->num_levels = level;
 	this_cpu_ci->num_leaves = leaves;
 	return 0;
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
index d4e9ecb..6c2b1b4 100644
--- a/arch/arm64/kernel/cpu-reset.h
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -24,7 +24,7 @@
 
 	el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
 		is_hyp_mode_available();
-	restart = (void *)virt_to_phys(__cpu_soft_restart);
+	restart = (void *)__pa_symbol(__cpu_soft_restart);
 
 	cpu_install_idmap();
 	restart(el2_switch, entry, arg0, arg1, arg2);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b75e917..f6cc67e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -79,8 +79,9 @@
 	/* Cortex-A57 r0p0 - r1p2 */
 		.desc = "ARM erratum 832075",
 		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
-		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
-			   (1 << MIDR_VARIANT_SHIFT) | 2),
+		MIDR_RANGE(MIDR_CORTEX_A57,
+			   MIDR_CPU_VAR_REV(0, 0),
+			   MIDR_CPU_VAR_REV(1, 2)),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_834220
@@ -88,8 +89,9 @@
 	/* Cortex-A57 r0p0 - r1p2 */
 		.desc = "ARM erratum 834220",
 		.capability = ARM64_WORKAROUND_834220,
-		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
-			   (1 << MIDR_VARIANT_SHIFT) | 2),
+		MIDR_RANGE(MIDR_CORTEX_A57,
+			   MIDR_CPU_VAR_REV(0, 0),
+			   MIDR_CPU_VAR_REV(1, 2)),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_845719
@@ -113,8 +115,9 @@
 	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
 		.desc = "Cavium erratum 27456",
 		.capability = ARM64_WORKAROUND_CAVIUM_27456,
-		MIDR_RANGE(MIDR_THUNDERX, 0x00,
-			   (1 << MIDR_VARIANT_SHIFT) | 1),
+		MIDR_RANGE(MIDR_THUNDERX,
+			   MIDR_CPU_VAR_REV(0, 0),
+			   MIDR_CPU_VAR_REV(1, 1)),
 	},
 	{
 	/* Cavium ThunderX, T81 pass 1.0 */
@@ -130,6 +133,24 @@
 		.def_scope = SCOPE_LOCAL_CPU,
 		.enable = cpu_enable_trap_ctr_access,
 	},
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+	{
+		.desc = "Qualcomm Technologies Falkor erratum 1003",
+		.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+		MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
+			   MIDR_CPU_VAR_REV(0, 0),
+			   MIDR_CPU_VAR_REV(0, 0)),
+	},
+#endif
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+	{
+		.desc = "Qualcomm Technologies Falkor erratum 1009",
+		.capability = ARM64_WORKAROUND_REPEAT_TLBI,
+		MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
+			   MIDR_CPU_VAR_REV(0, 0),
+			   MIDR_CPU_VAR_REV(0, 0)),
+	},
+#endif
 	{
 	}
 };
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fdf8f04..abda8e8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -23,12 +23,14 @@
 #include <linux/sort.h>
 #include <linux/stop_machine.h>
 #include <linux/types.h>
+#include <linux/mm.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
+#include <asm/traps.h>
 #include <asm/virt.h>
 
 unsigned long elf_hwcap __read_mostly;
@@ -52,9 +54,10 @@
 DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcap_keys);
 
-#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
 		.sign = SIGNED,				\
+		.visible = VISIBLE,			\
 		.strict = STRICT,			\
 		.type = TYPE,				\
 		.shift = SHIFT,				\
@@ -63,12 +66,12 @@
 	}
 
 /* Define a feature with unsigned values */
-#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
-	__ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	__ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
 
 /* Define a feature with a signed value */
-#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
-	__ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	__ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
 
 #define ARM64_FTR_END					\
 	{						\
@@ -80,85 +83,80 @@
 cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
 
 
+/*
+ * NOTE: Any changes to the visibility of features should be kept in
+ * sync with the documentation of the CPU feature register ABI.
+ */
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
 	/* Linux doesn't care about the EL3 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
 	/* Linux shouldn't care about secure memory */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
 	/*
 	 * Differing PARange is fine as long as all peripherals and memory are mapped
 	 * within the minimum PARange of all CPUs
 	 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
 	/*
 	 * Linux can handle differing I-cache policies. Userspace JITs will
 	 * make use of *minLine.
 	 * If we have differing I-cache policies, report it as the weakest - AIVIVT.
 	 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT),	/* L1Ip */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT),	/* L1Ip */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
 	ARM64_FTR_END,
 };
 
@@ -168,79 +166,78 @@
 };
 
 static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf),	/* InnerShr */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0),	/* TCM */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* ShareLvl */
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf),	/* OuterShr */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),	/* PMSA */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* VMSA */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf),	/* InnerShr */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0),	/* TCM */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* ShareLvl */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf),	/* OuterShr */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),	/* PMSA */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* VMSA */
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+	/*
+	 * We can instantiate multiple PMU instances with different levels
+	 * of support.
+	 */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_mvfr2[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_dczid[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1),		/* DZP */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* BS */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1),		/* DZP */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* BS */
 	ARM64_FTR_END,
 };
 
 
 static const struct arm64_ftr_bits ftr_id_isar5[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* RAZ */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_pfr0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* State1 */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* State0 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* State1 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* State0 */
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_dfr0[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
-	S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf),	/* PerfMon */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf),	/* PerfMon */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -251,29 +248,24 @@
  * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
  */
 static const struct arm64_ftr_bits ftr_generic_32bits[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
 	ARM64_FTR_END,
 };
 
-static const struct arm64_ftr_bits ftr_generic[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+/* Table for a single 32bit feature value */
+static const struct arm64_ftr_bits ftr_single32[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 32, 0),
 	ARM64_FTR_END,
 };
 
-static const struct arm64_ftr_bits ftr_generic32[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
-	ARM64_FTR_END,
-};
-
-static const struct arm64_ftr_bits ftr_aa64raz[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+static const struct arm64_ftr_bits ftr_raz[] = {
 	ARM64_FTR_END,
 };
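
Each table entry above gains a leading FTR_VISIBLE/FTR_HIDDEN argument, which decides whether that field shows through to userspace via the MRS emulation added later in this series. A sketch of the shape the helper macros take after this change (modelled on arch/arm64/include/asm/cpufeature.h; treat the exact expansion as illustrative rather than the verbatim header):

    #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
    	{						\
    		.sign		= SIGNED,		\
    		.visible	= VISIBLE,		\
    		.strict		= STRICT,		\
    		.type		= TYPE,			\
    		.shift		= SHIFT,		\
    		.width		= WIDTH,		\
    		.safe_val	= SAFE_VAL,		\
    	}

    /* Unsigned by default; S_ARM64_FTR_BITS is the signed variant */
    #define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
    	__ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)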
 
@@ -314,15 +306,15 @@
 
 	/* Op1 = 0, CRn = 0, CRm = 4 */
 	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
-	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
 
 	/* Op1 = 0, CRn = 0, CRm = 5 */
 	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
-	ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+	ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz),
 
 	/* Op1 = 0, CRn = 0, CRm = 6 */
 	ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
-	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz),
 
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -334,7 +326,7 @@
 	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
 
 	/* Op1 = 3, CRn = 14, CRm = 0 */
-	ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+	ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_single32),
 };
 
 static int search_cmp_ftr_reg(const void *id, const void *regp)
@@ -410,25 +402,43 @@
 /*
  * Initialise the CPU feature register from Boot CPU values.
  * Also initialises the strict_mask for the register.
+ * Any bits that are not covered by an arm64_ftr_bits entry are considered
+ * RES0 for the system-wide value, and must strictly match.
  */
 static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
 {
 	u64 val = 0;
 	u64 strict_mask = ~0x0ULL;
+	u64 user_mask = 0;
+	u64 valid_mask = 0;
+
 	const struct arm64_ftr_bits *ftrp;
 	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
 
 	BUG_ON(!reg);
 
 	for (ftrp  = reg->ftr_bits; ftrp->width; ftrp++) {
+		u64 ftr_mask = arm64_ftr_mask(ftrp);
 		s64 ftr_new = arm64_ftr_value(ftrp, new);
 
 		val = arm64_ftr_set_value(ftrp, val, ftr_new);
+
+		valid_mask |= ftr_mask;
 		if (!ftrp->strict)
-			strict_mask &= ~arm64_ftr_mask(ftrp);
+			strict_mask &= ~ftr_mask;
+		if (ftrp->visible)
+			user_mask |= ftr_mask;
+		else
+			reg->user_val = arm64_ftr_set_value(ftrp,
+							    reg->user_val,
+							    ftrp->safe_val);
 	}
+
+	val &= valid_mask;
+
 	reg->sys_val = val;
 	reg->strict_mask = strict_mask;
+	reg->user_mask = user_mask;
 }
 
 void __init init_cpu_features(struct cpuinfo_arm64 *info)
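
init_cpu_ftr_reg() now splits each register in two: visible fields contribute their sanitised system value via user_mask, while hidden fields are pinned to their safe values in user_val. When the emulation path later needs the user-facing value, the two halves are OR-ed together; the helper this series adds for that amounts to:

    u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
    {
    	return (reg->user_val | (reg->sys_val & reg->user_mask));
    }
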
@@ -635,6 +645,9 @@
 	return regp->sys_val;
 }
 
+#define read_sysreg_case(r)	\
+	case r:		return read_sysreg_s(r)
+
 /*
  * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
  * Read the system register on the current CPU
@@ -642,36 +655,37 @@
 static u64 __raw_read_system_reg(u32 sys_id)
 {
 	switch (sys_id) {
-	case SYS_ID_PFR0_EL1:		return read_cpuid(ID_PFR0_EL1);
-	case SYS_ID_PFR1_EL1:		return read_cpuid(ID_PFR1_EL1);
-	case SYS_ID_DFR0_EL1:		return read_cpuid(ID_DFR0_EL1);
-	case SYS_ID_MMFR0_EL1:		return read_cpuid(ID_MMFR0_EL1);
-	case SYS_ID_MMFR1_EL1:		return read_cpuid(ID_MMFR1_EL1);
-	case SYS_ID_MMFR2_EL1:		return read_cpuid(ID_MMFR2_EL1);
-	case SYS_ID_MMFR3_EL1:		return read_cpuid(ID_MMFR3_EL1);
-	case SYS_ID_ISAR0_EL1:		return read_cpuid(ID_ISAR0_EL1);
-	case SYS_ID_ISAR1_EL1:		return read_cpuid(ID_ISAR1_EL1);
-	case SYS_ID_ISAR2_EL1:		return read_cpuid(ID_ISAR2_EL1);
-	case SYS_ID_ISAR3_EL1:		return read_cpuid(ID_ISAR3_EL1);
-	case SYS_ID_ISAR4_EL1:		return read_cpuid(ID_ISAR4_EL1);
-	case SYS_ID_ISAR5_EL1:		return read_cpuid(ID_ISAR4_EL1);
-	case SYS_MVFR0_EL1:		return read_cpuid(MVFR0_EL1);
-	case SYS_MVFR1_EL1:		return read_cpuid(MVFR1_EL1);
-	case SYS_MVFR2_EL1:		return read_cpuid(MVFR2_EL1);
+	read_sysreg_case(SYS_ID_PFR0_EL1);
+	read_sysreg_case(SYS_ID_PFR1_EL1);
+	read_sysreg_case(SYS_ID_DFR0_EL1);
+	read_sysreg_case(SYS_ID_MMFR0_EL1);
+	read_sysreg_case(SYS_ID_MMFR1_EL1);
+	read_sysreg_case(SYS_ID_MMFR2_EL1);
+	read_sysreg_case(SYS_ID_MMFR3_EL1);
+	read_sysreg_case(SYS_ID_ISAR0_EL1);
+	read_sysreg_case(SYS_ID_ISAR1_EL1);
+	read_sysreg_case(SYS_ID_ISAR2_EL1);
+	read_sysreg_case(SYS_ID_ISAR3_EL1);
+	read_sysreg_case(SYS_ID_ISAR4_EL1);
+	read_sysreg_case(SYS_ID_ISAR5_EL1);
+	read_sysreg_case(SYS_MVFR0_EL1);
+	read_sysreg_case(SYS_MVFR1_EL1);
+	read_sysreg_case(SYS_MVFR2_EL1);
 
-	case SYS_ID_AA64PFR0_EL1:	return read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64PFR1_EL1:	return read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64DFR0_EL1:	return read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64DFR1_EL1:	return read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64MMFR0_EL1:	return read_cpuid(ID_AA64MMFR0_EL1);
-	case SYS_ID_AA64MMFR1_EL1:	return read_cpuid(ID_AA64MMFR1_EL1);
-	case SYS_ID_AA64MMFR2_EL1:	return read_cpuid(ID_AA64MMFR2_EL1);
-	case SYS_ID_AA64ISAR0_EL1:	return read_cpuid(ID_AA64ISAR0_EL1);
-	case SYS_ID_AA64ISAR1_EL1:	return read_cpuid(ID_AA64ISAR1_EL1);
+	read_sysreg_case(SYS_ID_AA64PFR0_EL1);
+	read_sysreg_case(SYS_ID_AA64PFR1_EL1);
+	read_sysreg_case(SYS_ID_AA64DFR0_EL1);
+	read_sysreg_case(SYS_ID_AA64DFR1_EL1);
+	read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
+	read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
+	read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
+	read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
+	read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
 
-	case SYS_CNTFRQ_EL0:		return read_cpuid(CNTFRQ_EL0);
-	case SYS_CTR_EL0:		return read_cpuid(CTR_EL0);
-	case SYS_DCZID_EL0:		return read_cpuid(DCZID_EL0);
+	read_sysreg_case(SYS_CNTFRQ_EL0);
+	read_sysreg_case(SYS_CTR_EL0);
+	read_sysreg_case(SYS_DCZID_EL0);
+
 	default:
 		BUG();
 		return 0;
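
Besides being shorter, the read_sysreg_case() conversion removes the copy-paste bugs visible in the deleted lines (ISAR5 read ID_ISAR4_EL1, AA64PFR1 read ID_AA64PFR0_EL1, AA64DFR1 read ID_AA64DFR0_EL1), because the macro uses a single symbol for both the case label and the register access:

    /* read_sysreg_case(SYS_ID_ISAR5_EL1) expands to: */
    case SYS_ID_ISAR5_EL1:	return read_sysreg_s(SYS_ID_ISAR5_EL1);
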
@@ -720,13 +734,11 @@
 static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
 {
 	u32 midr = read_cpuid_id();
-	u32 rv_min, rv_max;
 
 	/* Cavium ThunderX pass 1.x and 2.x */
-	rv_min = 0;
-	rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
-
-	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX,
+		MIDR_CPU_VAR_REV(0, 0),
+		MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
 }
 
 static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
@@ -737,7 +749,7 @@
 static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
 			   int __unused)
 {
-	phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start);
+	phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
 
 	/*
 	 * Activate the lower HYP offset only if:
@@ -806,7 +818,10 @@
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
 		.min_field_value = 1,
-		.enable = cpu_enable_uao,
+		/*
+		 * We rely on stop_machine() calling uao_thread_switch() to set
+		 * UAO immediately after patching.
+		 */
 	},
 #endif /* CONFIG_ARM64_UAO */
 #ifdef CONFIG_ARM64_PAN
@@ -868,6 +883,7 @@
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -933,6 +949,8 @@
 
 static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
 {
+	/* We support emulation of accesses to CPU ID feature registers */
+	elf_hwcap |= HWCAP_CPUID;
 	for (; hwcaps->matches; hwcaps++)
 		if (hwcaps->matches(hwcaps, hwcaps->def_scope))
 			cap_set_elf_hwcap(hwcaps);
@@ -1120,3 +1138,101 @@
 {
 	return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
 }
+
+/*
+ * We emulate only the following system register space.
+ * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7]
+ * See Table C5-6 System instruction encodings for System register accesses,
+ * ARMv8 ARM(ARM DDI 0487A.f) for more details.
+ */
+static inline bool __attribute_const__ is_emulated(u32 id)
+{
+	return (sys_reg_Op0(id) == 0x3 &&
+		sys_reg_CRn(id) == 0x0 &&
+		sys_reg_Op1(id) == 0x0 &&
+		(sys_reg_CRm(id) == 0 ||
+		 ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7))));
+}
+
+/*
+ * With CRm == 0, reg should be one of:
+ * MIDR_EL1, MPIDR_EL1 or REVIDR_EL1.
+ */
+static inline int emulate_id_reg(u32 id, u64 *valp)
+{
+	switch (id) {
+	case SYS_MIDR_EL1:
+		*valp = read_cpuid_id();
+		break;
+	case SYS_MPIDR_EL1:
+		*valp = SYS_MPIDR_SAFE_VAL;
+		break;
+	case SYS_REVIDR_EL1:
+		/* IMPLEMENTATION DEFINED values are emulated with 0 */
+		*valp = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int emulate_sys_reg(u32 id, u64 *valp)
+{
+	struct arm64_ftr_reg *regp;
+
+	if (!is_emulated(id))
+		return -EINVAL;
+
+	if (sys_reg_CRm(id) == 0)
+		return emulate_id_reg(id, valp);
+
+	regp = get_arm64_ftr_reg(id);
+	if (regp)
+		*valp = arm64_ftr_reg_user_value(regp);
+	else
+		/*
+		 * The untracked registers are either IMPLEMENTATION DEFINED
+		 * (e.g, ID_AFR0_EL1) or reserved RAZ.
+		 */
+		*valp = 0;
+	return 0;
+}
+
+static int emulate_mrs(struct pt_regs *regs, u32 insn)
+{
+	int rc;
+	u32 sys_reg, dst;
+	u64 val;
+
+	/*
+	 * sys_reg values are defined as used in the mrs/msr instructions;
+	 * shift the imm value to get the encoding.
+	 */
+	sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5;
+	rc = emulate_sys_reg(sys_reg, &val);
+	if (!rc) {
+		dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+		pt_regs_write_reg(regs, dst, val);
+		regs->pc += 4;
+	}
+
+	return rc;
+}
+
+static struct undef_hook mrs_hook = {
+	.instr_mask = 0xfff00000,
+	.instr_val  = 0xd5300000,
+	.pstate_mask = COMPAT_PSR_MODE_MASK,
+	.pstate_val = PSR_MODE_EL0t,
+	.fn = emulate_mrs,
+};
+
+static int __init enable_mrs_emulation(void)
+{
+	register_undef_hook(&mrs_hook);
+	return 0;
+}
+
+late_initcall(enable_mrs_emulation);
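
With the hook registered, an MRS from EL0 to one of the emulated ID registers traps to the kernel, which writes back the sanitised value and skips the instruction. A minimal userspace sketch (assumes a libc exposing getauxval() and uapi headers defining HWCAP_CPUID):

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    int main(void)
    {
    	uint64_t isar0;

    	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID))
    		return 1;	/* emulation not advertised on this kernel */

    	/* Traps to EL1; the kernel supplies the system-wide safe value */
    	asm("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
    	printf("ID_AA64ISAR0_EL1: %016llx\n", (unsigned long long)isar0);
    	return 0;
    }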
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 7b7be71..5b22c68 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -63,6 +63,8 @@
 	"atomics",
 	"fphp",
 	"asimdhp",
+	"cpuid",
+	"asimdrdm",
 	NULL
 };
 
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index e88c064..4e6ad35 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -46,8 +46,7 @@
 	 *                         efi_system_table_t *sys_table,
 	 *                         unsigned long *image_addr) ;
 	 */
-	adrp	x8, _text
-	add	x8, x8, #:lo12:_text
+	adr_l	x8, _text
 	add	x2, sp, 16
 	str	x8, [x2]
 	bl	efi_entry
@@ -68,10 +67,8 @@
 	/*
 	 * Calculate size of the kernel Image (same for original and copy).
 	 */
-	adrp	x1, _text
-	add	x1, x1, #:lo12:_text
-	adrp	x2, _edata
-	add	x2, x2, #:lo12:_edata
+	adr_l	x1, _text
+	adr_l	x2, _edata
 	sub	x1, x2, x1
 
 	/*
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index aef02d2..e1be42e 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 
@@ -98,8 +99,7 @@
 ENTRY(_mcount)
 	mcount_enter
 
-	adrp	x0, ftrace_trace_function
-	ldr	x2, [x0, #:lo12:ftrace_trace_function]
+	ldr_l	x2, ftrace_trace_function
 	adr	x0, ftrace_stub
 	cmp	x0, x2			// if (ftrace_trace_function
 	b.eq	skip_ftrace_call	//     != ftrace_stub) {
@@ -115,15 +115,12 @@
 	mcount_exit			//   return;
 					// }
 skip_ftrace_call:
-	adrp	x1, ftrace_graph_return
-	ldr	x2, [x1, #:lo12:ftrace_graph_return]
+	ldr_l	x2, ftrace_graph_return
 	cmp	x0, x2			//   if ((ftrace_graph_return
 	b.ne	ftrace_graph_caller	//        != ftrace_stub)
 
-	adrp	x1, ftrace_graph_entry	//     || (ftrace_graph_entry
-	adrp	x0, ftrace_graph_entry_stub //     != ftrace_graph_entry_stub))
-	ldr	x2, [x1, #:lo12:ftrace_graph_entry]
-	add	x0, x0, #:lo12:ftrace_graph_entry_stub
+	ldr_l	x2, ftrace_graph_entry	//     || (ftrace_graph_entry
+	adr_l	x0, ftrace_graph_entry_stub //     != ftrace_graph_entry_stub))
 	cmp	x0, x2
 	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4b1abac..4fb6ccd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -149,7 +149,7 @@
 	.quad	0				// SizeOfHeapReserve
 	.quad	0				// SizeOfHeapCommit
 	.long	0				// LoaderFlags
-	.long	0x6				// NumberOfRvaAndSizes
+	.long	(section_table - .) / 8		// NumberOfRvaAndSizes
 
 	.quad	0				// ExportTable
 	.quad	0				// ImportTable
@@ -158,6 +158,11 @@
 	.quad	0				// CertificationTable
 	.quad	0				// BaseRelocationTable
 
+#ifdef CONFIG_DEBUG_EFI
+	.long	efi_debug_table - _head		// DebugTable
+	.long	efi_debug_table_size
+#endif
+
 	// Section table
 section_table:
 
@@ -195,6 +200,46 @@
 	.short	0		// NumberOfLineNumbers  (0 for executables)
 	.long	0xe0500020	// Characteristics (section flags)
 
+#ifdef CONFIG_DEBUG_EFI
+	/*
+	 * The debug table is referenced via its Relative Virtual Address (RVA),
+	 * which is only defined for those parts of the image that are covered
+	 * by a section declaration. Since this header is not covered by any
+	 * section, the debug table must be emitted elsewhere. So stick it in
+	 * the .init.rodata section instead.
+	 *
+	 * Note that the EFI debug entry itself may legally have a zero RVA,
+	 * which means we can simply put it right after the section headers.
+	 */
+	__INITRODATA
+
+	.align	2
+efi_debug_table:
+	// EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
+	.long	0			// Characteristics
+	.long	0			// TimeDateStamp
+	.short	0			// MajorVersion
+	.short	0			// MinorVersion
+	.long	2			// Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW
+	.long	efi_debug_entry_size	// SizeOfData
+	.long	0			// RVA
+	.long	efi_debug_entry - _head	// FileOffset
+
+	.set	efi_debug_table_size, . - efi_debug_table
+	.previous
+
+efi_debug_entry:
+	// EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
+	.ascii	"NB10"			// Signature
+	.long	0			// Unknown
+	.long	0			// Unknown2
+	.long	0			// Unknown3
+
+	.asciz	VMLINUX_PATH
+
+	.set	efi_debug_entry_size, . - efi_debug_entry
+#endif
+
 	/*
 	 * EFI will load .text onwards at the 4k section alignment
 	 * described in the PE/COFF header. To ensure that instruction
@@ -483,7 +528,7 @@
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
  *
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
@@ -592,15 +637,26 @@
 #endif
 
 	/* EL2 debug */
-	mrs	x0, id_aa64dfr0_el1		// Check ID_AA64DFR0_EL1 PMUVer
-	sbfx	x0, x0, #8, #4
+	mrs	x1, id_aa64dfr0_el1		// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	x0, x1, #8, #4
 	cmp	x0, #1
 	b.lt	4f				// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
 	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
 4:
-	csel	x0, xzr, x0, lt			// all PMU counters from EL1
-	msr	mdcr_el2, x0			// (if they exist)
+	csel	x3, xzr, x0, lt			// all PMU counters from EL1
+
+	/* Statistical profiling */
+	ubfx	x0, x1, #32, #4			// Check ID_AA64DFR0_EL1 PMSVer
+	cbz	x0, 6f				// Skip if SPE not present
+	cbnz	x2, 5f				// VHE?
+	mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+	orr	x3, x3, x1			// If we don't have VHE, then
+	b	6f				// use EL1&0 translation.
+5:						// For VHE, use EL2 translation
+	orr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
+6:
+	msr	mdcr_el2, x3			// Configure debug traps
 
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
@@ -613,8 +669,7 @@
 
 install_el2_stub:
 	/* Hypervisor stub */
-	adrp	x0, __hyp_stub_vectors
-	add	x0, x0, #:lo12:__hyp_stub_vectors
+	adr_l	x0, __hyp_stub_vectors
 	msr	vbar_el2, x0
 
 	/* spsr */
@@ -628,7 +683,7 @@
 
 /*
  * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
- * in x20. See arch/arm64/include/asm/virt.h for more info.
+ * in w0. See arch/arm64/include/asm/virt.h for more info.
  */
 set_cpu_boot_mode_flag:
 	adr_l	x1, __boot_cpu_mode
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index fe301cb..97a7384 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -50,9 +50,6 @@
  */
 extern int in_suspend;
 
-/* Find a symbols alias in the linear map */
-#define LMADDR(x)	phys_to_virt(virt_to_phys(x))
-
 /* Do we need to reset el2? */
 #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
 
@@ -102,8 +99,8 @@
 
 int pfn_is_nosave(unsigned long pfn)
 {
-	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
-	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+	unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
+	unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);
 
 	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
 }
@@ -125,12 +122,12 @@
 		return -EOVERFLOW;
 
 	arch_hdr_invariants(&hdr->invariants);
-	hdr->ttbr1_el1		= virt_to_phys(swapper_pg_dir);
+	hdr->ttbr1_el1		= __pa_symbol(swapper_pg_dir);
 	hdr->reenter_kernel	= _cpu_resume;
 
 	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
 	if (el2_reset_needed())
-		hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+		hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
 	else
 		hdr->__hyp_stub_vectors = 0;
 
@@ -460,7 +457,6 @@
 	void *zero_page;
 	size_t exit_size;
 	pgd_t *tmp_pg_dir;
-	void *lm_restore_pblist;
 	phys_addr_t phys_hibernate_exit;
 	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
 					  void *, phys_addr_t, phys_addr_t);
@@ -472,7 +468,7 @@
 	 */
 	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
 	if (!tmp_pg_dir) {
-		pr_err("Failed to allocate memory for temporary page tables.");
+		pr_err("Failed to allocate memory for temporary page tables.\n");
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -481,18 +477,12 @@
 		goto out;
 
 	/*
-	 * Since we only copied the linear map, we need to find restore_pblist's
-	 * linear map address.
-	 */
-	lm_restore_pblist = LMADDR(restore_pblist);
-
-	/*
 	 * We need a zero page that is zero before & after resume in order
 	 * to break before make on the ttbr1 page tables.
 	 */
 	zero_page = (void *)get_safe_page(GFP_ATOMIC);
 	if (!zero_page) {
-		pr_err("Failed to allocate zero page.");
+		pr_err("Failed to allocate zero page.\n");
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -512,7 +502,7 @@
 				   &phys_hibernate_exit,
 				   (void *)get_safe_page, GFP_ATOMIC);
 	if (rc) {
-		pr_err("Failed to create safe executable page for hibernate_exit code.");
+		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 		goto out;
 	}
 
@@ -537,7 +527,7 @@
 	}
 
 	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
-		       resume_hdr.reenter_kernel, lm_restore_pblist,
+		       resume_hdr.reenter_kernel, restore_pblist,
 		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
 
 out:
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 67f9cb9..b6badff 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@
 	if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
 		page = vmalloc_to_page(addr);
 	else if (!module)
-		page = pfn_to_page(PHYS_PFN(__pa(addr)));
+		page = phys_to_page(__pa_symbol(addr));
 	else
 		return addr;
 
@@ -417,6 +417,35 @@
 	return insn;
 }
 
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+					u32 insn)
+{
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_REGTYPE_RT:
+	case AARCH64_INSN_REGTYPE_RD:
+		shift = 0;
+		break;
+	case AARCH64_INSN_REGTYPE_RN:
+		shift = 5;
+		break;
+	case AARCH64_INSN_REGTYPE_RT2:
+	case AARCH64_INSN_REGTYPE_RA:
+		shift = 10;
+		break;
+	case AARCH64_INSN_REGTYPE_RM:
+		shift = 16;
+		break;
+	default:
+		pr_err("%s: unknown register type encoding %d\n", __func__,
+		       type);
+		return 0;
+	}
+
+	return (insn >> shift) & GENMASK(4, 0);
+}
+
 static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
 					u32 insn,
 					enum aarch64_insn_register reg)
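
aarch64_insn_decode_register() mirrors the existing encode helper: every A64 register operand sits in a fixed 5-bit field (Rt/Rd at bits [4:0], Rn at [9:5], Rt2/Ra at [14:10], Rm at [20:16]). For instance, with the encoding of "mrs x3, ctr_el0" (0xd53b0023, computed by hand, worth checking against the ARM ARM):

    u32 rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, 0xd53b0023);
    /* rt == 3, i.e. the destination register x3 */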
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 357d3ef..be05868 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -17,6 +17,8 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 
+#include <asm/ptrace.h>
+
 #include "simulate-insn.h"
 
 #define bbl_displacement(insn)		\
@@ -36,30 +38,22 @@
 
 static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
 {
-	if (reg < 31)
-		regs->regs[reg] = val;
+	pt_regs_write_reg(regs, reg, val);
 }
 
 static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val)
 {
-	if (reg < 31)
-		regs->regs[reg] = lower_32_bits(val);
+	pt_regs_write_reg(regs, reg, lower_32_bits(val));
 }
 
 static inline u64 get_x_reg(struct pt_regs *regs, int reg)
 {
-	if (reg < 31)
-		return regs->regs[reg];
-	else
-		return 0;
+	return pt_regs_read_reg(regs, reg);
 }
 
 static inline u32 get_w_reg(struct pt_regs *regs, int reg)
 {
-	if (reg < 31)
-		return lower_32_bits(regs->regs[reg]);
-	else
-		return 0;
+	return lower_32_bits(pt_regs_read_reg(regs, reg));
 }
 
 static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a3a2816..1ad48f9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -339,7 +339,7 @@
 /*
  * Thread switching.
  */
-struct task_struct *__switch_to(struct task_struct *prev,
+__notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *next)
 {
 	struct task_struct *last;
@@ -407,7 +407,7 @@
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	if (is_compat_task())
-		return randomize_page(mm->brk, 0x02000000);
+		return randomize_page(mm->brk, SZ_32M);
 	else
-		return randomize_page(mm->brk, 0x40000000);
+		return randomize_page(mm->brk, SZ_1G);
 }
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 42816be..e8edbf1 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -20,6 +20,7 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/psci.h>
+#include <linux/mm.h>
 
 #include <uapi/linux/psci.h>
 
@@ -45,7 +46,7 @@
 
 static int cpu_psci_cpu_boot(unsigned int cpu)
 {
-	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry));
 	if (err)
 		pr_err("failed to boot CPU%d (%d)\n", cpu, err);
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b051367..952e2c0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -42,6 +42,7 @@
 #include <linux/of_fdt.h>
 #include <linux/efi.h>
 #include <linux/psci.h>
+#include <linux/mm.h>
 
 #include <asm/acpi.h>
 #include <asm/fixmap.h>
@@ -199,16 +200,16 @@
 	struct memblock_region *region;
 	struct resource *res;
 
-	kernel_code.start   = virt_to_phys(_text);
-	kernel_code.end     = virt_to_phys(__init_begin - 1);
-	kernel_data.start   = virt_to_phys(_sdata);
-	kernel_data.end     = virt_to_phys(_end - 1);
+	kernel_code.start   = __pa_symbol(_text);
+	kernel_code.end     = __pa_symbol(__init_begin - 1);
+	kernel_data.start   = __pa_symbol(_sdata);
+	kernel_data.end     = __pa_symbol(_end - 1);
 
 	for_each_memblock(memory, region) {
 		res = alloc_bootmem_low(sizeof(*res));
 		if (memblock_is_nomap(region)) {
 			res->name  = "reserved";
-			res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			res->flags = IORESOURCE_MEM;
 		} else {
 			res->name  = "System RAM";
 			res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
@@ -297,7 +298,7 @@
 	 * faults in case uaccess_enable() is inadvertently called by the init
 	 * thread.
 	 */
-	init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+	init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
 #endif
 
 #ifdef CONFIG_VT
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index ae0496f..6252234 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -12,6 +12,7 @@
  *
  */
 #include <linux/linkage.h>
+#include <linux/arm-smccc.h>
 #include <asm/asm-offsets.h>
 
 	.macro SMCCC instr
@@ -20,24 +21,32 @@
 	ldr	x4, [sp]
 	stp	x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
 	stp	x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
-	ret
+	ldr	x4, [sp, #8]
+	cbz	x4, 1f /* no quirk structure */
+	ldr	x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
+	cmp	x9, #ARM_SMCCC_QUIRK_QCOM_A6
+	b.ne	1f
+	str	x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+1:	ret
 	.cfi_endproc
 	.endm
 
 /*
  * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
  *		  unsigned long a3, unsigned long a4, unsigned long a5,
- *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ *		  struct arm_smccc_quirk *quirk)
  */
-ENTRY(arm_smccc_smc)
+ENTRY(__arm_smccc_smc)
 	SMCCC	smc
-ENDPROC(arm_smccc_smc)
+ENDPROC(__arm_smccc_smc)
 
 /*
  * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
  *		  unsigned long a3, unsigned long a4, unsigned long a5,
- *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ *		  struct arm_smccc_quirk *quirk)
  */
-ENTRY(arm_smccc_hvc)
+ENTRY(__arm_smccc_hvc)
 	SMCCC	hvc
-ENDPROC(arm_smccc_hvc)
+ENDPROC(__arm_smccc_hvc)
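
Renaming the entry points to __arm_smccc_{smc,hvc} and appending a quirk argument lets the common wrappers pass NULL while quirk-aware callers recover extra state, here Qualcomm's convention of returning session information in x6. A hedged sketch of a quirk-aware call (fn_id and a1..a5 are placeholders; the quirk structure layout follows linux/arm-smccc.h as extended by this series):

    struct arm_smccc_res res;
    struct arm_smccc_quirk quirk = { .id = ARM_SMCCC_QUIRK_QCOM_A6 };

    /* On return, the firmware's x6 state is preserved in quirk.state.a6 */
    __arm_smccc_smc(fn_id, a1, a2, a3, a4, a5, quirk.state.a6, 0, &res, &quirk);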
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index cb87234..a8ec5da 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -603,9 +603,9 @@
  */
 static void __init of_parse_and_init_cpus(void)
 {
-	struct device_node *dn = NULL;
+	struct device_node *dn;
 
-	while ((dn = of_find_node_by_type(dn, "cpu"))) {
+	for_each_node_by_type(dn, "cpu") {
 		u64 hwid = of_get_cpu_mpidr(dn);
 
 		if (hwid == INVALID_HWID)
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 9a00eee..9303465 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 #include <linux/smp.h>
 #include <linux/types.h>
+#include <linux/mm.h>
 
 #include <asm/cacheflush.h>
 #include <asm/cpu_ops.h>
@@ -98,7 +99,7 @@
 	 * boot-loader's endianness before jumping. This is mandated by
 	 * the boot protocol.
 	 */
-	writeq_relaxed(__pa(secondary_holding_pen), release_addr);
+	writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
 	__flush_dcache_area((__force void *)release_addr,
 			    sizeof(*release_addr));
 
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 655e65f..565dd69 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -41,7 +41,6 @@
 	per_cpu(cpu_scale, cpu) = capacity;
 }
 
-#ifdef CONFIG_PROC_SYSCTL
 static ssize_t cpu_capacity_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
@@ -98,7 +97,6 @@
 	return 0;
 }
 subsys_initcall(register_cpu_capacity_sysctl);
-#endif
 
 static u32 capacity_scale;
 static u32 *raw_capacity;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 659b2e6..7d47c2c 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -466,7 +466,7 @@
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
-	address = (rt == 31) ? 0 : regs->regs[rt];
+	address = pt_regs_read_reg(regs, rt);
 
 	switch (crm) {
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */
@@ -495,8 +495,10 @@
 static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
 {
 	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
 
-	regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val;
+	pt_regs_write_reg(regs, rt, val);
+
 	regs->pc += 4;
 }
 
@@ -531,7 +533,12 @@
 			return;
 		}
 
-	force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+	/*
+	 * New SYS instructions may previously have been undefined at EL0. Fall
+	 * back to our usual undefined instruction handler so that we handle
+	 * these consistently.
+	 */
+	do_undefinstr(regs);
 }
 
 long compat_arm_syscall(struct pt_regs *regs);
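
The pt_regs_read_reg()/pt_regs_write_reg() accessors centralise the XZR special case that used to be open-coded as "(rt == 31) ? 0 : regs->regs[rt]": register 31 reads as zero and writes to it are discarded, instead of silently aliasing the saved SP slot that follows regs[] in struct pt_regs. Their shape, as added to asm/ptrace.h by this series (approximate):

    static inline u64 pt_regs_read_reg(const struct pt_regs *regs, int r)
    {
    	return (r == 31) ? 0 : regs->regs[r];
    }

    static inline void pt_regs_write_reg(struct pt_regs *regs, int r, u64 val)
    {
    	if (r != 31)
    		regs->regs[r] = val;
    }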
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a2c2478..41b6e31 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -123,6 +123,7 @@
 {
 	int i;
 	struct page **vdso_pagelist;
+	unsigned long pfn;
 
 	if (memcmp(&vdso_start, "\177ELF", 4)) {
 		pr_err("vDSO is not a valid ELF object!\n");
@@ -140,11 +141,14 @@
 		return -ENOMEM;
 
 	/* Grab the vDSO data page. */
-	vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
+	vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
 
 	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(&vdso_start);
+
 	for (i = 0; i < vdso_pages; i++)
-		vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
+		vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
 
 	vdso_spec[0].pages = &vdso_pagelist[0];
 	vdso_spec[1].pages = &vdso_pagelist[1];
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 47e5f0f..dbadfaf 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -95,6 +95,7 @@
  *  - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
  *  - Debug ROM Address (MDCR_EL2_TDRA)
  *  - OS related registers (MDCR_EL2_TDOSA)
+ *  - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
  *
  * Additionally, KVM only traps guest accesses to the debug registers if
  * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
@@ -110,8 +111,13 @@
 
 	trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
 
+	/*
+	 * This also clears MDCR_EL2_E2PB_MASK to disable guest access
+	 * to the profiling buffer.
+	 */
 	vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
 	vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+				MDCR_EL2_TPMS |
 				MDCR_EL2_TPMCR |
 				MDCR_EL2_TDRA |
 				MDCR_EL2_TDOSA);
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 4ba5c90..f5154ed 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -65,6 +65,66 @@
 	default:	write_debug(ptr[0], reg, 0);			\
 	}
 
+#define PMSCR_EL1		sys_reg(3, 0, 9, 9, 0)
+
+#define PMBLIMITR_EL1		sys_reg(3, 0, 9, 10, 0)
+#define PMBLIMITR_EL1_E		BIT(0)
+
+#define PMBIDR_EL1		sys_reg(3, 0, 9, 10, 7)
+#define PMBIDR_EL1_P		BIT(4)
+
+#define psb_csync()		asm volatile("hint #17")
+
+static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
+{
+	/* The vcpu can run, but it can't hide. */
+}
+
+static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
+{
+	u64 reg;
+
+	/* SPE present on this CPU? */
+	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+						  ID_AA64DFR0_PMSVER_SHIFT))
+		return;
+
+	/* Yes; is it owned by EL3? */
+	reg = read_sysreg_s(PMBIDR_EL1);
+	if (reg & PMBIDR_EL1_P)
+		return;
+
+	/* No; is the host actually using the thing? */
+	reg = read_sysreg_s(PMBLIMITR_EL1);
+	if (!(reg & PMBLIMITR_EL1_E))
+		return;
+
+	/* Yes; save the control register and disable data generation */
+	*pmscr_el1 = read_sysreg_s(PMSCR_EL1);
+	write_sysreg_s(0, PMSCR_EL1);
+	isb();
+
+	/* Now drain all buffered data to memory */
+	psb_csync();
+	dsb(nsh);
+}
+
+static hyp_alternate_select(__debug_save_spe,
+			    __debug_save_spe_nvhe, __debug_save_spe_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+{
+	if (!pmscr_el1)
+		return;
+
+	/* The host page table is installed, but not yet synchronised */
+	isb();
+
+	/* Re-enable data generation */
+	write_sysreg_s(pmscr_el1, PMSCR_EL1);
+}
+
 void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
 				   struct kvm_guest_debug_arch *dbg,
 				   struct kvm_cpu_context *ctxt)
@@ -118,13 +178,15 @@
 	    (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
 		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 
-	__debug_save_state(vcpu, &vcpu->arch.host_debug_state,
+	__debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
 			   kern_hyp_va(vcpu->arch.host_cpu_context));
+	__debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
 }
 
 void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
 {
-	__debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
+	__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+	__debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
 			      kern_hyp_va(vcpu->arch.host_cpu_context));
 
 	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 75e83dd..aede165 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -103,7 +103,13 @@
 static void __hyp_text __deactivate_traps_vhe(void)
 {
 	extern char vectors[];	/* kernel exception vectors */
+	u64 mdcr_el2 = read_sysreg(mdcr_el2);
 
+	mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+		    MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+		    MDCR_EL2_TPMS;
+
+	write_sysreg(mdcr_el2, mdcr_el2);
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 	write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
 	write_sysreg(vectors, vbar_el1);
@@ -111,6 +117,12 @@
 
 static void __hyp_text __deactivate_traps_nvhe(void)
 {
+	u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+	mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+	mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+	write_sysreg(mdcr_el2, mdcr_el2);
 	write_sysreg(HCR_RW, hcr_el2);
 	write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
@@ -132,7 +144,6 @@
 
 	__deactivate_traps_arch()();
 	write_sysreg(0, hstr_el2);
-	write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
 	write_sysreg(0, pmuserenr_el0);
 }
 
@@ -357,6 +368,10 @@
 	}
 
 	__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+	/*
+	 * This must come after restoring the host sysregs, since a non-VHE
+	 * system may enable SPE here and make use of the TTBRs.
+	 */
 	__debug_cond_restore_host_state(vcpu);
 
 	return exit_code;
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 88e2f2b..e8e7ba2 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
  */
 
 #include <asm/kvm_hyp.h>
+#include <asm/tlbflush.h>
 
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
@@ -32,7 +33,7 @@
 	 * whole of Stage-1. Weep...
 	 */
 	ipa >>= 12;
-	asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
+	__tlbi(ipas2e1is, ipa);
 
 	/*
 	 * We have to ensure completion of the invalidation at Stage-2,
@@ -41,7 +42,7 @@
 	 * the Stage-1 invalidation happened first.
 	 */
 	dsb(ish);
-	asm volatile("tlbi vmalle1is" : : );
+	__tlbi(vmalle1is);
 	dsb(ish);
 	isb();
 
@@ -57,7 +58,7 @@
 	write_sysreg(kvm->arch.vttbr, vttbr_el2);
 	isb();
 
-	asm volatile("tlbi vmalls12e1is" : : );
+	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
@@ -72,7 +73,7 @@
 	write_sysreg(kvm->arch.vttbr, vttbr_el2);
 	isb();
 
-	asm volatile("tlbi vmalle1" : : );
+	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
@@ -82,7 +83,7 @@
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	dsb(ishst);
-	asm volatile("tlbi alle1is	\n"
-		     "ic ialluis	  ": : );
+	__tlbi(alle1is);
+	asm volatile("ic ialluis" : : );
 	dsb(ish);
 }
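
The __tlbi() wrapper gives TLBI operations with and without an address operand a single spelling, selecting between two asm forms by counting arguments. A sketch of how such a macro can be built (the in-tree version in asm/tlbflush.h is equivalent in spirit):

    #define __TLBI_0(op, arg)		asm ("tlbi " #op)
    #define __TLBI_1(op, arg)		asm ("tlbi " #op ", %0" : : "r" (arg))
    #define __TLBI_N(op, arg, n, ...)	__TLBI_##n(op, arg)

    /* Picks __TLBI_1 when an address is supplied, __TLBI_0 otherwise */
    #define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)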
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index e703fb9..9b0ba19 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -6,6 +6,8 @@
 obj-$(CONFIG_ARM64_PTDUMP_CORE)	+= dump.o
 obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
 obj-$(CONFIG_NUMA)		+= numa.o
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
+KASAN_SANITIZE_physaddr.o	:= n
 
 obj-$(CONFIG_KASAN)		+= kasan_init.o
 KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 4c63cb1..68634c6 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -79,6 +79,13 @@
 	}
 }
 
+static void set_reserved_asid_bits(void)
+{
+	if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
+	    cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
+		__set_bit(FALKOR_RESERVED_ASID, asid_map);
+}
+
 static void flush_context(unsigned int cpu)
 {
 	int i;
@@ -87,6 +94,8 @@
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 
+	set_reserved_asid_bits();
+
 	/*
 	 * Ensure the generation bump is observed before we xchg the
 	 * active_asids.
@@ -244,6 +253,8 @@
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
+	set_reserved_asid_bits();
+
 	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
 	return 0;
 }
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4a14b25..351f759 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -211,7 +211,8 @@
 	dma_addr_t dev_addr;
 
 	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
-	if (!is_device_dma_coherent(dev))
+	if (!is_device_dma_coherent(dev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
 
 	return dev_addr;
@@ -222,7 +223,8 @@
 				 size_t size, enum dma_data_direction dir,
 				 unsigned long attrs)
 {
-	if (!is_device_dma_coherent(dev))
+	if (!is_device_dma_coherent(dev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
 	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
 }
@@ -235,7 +237,8 @@
 	int i, ret;
 
 	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
-	if (!is_device_dma_coherent(dev))
+	if (!is_device_dma_coherent(dev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		for_each_sg(sgl, sg, ret, i)
 			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
 				       sg->length, dir);
@@ -251,7 +254,8 @@
 	struct scatterlist *sg;
 	int i;
 
-	if (!is_device_dma_coherent(dev))
+	if (!is_device_dma_coherent(dev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		for_each_sg(sgl, sg, nelems, i)
 			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
 					 sg->length, dir);
@@ -352,6 +356,13 @@
 	return 1;
 }
 
+static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
+{
+	if (swiotlb)
+		return swiotlb_dma_mapping_error(hwdev, addr);
+	return 0;
+}
+
 static struct dma_map_ops swiotlb_dma_ops = {
 	.alloc = __dma_alloc,
 	.free = __dma_free,
@@ -366,7 +377,7 @@
 	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
 	.dma_supported = __swiotlb_dma_supported,
-	.mapping_error = swiotlb_dma_mapping_error,
+	.mapping_error = __swiotlb_dma_mapping_error,
 };
 
 static int __init atomic_pool_init(void)
@@ -830,14 +841,21 @@
 	 * then the IOMMU core will have already configured a group for this
 	 * device, and allocated the default domain for that group.
 	 */
-	if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
-		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
-			dev_name(dev));
-		return false;
+	if (!domain)
+		goto out_err;
+
+	if (domain->type == IOMMU_DOMAIN_DMA) {
+		if (iommu_dma_init_domain(domain, dma_base, size, dev))
+			goto out_err;
+
+		dev->archdata.dma_ops = &iommu_dma_ops;
 	}
 
-	dev->archdata.dma_ops = &iommu_dma_ops;
 	return true;
+out_err:
+	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+		 dev_name(dev));
+	return false;
 }
 
 static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
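
The swiotlb map/unmap paths now honour DMA_ATTR_SKIP_CPU_SYNC, so a driver that batches its own cache maintenance can opt out of the implicit __dma_map_area()/__dma_unmap_area() calls. A hedged usage sketch from a driver's point of view (dev, page and size are placeholders):

    /* Map without the implicit CPU sync ... */
    dma_addr_t addr = dma_map_page_attrs(dev, page, 0, size, DMA_TO_DEVICE,
    					 DMA_ATTR_SKIP_CPU_SYNC);

    /* ... and perform the maintenance explicitly once the buffer is ready */
    dma_sync_single_for_device(dev, addr, size, DMA_TO_DEVICE);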
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 156169c..8128385 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -691,17 +691,3 @@
 	return 0;
 }
 #endif /* CONFIG_ARM64_PAN */
-
-#ifdef CONFIG_ARM64_UAO
-/*
- * Kernel threads have fs=KERNEL_DS by default, and don't need to call
- * set_fs(), devtmpfs in particular relies on this behaviour.
- * We need to enable the feature at runtime (instead of adding it to
- * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
- */
-int cpu_enable_uao(void *__unused)
-{
-	asm(SET_PSTATE_UAO(1));
-	return 0;
-}
-#endif /* CONFIG_ARM64_UAO */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 380ebe7..e19e065 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -209,8 +210,8 @@
 	 * linear mapping. Take care not to clip the kernel which may be
 	 * high in memory.
 	 */
-	memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
-			ULLONG_MAX);
+	memblock_remove(max_t(u64, memstart_addr + linear_region_size,
+			__pa_symbol(_end)), ULLONG_MAX);
 	if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
 		/* ensure that memstart_addr remains sufficiently aligned */
 		memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
@@ -225,7 +226,7 @@
 	 */
 	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
 		memblock_mem_limit_remove_map(memory_limit);
-		memblock_add(__pa(_text), (u64)(_end - _text));
+		memblock_add(__pa_symbol(_text), (u64)(_end - _text));
 	}
 
 	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
@@ -278,7 +279,7 @@
 	 * Register the kernel text, kernel data, initrd, and initial
 	 * pagetables with memblock.
 	 */
-	memblock_reserve(__pa(_text), _end - _text);
+	memblock_reserve(__pa_symbol(_text), _end - _text);
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start) {
 		memblock_reserve(initrd_start, initrd_end - initrd_start);
@@ -486,7 +487,8 @@
 
 void free_initmem(void)
 {
-	free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
+	free_reserved_area(lm_alias(__init_begin),
+			   lm_alias(__init_end),
 			   0, "unused kernel");
 	/*
 	 * Unmap the __init region but leave the VM area in place. This
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 01e88c8..c4c8cd4c 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -88,7 +88,7 @@
 	 * We could get an address outside vmalloc range in case
 	 * of ioremap_cache() reusing a RAM mapping.
 	 */
-	if (VMALLOC_START <= addr && addr < VMALLOC_END)
+	if (is_vmalloc_addr((void *)addr))
 		vunmap((void *)addr);
 }
 EXPORT_SYMBOL(__iounmap);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 757009d..201d918 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/memblock.h>
 #include <linux/start_kernel.h>
+#include <linux/mm.h>
 
 #include <asm/mmu_context.h>
 #include <asm/kernel-pgtable.h>
@@ -26,6 +27,13 @@
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
 
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). All the early functions are called too
+ * early to use lm_alias so __p*d_populate functions must be used to populate
+ * with the physical address from __pa_symbol.
+ */
+
 static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 					unsigned long end)
 {
@@ -33,12 +41,12 @@
 	unsigned long next;
 
 	if (pmd_none(*pmd))
-		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+		__pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE);
 
 	pte = pte_offset_kimg(pmd, addr);
 	do {
 		next = addr + PAGE_SIZE;
-		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+		set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page),
 					PAGE_KERNEL));
 	} while (pte++, addr = next, addr != end && pte_none(*pte));
 }
@@ -51,7 +59,7 @@
 	unsigned long next;
 
 	if (pud_none(*pud))
-		pud_populate(&init_mm, pud, kasan_zero_pmd);
+		__pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE);
 
 	pmd = pmd_offset_kimg(pud, addr);
 	do {
@@ -68,7 +76,7 @@
 	unsigned long next;
 
 	if (pgd_none(*pgd))
-		pgd_populate(&init_mm, pgd, kasan_zero_pud);
+		__pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE);
 
 	pud = pud_offset_kimg(pgd, addr);
 	do {
@@ -148,7 +156,7 @@
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
 	dsb(ishst);
-	cpu_replace_ttbr1(tmp_pg_dir);
+	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -199,10 +207,10 @@
 	 */
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		set_pte(&kasan_zero_pte[i],
-			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+			pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
 
 	memset(kasan_zero_page, 0, PAGE_SIZE);
-	cpu_replace_ttbr1(swapper_pg_dir);
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
 	/* At this point kasan is fully initialized. Enable error messages */
 	init_task.kasan_depth = 0;
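
cpu_replace_ttbr1() wants a linear-map address, while swapper_pg_dir and tmp_pg_dir are kernel-image symbols, hence the lm_alias() conversions above. As best as can be told from its use throughout this series, the helper added to linux/mm.h reduces to:

    /* Return the linear-map alias of a kernel-image symbol address */
    #define lm_alias(x)	__va(__pa_symbol(x))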
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 17243e4..b805c01 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,6 +28,7 @@
 #include <linux/memblock.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/mm.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -359,8 +360,8 @@
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-	unsigned long kernel_start = __pa(_text);
-	unsigned long kernel_end = __pa(__init_begin);
+	phys_addr_t kernel_start = __pa_symbol(_text);
+	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 
 	/*
 	 * Take care not to create a writable alias for the
@@ -427,14 +428,14 @@
 	unsigned long section_size;
 
 	section_size = (unsigned long)_etext - (unsigned long)_text;
-	create_mapping_late(__pa(_text), (unsigned long)_text,
+	create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
 			    section_size, PAGE_KERNEL_ROX);
 	/*
 	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
 	 * to cover NOTES and EXCEPTION_TABLE.
 	 */
 	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
-	create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+	create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
 			    section_size, PAGE_KERNEL_RO);
 
 	/* flush the TLBs after updating live kernel mappings */
@@ -446,7 +447,7 @@
 static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 				      pgprot_t prot, struct vm_struct *vma)
 {
-	phys_addr_t pa_start = __pa(va_start);
+	phys_addr_t pa_start = __pa_symbol(va_start);
 	unsigned long size = va_end - va_start;
 
 	BUG_ON(!PAGE_ALIGNED(pa_start));
@@ -494,7 +495,7 @@
 		 */
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
 		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
-			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+			__pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
 		pud_clear_fixmap();
 	} else {
 		BUG();
@@ -524,8 +525,8 @@
 	 * To do this we need to go via a temporary pgd.
 	 */
 	cpu_replace_ttbr1(__va(pgd_phys));
-	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
-	cpu_replace_ttbr1(swapper_pg_dir);
+	memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
 	pgd_clear_fixmap();
 	memblock_free(pgd_phys, PAGE_SIZE);
@@ -534,7 +535,7 @@
 	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
 	 * allocated with it.
 	 */
-	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
 		      SWAPPER_DIR_SIZE - PAGE_SIZE);
 }
 
@@ -645,6 +646,12 @@
 	return &bm_pte[pte_index(addr)];
 }
 
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). This function is called too early to use
+ * lm_alias so __p*d_populate functions must be used to populate with the
+ * physical address from __pa_symbol.
+ */
 void __init early_fixmap_init(void)
 {
 	pgd_t *pgd;
@@ -654,7 +661,7 @@
 
 	pgd = pgd_offset_k(addr);
 	if (CONFIG_PGTABLE_LEVELS > 3 &&
-	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
 		/*
 		 * We only end up here if the kernel mapping and the fixmap
 		 * share the top level pgd entry, which should only happen on
@@ -663,12 +670,14 @@
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
 		pud = pud_offset_kimg(pgd, addr);
 	} else {
-		pgd_populate(&init_mm, pgd, bm_pud);
+		if (pgd_none(*pgd))
+			__pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
 		pud = fixmap_pud(addr);
 	}
-	pud_populate(&init_mm, pud, bm_pmd);
+	if (pud_none(*pud))
+		__pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
 	pmd = fixmap_pmd(addr);
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	__pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
new file mode 100644
index 0000000..91371da
--- /dev/null
+++ b/arch/arm64/mm/physaddr.c
@@ -0,0 +1,30 @@
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+
+#include <asm/memory.h>
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+	WARN(!__is_lm_address(x),
+	     "virt_to_phys used for non-linear address: %pK (%pS)\n",
+	      (void *)x,
+	      (void *)x);
+
+	return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+	/*
+	 * This is bounds checking against the kernel image only.
+	 * __pa_symbol should only be used on kernel symbol addresses.
+	 */
+	VIRTUAL_BUG_ON(x < (unsigned long) KERNEL_START ||
+		       x > (unsigned long) KERNEL_END);
+	return __pa_symbol_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
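
With CONFIG_DEBUG_VIRTUAL=y these out-of-line hooks replace the inline
translations, so misuse is reported at runtime instead of silently
returning a bogus physical address. A hedged illustration (hypothetical
caller; assumes an arm64 kernel whose image lies outside the linear map):

	phys_addr_t ok  = __pa_symbol(_text);	/* image symbol: bounds-checked */
	phys_addr_t ok2 = __pa(__va(ok));	/* linear-map VA: no warning */
	phys_addr_t bad = __pa(_text);		/* kimage VA: fires the WARN above */
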
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 32682be..cd4d53d 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -138,6 +138,7 @@
  *	- pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
+	pre_ttbr0_update_workaround x0, x1, x2
 	mmid	x1, x1				// get mm->context.id
 	bfi	x0, x1, #48, #16		// set the ASID
 	msr	ttbr0_el1, x0			// set TTBR0
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f793820..874c123 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -46,6 +46,7 @@
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3752521..4a5bb96 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -333,7 +333,7 @@
 		return NULL;
 
 	map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
-			   node->mapping_offset);
+			   node->mapping_offset + index * sizeof(*map));
 
 	/* Firmware bug! */
 	if (!map->output_reference) {
@@ -348,10 +348,10 @@
 	if (!(IORT_TYPE_MASK(parent->type) & type_mask))
 		return NULL;
 
-	if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) {
+	if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
 		if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
 		    node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-			*id_out = map[index].output_base;
+			*id_out = map->output_base;
 			return parent;
 		}
 	}
@@ -828,7 +828,7 @@
 
 	pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
 	if (!pdev)
-		return PTR_ERR(pdev);
+		return -ENOMEM;
 
 	count = ops->iommu_count_resources(node);
 
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index 6c60a50..493a56a 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -383,7 +383,7 @@
 	u32 *state = __this_cpu_read(psci_power_state);
 
 	return psci_ops.cpu_suspend(state[index - 1],
-				    virt_to_phys(cpu_resume));
+				    __pa_symbol(cpu_resume));
 }
 
 int psci_cpu_suspend_enter(unsigned long index)
@@ -419,7 +419,7 @@
 static int psci_system_suspend(unsigned long unused)
 {
 	return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND),
-			      virt_to_phys(cpu_resume), 0, 0);
+			      __pa_symbol(cpu_resume), 0, 0);
 }
 
 static int psci_system_suspend_enter(suspend_state_t state)
diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 4a0f5ea..1e2e519 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -91,6 +91,7 @@
 	dma_addr_t args_phys = 0;
 	void *args_virt = NULL;
 	size_t alloc_len;
+	struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
 
 	if (unlikely(arglen > N_REGISTER_ARGS)) {
 		alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
@@ -131,10 +132,16 @@
 					 qcom_smccc_convention,
 					 ARM_SMCCC_OWNER_SIP, fn_id);
 
+		quirk.state.a6 = 0;
+
 		do {
-			arm_smccc_smc(cmd, desc->arginfo, desc->args[0],
-				      desc->args[1], desc->args[2], x5, 0, 0,
-				      res);
+			arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+				      desc->args[1], desc->args[2], x5,
+				      quirk.state.a6, 0, res, &quirk);
+
+			if (res->a0 == QCOM_SCM_INTERRUPTED)
+				cmd = res->a0;
+
 		} while (res->a0 == QCOM_SCM_INTERRUPTED);
 
 		mutex_unlock(&qcom_scm_lock);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 84cf2f3..a88387b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -25,6 +25,7 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -2268,6 +2269,31 @@
 }
 
 /**
+ * of_find_last_cache_level - Find the level at which the last cache is
+ *		present for the given logical cpu
+ *
+ * @cpu: cpu number (logical index) for which the last cache level is needed
+ *
+ * Returns the level at which the last cache is present. This is exactly
+ * the same as the total number of cache levels for the given logical cpu.
+ */
+int of_find_last_cache_level(unsigned int cpu)
+{
+	u32 cache_level = 0;
+	struct device_node *prev = NULL, *np = of_cpu_device_node_get(cpu);
+
+	while (np) {
+		prev = np;
+		of_node_put(np);
+		np = of_find_next_cache_node(np);
+	}
+
+	of_property_read_u32(prev, "cache-level", &cache_level);
+
+	return cache_level;
+}
+
+/**
  * of_graph_parse_endpoint() - parse common endpoint node properties
  * @node: pointer to endpoint device_node
  * @endpoint: pointer to the OF endpoint data structure
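
A usage sketch for the new helper (hypothetical caller; assumes the
device tree describes each cache node with a "cache-level" property, as
the walk above expects):

	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		int levels = of_find_last_cache_level(cpu);

		pr_info("CPU%u: %d cache level(s) described in DT\n",
			cpu, levels);
	}
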
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..9365190 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,15 @@
 	  Say y if you want to use CPU performance monitors on ARM-based
 	  systems.
 
+config QCOM_L2_PMU
+	bool "Qualcomm Technologies L2-cache PMU"
+	depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+	help
+	  Provides support for the L2 cache performance monitor unit (PMU)
+	  in Qualcomm Technologies processors.
+	  Adds the L2 cache PMU into the perf events subsystem for
+	  monitoring L2 cache events.
+
 config XGENE_PMU
         depends on PERF_EVENTS && ARCH_XGENE
         bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b116e98..ef24833 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o
+obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
new file mode 100644
index 0000000..c259848
--- /dev/null
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -0,0 +1,1013 @@
+/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/local64.h>
+#include <asm/sysreg.h>
+
+#define MAX_L2_CTRS             9
+
+#define L2PMCR_NUM_EV_SHIFT     11
+#define L2PMCR_NUM_EV_MASK      0x1F
+
+#define L2PMCR                  0x400
+#define L2PMCNTENCLR            0x403
+#define L2PMCNTENSET            0x404
+#define L2PMINTENCLR            0x405
+#define L2PMINTENSET            0x406
+#define L2PMOVSCLR              0x407
+#define L2PMOVSSET              0x408
+#define L2PMCCNTCR              0x409
+#define L2PMCCNTR               0x40A
+#define L2PMCCNTSR              0x40C
+#define L2PMRESR                0x410
+#define IA_L2PMXEVCNTCR_BASE    0x420
+#define IA_L2PMXEVCNTR_BASE     0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE    0x424
+
+#define IA_L2_REG_OFFSET        0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL      0x000E0000
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004
+#define L2PMXEVFILTER_ORGFILTER_ALL     0x00000003
+
+#define L2EVTYPER_REG_SHIFT     3
+
+#define L2PMRESR_GROUP_BITS     8
+#define L2PMRESR_GROUP_MASK     GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT         31
+#define L2CYCLE_CTR_RAW_CODE    0xFE
+
+#define L2PMCR_RESET_ALL        0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN             BIT_ULL(63)
+
+#define L2_EVT_MASK             0x00000FFF
+#define L2_EVT_CODE_MASK        0x00000FF0
+#define L2_EVT_GRP_MASK         0x0000000F
+#define L2_EVT_CODE_SHIFT       4
+#define L2_EVT_GRP_SHIFT        0
+
+#define L2_EVT_CODE(event)   (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT)
+#define L2_EVT_GROUP(event)  (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT)
+
+#define L2_EVT_GROUP_MAX        7
+
+#define L2_COUNTER_RELOAD       BIT_ULL(31)
+#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
+
+#define L2CPUSRSELR_EL1         sys_reg(3, 3, 15, 0, 6)
+#define L2CPUSRDR_EL1           sys_reg(3, 3, 15, 0, 7)
+
+#define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses.
+ */
+static void set_l2_indirect_reg(u64 reg, u64 val)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2_access_lock, flags);
+	write_sysreg_s(reg, L2CPUSRSELR_EL1);
+	isb();
+	write_sysreg_s(val, L2CPUSRDR_EL1);
+	isb();
+	raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses.
+ */
+static u64 get_l2_indirect_reg(u64 reg)
+{
+	u64 val;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2_access_lock, flags);
+	write_sysreg_s(reg, L2CPUSRSELR_EL1);
+	isb();
+	val = read_sysreg_s(L2CPUSRDR_EL1);
+	raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+	return val;
+}
+
+struct cluster_pmu;
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ */
+struct l2cache_pmu {
+	struct hlist_node node;
+	u32 num_pmus;
+	struct pmu pmu;
+	int num_counters;
+	cpumask_t cpumask;
+	struct platform_device *pdev;
+	struct cluster_pmu * __percpu *pmu_cluster;
+	struct list_head clusters;
+};
+
+/*
+ * The cache is made up of one or more clusters, each cluster has its own PMU.
+ * Each cluster is associated with one or more CPUs.
+ * This structure represents one of the hardware PMUs.
+ *
+ * Events can be envisioned as a 2-dimensional array. Each column represents
+ * a group of events. There are 8 groups. Only one entry from each
+ * group can be in use at a time.
+ *
+ * Events are specified as 0xCCG, where CC is 2 hex digits specifying
+ * the code (array row) and G specifies the group (column).
+ *
+ * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE
+ * which is outside the above scheme.
+ */
+struct cluster_pmu {
+	struct list_head next;
+	struct perf_event *events[MAX_L2_CTRS];
+	struct l2cache_pmu *l2cache_pmu;
+	DECLARE_BITMAP(used_counters, MAX_L2_CTRS);
+	DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1);
+	int irq;
+	int cluster_id;
+	/* The CPU that is used for collecting events on this cluster */
+	int on_cpu;
+	/* All the CPUs associated with this cluster */
+	cpumask_t cluster_cpus;
+	spinlock_t pmu_lock;
+};
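
The 0xCCG encoding above maps directly onto the format attributes
registered later in this file ("l2_code" is config:4-11, "l2_group" is
config:0-3), so an event can be requested as, for example,
l2cache_0/l2_code=0x42,l2_group=0x3/ from the perf tool. The same
encoding in C (0x42 is an arbitrary example code):

	/* config[11:4] = code, config[3:0] = group  ->  attr.config = 0x423 */
	u64 config = (0x42ULL << L2_EVT_CODE_SHIFT) |
		     (0x3ULL << L2_EVT_GRP_SHIFT);
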
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static u32 l2_cycle_ctr_idx;
+static u32 l2_counter_present_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+	if (idx == l2_cycle_ctr_idx)
+		return BIT(L2CYCLE_CTR_BIT);
+
+	return BIT(idx);
+}
+
+static inline struct cluster_pmu *get_cluster_pmu(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
+}
+
+static void cluster_pmu_reset(void)
+{
+	/* Reset all counters */
+	set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+	set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
+	set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
+	set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
+}
+
+static inline void cluster_pmu_enable(void)
+{
+	set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
+}
+
+static inline void cluster_pmu_disable(void)
+{
+	set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
+}
+
+static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
+{
+	if (idx == l2_cycle_ctr_idx)
+		set_l2_indirect_reg(L2PMCCNTR, value);
+	else
+		set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
+}
+
+static inline u64 cluster_pmu_counter_get_value(u32 idx)
+{
+	u64 value;
+
+	if (idx == l2_cycle_ctr_idx)
+		value = get_l2_indirect_reg(L2PMCCNTR);
+	else
+		value = get_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx));
+
+	return value;
+}
+
+static inline void cluster_pmu_counter_enable(u32 idx)
+{
+	set_l2_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable(u32 idx)
+{
+	set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_enable_interrupt(u32 idx)
+{
+	set_l2_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable_interrupt(u32 idx)
+{
+	set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_set_evccntcr(u32 val)
+{
+	set_l2_indirect_reg(L2PMCCNTCR, val);
+}
+
+static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val)
+{
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val);
+}
+
+static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val)
+{
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val);
+}
+
+static void cluster_pmu_set_resr(struct cluster_pmu *cluster,
+			       u32 event_group, u32 event_cc)
+{
+	u64 field;
+	u64 resr_val;
+	u32 shift;
+	unsigned long flags;
+
+	shift = L2PMRESR_GROUP_BITS * event_group;
+	field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift);
+
+	spin_lock_irqsave(&cluster->pmu_lock, flags);
+
+	resr_val = get_l2_indirect_reg(L2PMRESR);
+	resr_val &= ~(L2PMRESR_GROUP_MASK << shift);
+	resr_val |= field;
+	resr_val |= L2PMRESR_EN;
+	set_l2_indirect_reg(L2PMRESR, resr_val);
+
+	spin_unlock_irqrestore(&cluster->pmu_lock, flags);
+}
+
+/*
+ * Hardware allows filtering of events based on the originating
+ * CPU. Turn this off by setting filter bits to allow events from
+ * all CPUs and subunits, as well as ID-independent events, in this
+ * cluster.
+ */
+static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr)
+{
+	u32 val =  L2PMXEVFILTER_SUFILTER_ALL |
+		   L2PMXEVFILTER_ORGFILTER_IDINDEP |
+		   L2PMXEVFILTER_ORGFILTER_ALL;
+
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val);
+}
+
+static inline u32 cluster_pmu_getreset_ovsr(void)
+{
+	u32 result = get_l2_indirect_reg(L2PMOVSSET);
+
+	set_l2_indirect_reg(L2PMOVSCLR, result);
+	return result;
+}
+
+static inline bool cluster_pmu_has_overflowed(u32 ovsr)
+{
+	return !!(ovsr & l2_counter_present_mask);
+}
+
+static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
+{
+	return !!(ovsr & idx_to_reg_bit(idx));
+}
+
+static void l2_cache_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+	u32 idx = hwc->idx;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = cluster_pmu_counter_get_value(idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/*
+	 * The cycle counter is 64-bit, but all other counters are
+	 * 32-bit, and we must handle 32-bit overflow explicitly.
+	 */
+	delta = now - prev;
+	if (idx != l2_cycle_ctr_idx)
+		delta &= 0xffffffff;
+
+	local64_add(delta, &event->count);
+}
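
The masking step is what keeps the subtraction correct across a 32-bit
wrap. A worked example with made-up counter values:

	u64 prev = 0xfffffff0, now = 0x10;
	u64 delta = (now - prev) & 0xffffffff;	/* 0x20, despite the wrap */
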
+
+static void l2_cache_cluster_set_period(struct cluster_pmu *cluster,
+				       struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 new;
+
+	/*
+	 * We limit the max period to half the max counter value so
+	 * that even in the case of extreme interrupt latency the
+	 * counter will (hopefully) not wrap past its initial value.
+	 */
+	if (idx == l2_cycle_ctr_idx)
+		new = L2_CYCLE_COUNTER_RELOAD;
+	else
+		new = L2_COUNTER_RELOAD;
+
+	local64_set(&hwc->prev_count, new);
+	cluster_pmu_counter_set_value(idx, new);
+}
+
+static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
+				   struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int num_ctrs = cluster->l2cache_pmu->num_counters - 1;
+	unsigned int group;
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters))
+			return -EAGAIN;
+
+		return l2_cycle_ctr_idx;
+	}
+
+	idx = find_first_zero_bit(cluster->used_counters, num_ctrs);
+	if (idx == num_ctrs)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	/*
+	 * Check for column exclusion: event column already in use by another
+	 * event. This is for events which are not in the same group.
+	 * Conflicting events in the same group are detected in event_init.
+	 */
+	group = L2_EVT_GROUP(hwc->config_base);
+	if (test_bit(group, cluster->used_groups))
+		return -EAGAIN;
+
+	set_bit(idx, cluster->used_counters);
+	set_bit(group, cluster->used_groups);
+
+	return idx;
+}
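
Column exclusion in practice: two events that decode to the same group
can never be scheduled together, however many counters are free. For
example (the group is the low nibble of the config value):

	/* 0x421 and 0x431 both land in group 1: the second is rejected. */
	BUILD_BUG_ON(L2_EVT_GROUP(0x421) != L2_EVT_GROUP(0x431));
	/* 0x421 and 0x432 use groups 1 and 2: they can run concurrently. */
	BUILD_BUG_ON(L2_EVT_GROUP(0x421) == L2_EVT_GROUP(0x432));
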
+
+static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cluster->used_counters);
+	if (hwc->config_base != L2CYCLE_CTR_RAW_CODE)
+		clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups);
+}
+
+static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
+{
+	struct cluster_pmu *cluster = data;
+	int num_counters = cluster->l2cache_pmu->num_counters;
+	u32 ovsr;
+	int idx;
+
+	ovsr = cluster_pmu_getreset_ovsr();
+	if (!cluster_pmu_has_overflowed(ovsr))
+		return IRQ_NONE;
+
+	for_each_set_bit(idx, cluster->used_counters, num_counters) {
+		struct perf_event *event = cluster->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
+			continue;
+
+		l2_cache_event_update(event);
+		hwc = &event->hw;
+
+		l2_cache_cluster_set_period(cluster, hwc);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void l2_cache_pmu_enable(struct pmu *pmu)
+{
+	/*
+	 * Although there is only one logical PMU (per socket) controlling
+	 * multiple physical PMUs (per cluster), per-task mode is not
+	 * supported, so each event is associated with a CPU. pmu_enable
+	 * is called on that CPU, so here it is only necessary to enable
+	 * the counters for the current CPU.
+	 */
+
+	cluster_pmu_enable();
+}
+
+static void l2_cache_pmu_disable(struct pmu *pmu)
+{
+	cluster_pmu_disable();
+}
+
+static int l2_cache_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	struct perf_event *sibling;
+	struct l2cache_pmu *l2cache_pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	l2cache_pmu = to_l2cache_pmu(event->pmu);
+
+	if (hwc->sample_period) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Sampling not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Per-task mode not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* We cannot filter accurately so we just don't allow it. */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_hv || event->attr.exclude_idle) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Can't exclude execution levels\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
+	     ((event->attr.config & ~L2_EVT_MASK) != 0)) &&
+	    (event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Invalid config %llx\n",
+				    event->attr.config);
+		return -EINVAL;
+	}
+
+	/* Don't allow groups with mixed PMUs, except for s/w events */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create mixed PMU group\n");
+		return -EINVAL;
+	}
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			    group_entry)
+		if (sibling->pmu != event->pmu &&
+		    !is_software_event(sibling)) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				 "Can't create mixed PMU group\n");
+			return -EINVAL;
+		}
+
+	cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
+	if (!cluster) {
+		/* CPU has not been initialised */
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			"CPU%d not associated with L2 cluster\n", event->cpu);
+		return -EINVAL;
+	}
+
+	/* Ensure all events in a group are on the same cpu */
+	if ((event->group_leader != event) &&
+	    (cluster->on_cpu != event->group_leader->cpu)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create group on CPUs %d and %d",
+			 event->cpu, event->group_leader->cpu);
+		return -EINVAL;
+	}
+
+	if ((event != event->group_leader) &&
+	    (L2_EVT_GROUP(event->group_leader->attr.config) ==
+	     L2_EVT_GROUP(event->attr.config))) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Column exclusion: conflicting events %llx %llx\n",
+		       event->group_leader->attr.config,
+		       event->attr.config);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			    group_entry) {
+		if ((sibling != event) &&
+		    (L2_EVT_GROUP(sibling->attr.config) ==
+		     L2_EVT_GROUP(event->attr.config))) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			     "Column exclusion: conflicting events %llx %llx\n",
+					    sibling->attr.config,
+					    event->attr.config);
+			return -EINVAL;
+		}
+	}
+
+	hwc->idx = -1;
+	hwc->config_base = event->attr.config;
+
+	/*
+	 * Ensure all events are on the same cpu so all events are in the
+	 * same cpu context, to avoid races on pmu_enable etc.
+	 */
+	event->cpu = cluster->on_cpu;
+
+	return 0;
+}
+
+static void l2_cache_event_start(struct perf_event *event, int flags)
+{
+	struct cluster_pmu *cluster;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u32 config;
+	u32 event_cc, event_group;
+
+	hwc->state = 0;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_cluster_set_period(cluster, hwc);
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		cluster_pmu_set_evccntcr(0);
+	} else {
+		config = hwc->config_base;
+		event_cc    = L2_EVT_CODE(config);
+		event_group = L2_EVT_GROUP(config);
+
+		cluster_pmu_set_evcntcr(idx, 0);
+		cluster_pmu_set_evtyper(idx, event_group);
+		cluster_pmu_set_resr(cluster, event_group, event_cc);
+		cluster_pmu_set_evfilter_sys_mode(idx);
+	}
+
+	cluster_pmu_counter_enable_interrupt(idx);
+	cluster_pmu_counter_enable(idx);
+}
+
+static void l2_cache_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	cluster_pmu_counter_disable_interrupt(idx);
+	cluster_pmu_counter_disable(idx);
+
+	if (flags & PERF_EF_UPDATE)
+		l2_cache_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int l2_cache_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+	struct cluster_pmu *cluster;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	idx = l2_cache_get_event_idx(cluster, event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	cluster->events[idx] = event;
+	local64_set(&hwc->prev_count, 0);
+
+	if (flags & PERF_EF_START)
+		l2_cache_event_start(event, flags);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return err;
+}
+
+static void l2_cache_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	int idx = hwc->idx;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
+	cluster->events[idx] = NULL;
+	l2_cache_clear_event_idx(cluster, event);
+
+	perf_event_update_userpage(event);
+}
+
+static void l2_cache_event_read(struct perf_event *event)
+{
+	l2_cache_event_update(event);
+}
+
+static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
+}
+
+static struct device_attribute l2_cache_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL);
+
+static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
+	&l2_cache_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group l2_cache_pmu_cpumask_group = {
+	.attrs = l2_cache_pmu_cpumask_attrs,
+};
+
+/* CCG format for perf RAW codes. */
+PMU_FORMAT_ATTR(l2_code,   "config:4-11");
+PMU_FORMAT_ATTR(l2_group,  "config:0-3");
+static struct attribute *l2_cache_pmu_formats[] = {
+	&format_attr_l2_code.attr,
+	&format_attr_l2_group.attr,
+	NULL,
+};
+
+static struct attribute_group l2_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = l2_cache_pmu_formats,
+};
+
+static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
+	&l2_cache_pmu_format_group,
+	&l2_cache_pmu_cpumask_group,
+	NULL,
+};
+
+/*
+ * Generic device handlers
+ */
+
+static const struct acpi_device_id l2_cache_pmu_acpi_match[] = {
+	{ "QCOM8130", },
+	{ }
+};
+
+static int get_num_counters(void)
+{
+	int val;
+
+	val = get_l2_indirect_reg(L2PMCR);
+
+	/*
+	 * Read number of counters from L2PMCR and add 1
+	 * for the cycle counter.
+	 */
+	return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1;
+}
+
+static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	u64 mpidr;
+	int cpu_cluster_id;
+	struct cluster_pmu *cluster = NULL;
+
+	/*
+	 * This assumes that the cluster_id is in MPIDR[aff1] for
+	 * single-threaded cores, and MPIDR[aff2] for multi-threaded
+	 * cores. This logic will have to be updated if this changes.
+	 */
+	mpidr = read_cpuid_mpidr();
+	if (mpidr & MPIDR_MT_BITMASK)
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+	else
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
+		if (cluster->cluster_id != cpu_cluster_id)
+			continue;
+
+		dev_info(&l2cache_pmu->pdev->dev,
+			 "CPU%d associated with cluster %d\n", cpu,
+			 cluster->cluster_id);
+		cpumask_set_cpu(cpu, &cluster->cluster_cpus);
+		*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
+		break;
+	}
+
+	return cluster;
+}
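
Two worked MPIDR decodes under the aff1/aff2 convention stated in the
comment (register values invented for illustration):

	u64 mpidr;

	mpidr = 0x0101;			/* MT = 0, Aff1 = 1 */
	/* MPIDR_AFFINITY_LEVEL(mpidr, 1) == 1  ->  cluster 1 */

	mpidr = BIT(24) | 0x010100;	/* MT = 1, Aff2 = 1 */
	/* MPIDR_AFFINITY_LEVEL(mpidr, 2) == 1  ->  cluster 1 */
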
+
+static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster) {
+		/* First time this CPU has come online */
+		cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
+		if (!cluster) {
+			/* Only if broken firmware doesn't list every cluster */
+			WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
+			return 0;
+		}
+	}
+
+	/* If another CPU is managing this cluster, we're done */
+	if (cluster->on_cpu != -1)
+		return 0;
+
+	/*
+	 * All CPUs on this cluster were down, use this one.
+	 * Reset to put it into sane state.
+	 */
+	cluster->on_cpu = cpu;
+	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster_pmu_reset();
+
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
+	enable_irq(cluster->irq);
+
+	return 0;
+}
+
+static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+	cpumask_t cluster_online_cpus;
+	unsigned int target;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster)
+		return 0;
+
+	/* If this CPU is not managing the cluster, we're done */
+	if (cluster->on_cpu != cpu)
+		return 0;
+
+	/* Give up ownership of cluster */
+	cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster->on_cpu = -1;
+
+	/* Any other CPU for this cluster which is still online */
+	cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
+		    cpu_online_mask);
+	target = cpumask_any_but(&cluster_online_cpus, cpu);
+	if (target >= nr_cpu_ids) {
+		disable_irq(cluster->irq);
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
+	cluster->on_cpu = target;
+	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev->parent);
+	struct platform_device *sdev = to_platform_device(dev);
+	struct l2cache_pmu *l2cache_pmu = data;
+	struct cluster_pmu *cluster;
+	struct acpi_device *device;
+	unsigned long fw_cluster_id;
+	int err;
+	int irq;
+
+	if (acpi_bus_get_device(ACPI_HANDLE(dev), &device))
+		return -ENODEV;
+
+	if (kstrtoul(device->pnp.unique_id, 10, &fw_cluster_id) < 0) {
+		dev_err(&pdev->dev, "unable to read ACPI uid\n");
+		return -ENODEV;
+	}
+
+	cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL);
+	if (!cluster)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cluster->next);
+	list_add(&cluster->next, &l2cache_pmu->clusters);
+	cluster->cluster_id = fw_cluster_id;
+
+	irq = platform_get_irq(sdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev,
+			"Failed to get valid irq for cluster %ld\n",
+			fw_cluster_id);
+		return irq;
+	}
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+	cluster->irq = irq;
+
+	cluster->l2cache_pmu = l2cache_pmu;
+	cluster->on_cpu = -1;
+
+	err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       "l2-cache-pmu", cluster);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Unable to request IRQ%d for L2 PMU counters\n", irq);
+		return err;
+	}
+
+	dev_info(&pdev->dev,
+		"Registered L2 cache PMU cluster %ld\n", fw_cluster_id);
+
+	spin_lock_init(&cluster->pmu_lock);
+
+	l2cache_pmu->num_pmus++;
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe(struct platform_device *pdev)
+{
+	int err;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu =
+		devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
+	if (!l2cache_pmu)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&l2cache_pmu->clusters);
+
+	platform_set_drvdata(pdev, l2cache_pmu);
+	l2cache_pmu->pmu = (struct pmu) {
+		/* suffix is instance id for future use with multiple sockets */
+		.name		= "l2cache_0",
+		.task_ctx_nr    = perf_invalid_context,
+		.pmu_enable	= l2_cache_pmu_enable,
+		.pmu_disable	= l2_cache_pmu_disable,
+		.event_init	= l2_cache_event_init,
+		.add		= l2_cache_event_add,
+		.del		= l2_cache_event_del,
+		.start		= l2_cache_event_start,
+		.stop		= l2_cache_event_stop,
+		.read		= l2_cache_event_read,
+		.attr_groups	= l2_cache_pmu_attr_grps,
+	};
+
+	l2cache_pmu->num_counters = get_num_counters();
+	l2cache_pmu->pdev = pdev;
+	l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
+						     struct cluster_pmu *);
+	if (!l2cache_pmu->pmu_cluster)
+		return -ENOMEM;
+
+	l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1;
+	l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) |
+		BIT(L2CYCLE_CTR_BIT);
+
+	cpumask_clear(&l2cache_pmu->cpumask);
+
+	/* Read cluster info and initialize each cluster */
+	err = device_for_each_child(&pdev->dev, l2cache_pmu,
+				    l2_cache_pmu_probe_cluster);
+	if (err)
+		return err;
+
+	if (l2cache_pmu->num_pmus == 0) {
+		dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
+		return -ENODEV;
+	}
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				       &l2cache_pmu->node);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", err);
+		return err;
+	}
+
+	err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
+		goto out_unregister;
+	}
+
+	dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
+		 l2cache_pmu->num_pmus);
+
+	return err;
+
+out_unregister:
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return err;
+}
+
+static int l2_cache_pmu_remove(struct platform_device *pdev)
+{
+	struct l2cache_pmu *l2cache_pmu =
+		to_l2cache_pmu(platform_get_drvdata(pdev));
+
+	perf_pmu_unregister(&l2cache_pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return 0;
+}
+
+static struct platform_driver l2_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l2cache-pmu",
+		.acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match),
+	},
+	.probe = l2_cache_pmu_probe,
+	.remove = l2_cache_pmu_remove,
+};
+
+static int __init register_l2_cache_pmu_driver(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				      "AP_PERF_ARM_QCOM_L2_ONLINE",
+				      l2cache_pmu_online_cpu,
+				      l2cache_pmu_offline_cpu);
+	if (err)
+		return err;
+
+	return platform_driver_register(&l2_cache_pmu_driver);
+}
+device_initcall(register_l2_cache_pmu_driver);
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index a8ac4bc..35b5289 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -25,6 +25,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/of_irq.h>
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index b5abfda..4c5bca38 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -14,9 +14,6 @@
 #ifndef __LINUX_ARM_SMCCC_H
 #define __LINUX_ARM_SMCCC_H
 
-#include <linux/linkage.h>
-#include <linux/types.h>
-
 /*
  * This file provides common defines for ARM SMC Calling Convention as
  * specified in
@@ -60,6 +57,13 @@
 #define ARM_SMCCC_OWNER_TRUSTED_OS	50
 #define ARM_SMCCC_OWNER_TRUSTED_OS_END	63
 
+#define ARM_SMCCC_QUIRK_NONE		0
+#define ARM_SMCCC_QUIRK_QCOM_A6		1 /* Save/restore register a6 */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/types.h>
 /**
  * struct arm_smccc_res - Result from SMC/HVC call
  * @a0-a3 result values from registers 0 to 3
@@ -72,33 +76,59 @@
 };
 
 /**
- * arm_smccc_smc() - make SMC calls
+ * struct arm_smccc_quirk - Contains quirk information
+ * @id: quirk identification
+ * @state: quirk specific information
+ * @a6: Qualcomm quirk entry for returning the post-SMC call contents of a6
+ */
+struct arm_smccc_quirk {
+	int	id;
+	union {
+		unsigned long a6;
+	} state;
+};
+
+/**
+ * __arm_smccc_smc() - make SMC calls
  * @a0-a7: arguments passed in registers 0 to 7
  * @res: result values from registers 0 to 3
+ * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
  *
  * This function is used to make SMC calls following SMC Calling Convention.
  * The content of the supplied param are copied to registers 0 to 7 prior
  * to the SMC instruction. The return values are updated with the content
- * from register 0 to 3 on return from the SMC instruction.
+ * from register 0 to 3 on return from the SMC instruction.  An optional
+ * quirk structure provides vendor specific behavior.
  */
-asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1,
+asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
 			unsigned long a2, unsigned long a3, unsigned long a4,
 			unsigned long a5, unsigned long a6, unsigned long a7,
-			struct arm_smccc_res *res);
+			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
 
 /**
- * arm_smccc_hvc() - make HVC calls
+ * __arm_smccc_hvc() - make HVC calls
  * @a0-a7: arguments passed in registers 0 to 7
  * @res: result values from registers 0 to 3
+ * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
  *
  * This function is used to make HVC calls following SMC Calling
  * Convention.  The content of the supplied param are copied to registers 0
  * to 7 prior to the HVC instruction. The return values are updated with
- * the content from register 0 to 3 on return from the HVC instruction.
+ * the content from register 0 to 3 on return from the HVC instruction.  An
+ * optional quirk structure provides vendor specific behavior.
  */
-asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1,
+asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 			unsigned long a2, unsigned long a3, unsigned long a4,
 			unsigned long a5, unsigned long a6, unsigned long a7,
-			struct arm_smccc_res *res);
+			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
 
+#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
+
+#define arm_smccc_smc_quirk(...) __arm_smccc_smc(__VA_ARGS__)
+
+#define arm_smccc_hvc(...) __arm_smccc_hvc(__VA_ARGS__, NULL)
+
+#define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__)
+
+#endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
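
Call-site sketch for the reworked wrappers (fn_id is a hypothetical
function identifier; this mirrors the qcom_scm-64.c change earlier in
this series): the plain macros append a NULL quirk, while the _quirk
variants forward the caller's structure so vendor state such as a6
survives the call.

	struct arm_smccc_res res;
	struct arm_smccc_quirk q = { .id = ARM_SMCCC_QUIRK_QCOM_A6 };

	arm_smccc_smc(fn_id, 0, 0, 0, 0, 0, 0, 0, &res);	/* quirk == NULL */
	arm_smccc_smc_quirk(fn_id, 0, 0, 0, 0, 0, q.state.a6, 0, &res, &q);
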
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 921acaa..bb790c4 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -137,6 +137,7 @@
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b84615b..6ff66d6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -76,6 +76,10 @@
 #define page_to_virt(x)	__va(PFN_PHYS(page_to_pfn(x)))
 #endif
 
+#ifndef lm_alias
+#define lm_alias(x)	__va(__pa_symbol(x))
+#endif
+
 /*
  * To prevent common memory management code establishing
  * a zero page mapping on a read fault.
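
The definition is the whole contract: for any kernel-image symbol s,
__pa(lm_alias(s)) == __pa_symbol(s). A sketch of the invariant
(arm64-flavoured; assumes _stext is in scope):

	/* lm_alias() yields the linear-map VA backing an image symbol. */
	VM_BUG_ON(__pa(lm_alias(_stext)) != __pa_symbol(_stext));
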
diff --git a/include/linux/of.h b/include/linux/of.h
index d72f010..21e6323 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -280,6 +280,7 @@
 
 /* cache lookup */
 extern struct device_node *of_find_next_cache_node(const struct device_node *);
+extern int of_find_last_cache_level(unsigned int cpu);
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5617cc4..a01974e 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1399,7 +1399,7 @@
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-	return __pa((unsigned long)(char *)&vmcoreinfo_note);
+	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8397e4e..7f44429 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -622,9 +622,12 @@
 
 	  If unsure, say N.
 
+config ARCH_HAS_DEBUG_VIRTUAL
+	bool
+
 config DEBUG_VIRTUAL
 	bool "Debug VM translations"
-	depends on DEBUG_KERNEL && X86
+	depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL
 	help
 	  Enable some costly sanity checks in virtual to page code. This can
 	  catch mistakes with virt_to_page() and friends.
diff --git a/mm/cma.c b/mm/cma.c
index c960459..94b3460 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -235,18 +235,13 @@
 	phys_addr_t highmem_start;
 	int ret = 0;
 
-#ifdef CONFIG_X86
 	/*
-	 * high_memory isn't direct mapped memory so retrieving its physical
-	 * address isn't appropriate.  But it would be useful to check the
-	 * physical address of the highmem boundary so it's justifiable to get
-	 * the physical address from it.  On x86 there is a validation check for
-	 * this case, so the following workaround is needed to avoid it.
+	 * We can't use __pa(high_memory) directly, since high_memory
+	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
+	 * complain. Find the boundary by adding one to the last valid
+	 * address.
 	 */
-	highmem_start = __pa_nodebug(high_memory);
-#else
-	highmem_start = __pa(high_memory);
-#endif
+	highmem_start = __pa(high_memory - 1) + 1;
 	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
 		__func__, &size, &base, &limit, &alignment);
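
A worked example of the boundary arithmetic (addresses invented for
illustration): if the direct map ends at physical address 0xC0000000,
then high_memory == __va(0xC0000000), one byte past the last mappable
virtual address.

	/* __pa(high_memory) may trip DEBUG_VIRTUAL, but:            */
	/*   __pa(high_memory - 1)     == 0xBFFFFFFF                 */
	/*   __pa(high_memory - 1) + 1 == 0xC0000000  (the boundary) */
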
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 3f9a41c..31238da 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -15,6 +15,7 @@
 #include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
+#include <linux/mm.h>
 #include <linux/pfn.h>
 
 #include <asm/page.h>
@@ -49,7 +50,7 @@
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	pte_t zero_pte;
 
-	zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL);
+	zero_pte = pfn_pte(PFN_DOWN(__pa_symbol(kasan_zero_page)), PAGE_KERNEL);
 	zero_pte = pte_wrprotect(zero_pte);
 
 	while (addr + PAGE_SIZE <= end) {
@@ -69,7 +70,7 @@
 		next = pmd_addr_end(addr, end);
 
 		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
-			pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+			pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
 			continue;
 		}
 
@@ -92,9 +93,9 @@
 		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
 			pmd_t *pmd;
 
-			pud_populate(&init_mm, pud, kasan_zero_pmd);
+			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
 			pmd = pmd_offset(pud, addr);
-			pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+			pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
 			continue;
 		}
 
@@ -135,11 +136,11 @@
 			 * puds, pmds, so pgd_populate() and pud_populate()
 			 * are no-ops.
 			 */
-			pgd_populate(&init_mm, pgd, kasan_zero_pud);
+			pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud));
 			pud = pud_offset(pgd, addr);
-			pud_populate(&init_mm, pud, kasan_zero_pmd);
+			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
 			pmd = pmd_offset(pud, addr);
-			pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+			pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
 			continue;
 		}
 
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 3c8da0a..8345299 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -108,13 +108,13 @@
 	 * __pa() is not just the reverse of __va(). This can be detected
 	 * and checked:
 	 */
-	textlow_linear = (unsigned long)__va(__pa(textlow));
+	textlow_linear = (unsigned long)lm_alias(textlow);
 	/* No different mapping: we're done. */
 	if (textlow_linear == textlow)
 		return NULL;
 
 	/* Check the secondary mapping... */
-	texthigh_linear = (unsigned long)__va(__pa(texthigh));
+	texthigh_linear = (unsigned long)lm_alias(texthigh);
 	if (overlaps(ptr, n, textlow_linear, texthigh_linear))
 		return "<linear kernel text>";