Merge branch 'apei' into release
diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
new file mode 100644
index 0000000..9146952
--- /dev/null
+++ b/Documentation/acpi/apei/output_format.txt
@@ -0,0 +1,122 @@
+                     APEI output format
+                     ~~~~~~~~~~~~~~~~~~
+
+APEI uses printk as its hardware error reporting interface; the output
+format is as follows.
+
+<error record> :=
+APEI generic hardware error status
+severity: <integer>, <severity string>
+section: <integer>, severity: <integer>, <severity string>
+flags: <integer>
+<section flags strings>
+fru_id: <uuid string>
+fru_text: <string>
+section_type: <section type string>
+<section data>
+
+<severity string>* := recoverable | fatal | corrected | info
+
+<section flags strings># :=
+[primary][, containment warning][, reset][, threshold exceeded]\
+[, resource not accessible][, latent error]
+
+<section type string> := generic processor error | memory error | \
+PCIe error | unknown, <uuid string>
+
+<section data> :=
+<generic processor section data> | <memory section data> | \
+<pcie section data> | <null>
+
+<generic processor section data> :=
+[processor_type: <integer>, <proc type string>]
+[processor_isa: <integer>, <proc isa string>]
+[error_type: <integer>
+<proc error type strings>]
+[operation: <integer>, <proc operation string>]
+[flags: <integer>
+<proc flags strings>]
+[level: <integer>]
+[version_info: <integer>]
+[processor_id: <integer>]
+[target_address: <integer>]
+[requestor_id: <integer>]
+[responder_id: <integer>]
+[IP: <integer>]
+
+<proc type string>* := IA32/X64 | IA64
+
+<proc isa string>* := IA32 | IA64 | X64
+
+<proc error type strings># :=
+[cache error][, TLB error][, bus error][, micro-architectural error]
+
+<proc operation string>* := unknown or generic | data read | data write | \
+instruction execution
+
+<proc flags strings># :=
+[restartable][, precise IP][, overflow][, corrected]
+
+<memory section data> :=
+[error_status: <integer>]
+[physical_address: <integer>]
+[physical_address_mask: <integer>]
+[node: <integer>]
+[card: <integer>]
+[module: <integer>]
+[bank: <integer>]
+[device: <integer>]
+[row: <integer>]
+[column: <integer>]
+[bit_position: <integer>]
+[requestor_id: <integer>]
+[responder_id: <integer>]
+[target_id: <integer>]
+[error_type: <integer>, <mem error type string>]
+
+<mem error type string>* :=
+unknown | no error | single-bit ECC | multi-bit ECC | \
+single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
+target abort | parity error | watchdog timeout | invalid address | \
+mirror Broken | memory sparing | scrub corrected error | \
+scrub uncorrected error
+
+<pcie section data> :=
+[port_type: <integer>, <pcie port type string>]
+[version: <integer>.<integer>]
+[command: <integer>, status: <integer>]
+[device_id: <integer>:<integer>:<integer>.<integer>
+slot: <integer>
+secondary_bus: <integer>
+vendor_id: <integer>, device_id: <integer>
+class_code: <integer>]
+[serial number: <integer>, <integer>]
+[bridge: secondary_status: <integer>, control: <integer>]
+
+<pcie port type string>* := PCIe end point | legacy PCI end point | \
+unknown | unknown | root port | upstream switch port | \
+downstream switch port | PCIe to PCI/PCI-X bridge | \
+PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
+root complex event collector
+
+Where [] designates that the corresponding content is optional.
+
+Every <field string> description marked with * has the following format:
+
+field: <integer>, <field string>
+
+Where the value of <integer> is the 0-based position of the string in the
+<field string> description. Otherwise, <field string> will be "unknown".
+
+Every <field strings> description marked with # has the following format:
+
+field: <integer>
+<field strings>
+
+Where each string in <field strings> corresponds to one set bit of
+<integer>. The bit position is the 0-based position of the string in
+the <field strings> description.
+
+For a more detailed explanation of every field, please refer to the
+UEFI specification, version 2.3 or later, Appendix N: Common Platform
+Error Record.
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b9..c2d0baa 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -504,6 +504,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
 int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
 {
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c..d34cf80 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -240,6 +240,7 @@
 	bust_spinlocks(1);
 	return flags;
 }
+EXPORT_SYMBOL_GPL(oops_begin);
 
 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 18df1e9..ef0581f 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -109,6 +109,8 @@
 		return sizeof(*estatus) + estatus->data_length;
 }
 
+void apei_estatus_print(const char *pfx,
+			const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
 #endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index f4cf2fc..31464a0 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -46,6 +46,317 @@
 }
 EXPORT_SYMBOL_GPL(cper_next_record_id);
 
+/* Severity names, indexed by severity value */
+static const char *cper_severity_strs[] = {
+	"recoverable", "fatal", "corrected", "info",
+};
+
+/* Name for @severity, or "unknown" when it is beyond the table above */
+static const char *cper_severity_str(unsigned int severity)
+{
+	if (severity >= ARRAY_SIZE(cper_severity_strs))
+		return "unknown";
+	return cper_severity_strs[severity];
+}
+
+/*
+ * cper_print_bits - print the names of the bits set in a mask
+ * @pfx: prefix for each line, including log level and prefix string
+ * @bits: bit mask
+ * @strs: string array, indexed by bit position
+ * @strs_size: number of entries in @strs
+ *
+ * The names are joined with ", ".  A line is flushed before it would
+ * grow past 80 characters, and every line starts with @pfx.
+ */
+static void cper_print_bits(const char *pfx, unsigned int bits,
+			    const char *strs[], unsigned int strs_size)
+{
+	char line[84];
+	const char *name;
+	int bit, used = 0;
+
+	for (bit = 0; bit < strs_size; bit++) {
+		if (!(bits & (1U << bit)))
+			continue;
+		name = strs[bit];
+		if (used && used + strlen(name) + 2 > 80) {
+			printk("%s\n", line);
+			used = 0;
+		}
+		if (used)
+			used += snprintf(line + used, sizeof(line) - used,
+					 ", %s", name);
+		else
+			used = snprintf(line, sizeof(line), "%s%s", pfx, name);
+	}
+	if (used)
+		printk("%s\n", line);
+}
+
+static const char *cper_proc_type_strs[] = {
+	"IA32/X64",
+	"IA64",
+};
+
+static const char *cper_proc_isa_strs[] = {
+	"IA32",
+	"IA64",
+	"X64",
+};
+
+static const char *cper_proc_error_type_strs[] = {
+	"cache error",
+	"TLB error",
+	"bus error",
+	"micro-architectural error",
+};
+
+static const char *cper_proc_op_strs[] = {
+	"unknown or generic",
+	"data read",
+	"data write",
+	"instruction execution",
+};
+
+static const char *cper_proc_flag_strs[] = {
+	"restartable",
+	"precise IP",
+	"overflow",
+	"corrected",
+};
+
+static void cper_print_proc_generic(const char *pfx,
+				    const struct cper_sec_proc_generic *proc)
+{
+	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
+		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
+		       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
+		       cper_proc_type_strs[proc->proc_type] : "unknown");
+	if (proc->validation_bits & CPER_PROC_VALID_ISA)
+		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
+		       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
+		       cper_proc_isa_strs[proc->proc_isa] : "unknown");
+	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
+		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
+		cper_print_bits(pfx, proc->proc_error_type,
+				cper_proc_error_type_strs,
+				ARRAY_SIZE(cper_proc_error_type_strs));
+	}
+	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
+		printk("%s""operation: %d, %s\n", pfx, proc->operation,
+		       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
+		       cper_proc_op_strs[proc->operation] : "unknown");
+	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
+		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
+		cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
+				ARRAY_SIZE(cper_proc_flag_strs));
+	}
+	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
+		printk("%s""level: %d\n", pfx, proc->level);
+	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
+		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
+	if (proc->validation_bits & CPER_PROC_VALID_ID)
+		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
+	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
+		printk("%s""target_address: 0x%016llx\n",
+		       pfx, proc->target_addr);
+	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
+		printk("%s""requestor_id: 0x%016llx\n",
+		       pfx, proc->requestor_id);
+	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
+		printk("%s""responder_id: 0x%016llx\n",
+		       pfx, proc->responder_id);
+	if (proc->validation_bits & CPER_PROC_VALID_IP)
+		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
+}
+
+static const char *cper_mem_err_type_strs[] = {
+	"unknown",
+	"no error",
+	"single-bit ECC",
+	"multi-bit ECC",
+	"single-symbol chipkill ECC",
+	"multi-symbol chipkill ECC",
+	"master abort",
+	"target abort",
+	"parity error",
+	"watchdog timeout",
+	"invalid address",
+	"mirror Broken",
+	"memory sparing",
+	"scrub corrected error",
+	"scrub uncorrected error",
+};
+
+static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
+{
+	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
+		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
+	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
+		printk("%s""physical_address: 0x%016llx\n",
+		       pfx, mem->physical_addr);
+	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
+		printk("%s""physical_address_mask: 0x%016llx\n",
+		       pfx, mem->physical_addr_mask);
+	if (mem->validation_bits & CPER_MEM_VALID_NODE)
+		printk("%s""node: %d\n", pfx, mem->node);
+	if (mem->validation_bits & CPER_MEM_VALID_CARD)
+		printk("%s""card: %d\n", pfx, mem->card);
+	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
+		printk("%s""module: %d\n", pfx, mem->module);
+	if (mem->validation_bits & CPER_MEM_VALID_BANK)
+		printk("%s""bank: %d\n", pfx, mem->bank);
+	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
+		printk("%s""device: %d\n", pfx, mem->device);
+	if (mem->validation_bits & CPER_MEM_VALID_ROW)
+		printk("%s""row: %d\n", pfx, mem->row);
+	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
+		printk("%s""column: %d\n", pfx, mem->column);
+	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+		printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
+	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+		printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
+	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+		printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
+	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
+		printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
+	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+		u8 etype = mem->error_type;
+		printk("%s""error_type: %d, %s\n", pfx, etype,
+		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
+		       cper_mem_err_type_strs[etype] : "unknown");
+	}
+}
+
+static const char *cper_pcie_port_type_strs[] = {
+	"PCIe end point",
+	"legacy PCI end point",
+	"unknown",
+	"unknown",
+	"root port",
+	"upstream switch port",
+	"downstream switch port",
+	"PCIe to PCI/PCI-X bridge",
+	"PCI/PCI-X to PCIe bridge",
+	"root complex integrated endpoint device",
+	"root complex event collector",
+};
+
+static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
+{
+	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
+		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
+		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
+		       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
+	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
+		printk("%s""version: %d.%d\n", pfx,
+		       pcie->version.major, pcie->version.minor);
+	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
+		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
+		       pcie->command, pcie->status);
+	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
+		const __u8 *p;
+		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
+		       pcie->device_id.segment, pcie->device_id.bus,
+		       pcie->device_id.device, pcie->device_id.function);
+		printk("%s""slot: %d\n", pfx,
+		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
+		printk("%s""secondary_bus: 0x%02x\n", pfx,
+		       pcie->device_id.secondary_bus);
+		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
+		       pcie->device_id.vendor_id, pcie->device_id.device_id);
+		p = pcie->device_id.class_code;
+		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
+	}
+	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
+		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
+		       pcie->serial_number.lower, pcie->serial_number.upper);
+	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
+		printk(
+	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
+	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
+}
+
+static const char *apei_estatus_section_flag_strs[] = {
+	"primary",
+	"containment warning",
+	"reset",
+	"threshold exceeded",
+	"resource not accessible",
+	"latent error",
+};
+
+/*
+ * Print one error section: header fields, then the decoded body for a known
+ * section type.  Keys and names follow the documented APEI output format.
+ */
+static void apei_estatus_print_section(
+	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+{
+	uuid_le *sec_type = (uuid_le *)gdata->section_type;
+	__u16 severity = gdata->error_severity;
+
+	printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
+	       cper_severity_str(severity));
+	printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
+	cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
+			ARRAY_SIZE(apei_estatus_section_flag_strs));
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
+
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
+		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
+		printk("%s""section_type: generic processor error\n", pfx);
+		if (gdata->error_data_length < sizeof(*proc_err))
+			goto err_section_too_small;
+		cper_print_proc_generic(pfx, proc_err);
+	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+		printk("%s""section_type: memory error\n", pfx);
+		if (gdata->error_data_length < sizeof(*mem_err))
+			goto err_section_too_small;
+		cper_print_mem(pfx, mem_err);
+	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
+		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
+		printk("%s""section_type: PCIe error\n", pfx);
+		if (gdata->error_data_length < sizeof(*pcie))
+			goto err_section_too_small;
+		cper_print_pcie(pfx, pcie);
+	} else
+		printk("%s""section_type: unknown, %pUl\n", pfx, sec_type);
+
+	return;
+
+err_section_too_small:
+	pr_err(FW_WARN "error section length is too small\n");
+}
+
+void apei_estatus_print(const char *pfx,
+			const struct acpi_hest_generic_status *estatus)
+{
+	struct acpi_hest_generic_data *gdata;
+	unsigned int data_len, gedata_len;
+	int sec_no = 0;
+	__u16 severity;
+
+	printk("%s""APEI generic hardware error status\n", pfx);
+	severity = estatus->error_severity;
+	printk("%s""severity: %d, %s\n", pfx, severity,
+	       cper_severity_str(severity));
+	data_len = estatus->data_length;
+	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+	while (data_len > sizeof(*gdata)) {
+		gedata_len = gdata->error_data_length;
+		apei_estatus_print_section(pfx, gdata, sec_no);
+		data_len -= gedata_len + sizeof(*gdata);
+		sec_no++;
+	}
+}
+EXPORT_SYMBOL_GPL(apei_estatus_print);
+
 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
 {
 	if (estatus->data_length &&
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0d505e5..d1d484d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,10 +12,6 @@
  * For more information about Generic Hardware Error Source, please
  * refer to ACPI Specification version 4.0, section 17.3.2.6
  *
- * Now, only SCI notification type and memory errors are
- * supported. More notification type and hardware error type will be
- * added later.
- *
  * Copyright 2010 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
  *
@@ -39,14 +35,18 @@
 #include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/timer.h>
 #include <linux/cper.h>
 #include <linux/kdebug.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
 #include <acpi/apei.h>
 #include <acpi/atomicio.h>
 #include <acpi/hed.h>
 #include <asm/mce.h>
+#include <asm/tlbflush.h>
 
 #include "apei-internal.h"
 
@@ -55,42 +55,131 @@
 #define GHES_ESTATUS_MAX_SIZE		65536
 
 /*
- * One struct ghes is created for each generic hardware error
- * source.
- *
+ * One struct ghes is created for each generic hardware error source.
  * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler. Handler for one generic hardware error source is only
- * triggered after the previous one is done. So handler can uses
- * struct ghes without locking.
+ * handler.
  *
  * estatus: memory buffer for error status block, allocated during
  * HEST parsing.
  */
 #define GHES_TO_CLEAR		0x0001
+#define GHES_EXITING		0x0002
 
 struct ghes {
 	struct acpi_hest_generic *generic;
 	struct acpi_hest_generic_status *estatus;
-	struct list_head list;
 	u64 buffer_paddr;
 	unsigned long flags;
+	union {
+		struct list_head list;
+		struct timer_list timer;
+		unsigned int irq;
+	};
 };
 
+static int ghes_panic_timeout	__read_mostly = 30;
+
 /*
- * Error source lists, one list for each notification method. The
- * members in lists are struct ghes.
+ * All error sources notified with SCI share one notifier function,
+ * so they need to be linked and checked one by one.  This applies
+ * to NMI too.
  *
- * The list members are only added in HEST parsing and deleted during
- * module_exit, that is, single-threaded. So no lock is needed for
- * that.
- *
- * But the mutual exclusion is needed between members adding/deleting
- * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is
- * used for that.
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
  */
 static LIST_HEAD(ghes_sci);
+static LIST_HEAD(ghes_nmi);
 static DEFINE_MUTEX(ghes_list_mutex);
 
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+/*
+ * The memory area used to transfer hardware error information from
+ * BIOS to Linux can be determined only in the NMI, IRQ or timer
+ * handler, and the general ioremap cannot be used in atomic context,
+ * so a special atomic version of ioremap is implemented for that.
+ */
+
+/*
+ * Two virtual pages are used, one for NMI context, the other for
+ * IRQ/PROCESS context
+ */
+#define GHES_IOREMAP_PAGES		2
+#define GHES_IOREMAP_NMI_PAGE(base)	(base)
+#define GHES_IOREMAP_IRQ_PAGE(base)	((base) + PAGE_SIZE)
+
+/* virtual memory area for atomic ioremap */
+static struct vm_struct *ghes_ioremap_area;
+/*
+ * These two spinlocks prevent the atomic ioremap virtual memory area
+ * from being mapped simultaneously.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+
+static int ghes_ioremap_init(void)
+{
+	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
+		VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+	if (!ghes_ioremap_area) {
+		pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void ghes_ioremap_exit(void)
+{
+	free_vm_area(ghes_ioremap_area);
+}
+
+static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+{
+	unsigned long vaddr;
+
+	vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+			   pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+	return (void __iomem *)vaddr;
+}
+
+static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+{
+	unsigned long vaddr;
+
+	vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+			   pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+	return (void __iomem *)vaddr;
+}
+
+static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
+{
+	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+	void *base = ghes_ioremap_area->addr;
+
+	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+	__flush_tlb_one(vaddr);
+}
+
+static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
+{
+	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+	void *base = ghes_ioremap_area->addr;
+
+	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+	__flush_tlb_one(vaddr);
+}
+
 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 {
 	struct ghes *ghes;
@@ -101,7 +190,6 @@
 	if (!ghes)
 		return ERR_PTR(-ENOMEM);
 	ghes->generic = generic;
-	INIT_LIST_HEAD(&ghes->list);
 	rc = acpi_pre_map_gar(&generic->error_status_address);
 	if (rc)
 		goto err_free;
@@ -158,22 +246,41 @@
 	}
 }
 
-/* SCI handler run in work queue, so ioremap can be used here */
-static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
-				 int from_phys)
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+				  int from_phys)
 {
-	void *vaddr;
+	void __iomem *vaddr;
+	unsigned long flags = 0;
+	int in_nmi = in_nmi();
+	u64 offset;
+	u32 trunk;
 
-	vaddr = ioremap_cache(paddr, len);
-	if (!vaddr)
-		return -ENOMEM;
-	if (from_phys)
-		memcpy(buffer, vaddr, len);
-	else
-		memcpy(vaddr, buffer, len);
-	iounmap(vaddr);
-
-	return 0;
+	while (len > 0) {
+		offset = paddr - (paddr & PAGE_MASK);
+		if (in_nmi) {
+			raw_spin_lock(&ghes_ioremap_lock_nmi);
+			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
+		} else {
+			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
+			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
+		}
+		trunk = PAGE_SIZE - offset;
+		trunk = min(trunk, len);
+		if (from_phys)
+			memcpy_fromio(buffer, vaddr + offset, trunk);
+		else
+			memcpy_toio(vaddr + offset, buffer, trunk);
+		len -= trunk;
+		paddr += trunk;
+		buffer += trunk;
+		if (in_nmi) {
+			ghes_iounmap_nmi(vaddr);
+			raw_spin_unlock(&ghes_ioremap_lock_nmi);
+		} else {
+			ghes_iounmap_irq(vaddr);
+			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+		}
+	}
 }
 
 static int ghes_read_estatus(struct ghes *ghes, int silent)
@@ -194,10 +301,8 @@
 	if (!buf_paddr)
 		return -ENOENT;
 
-	rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
-				   sizeof(*ghes->estatus), 1);
-	if (rc)
-		return rc;
+	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+			      sizeof(*ghes->estatus), 1);
 	if (!ghes->estatus->block_status)
 		return -ENOENT;
 
@@ -212,17 +317,15 @@
 		goto err_read_block;
 	if (apei_estatus_check_header(ghes->estatus))
 		goto err_read_block;
-	rc = ghes_copy_tofrom_phys(ghes->estatus + 1,
-				   buf_paddr + sizeof(*ghes->estatus),
-				   len - sizeof(*ghes->estatus), 1);
-	if (rc)
-		return rc;
+	ghes_copy_tofrom_phys(ghes->estatus + 1,
+			      buf_paddr + sizeof(*ghes->estatus),
+			      len - sizeof(*ghes->estatus), 1);
 	if (apei_estatus_check(ghes->estatus))
 		goto err_read_block;
 	rc = 0;
 
 err_read_block:
-	if (rc && !silent)
+	if (rc && !silent && printk_ratelimit())
 		pr_warning(FW_WARN GHES_PFX
 			   "Failed to read error status block!\n");
 	return rc;
@@ -255,11 +358,26 @@
 		}
 #endif
 	}
+}
 
-	if (!processed && printk_ratelimit())
-		pr_warning(GHES_PFX
-		"Unknown error record from generic hardware error source: %d\n",
-			   ghes->generic->header.source_id);
+static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
+
+	if (pfx == NULL) {
+		if (ghes_severity(ghes->estatus->error_severity) <=
+		    GHES_SEV_CORRECTED)
+			pfx = KERN_WARNING HW_ERR;
+		else
+			pfx = KERN_ERR HW_ERR;
+	}
+	if (__ratelimit(&ratelimit)) {
+		printk(
+	"%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+	pfx, ghes->generic->header.source_id);
+		apei_estatus_print(pfx, ghes->estatus);
+	}
 }
 
 static int ghes_proc(struct ghes *ghes)
@@ -269,6 +387,7 @@
 	rc = ghes_read_estatus(ghes, 0);
 	if (rc)
 		goto out;
+	ghes_print_estatus(NULL, ghes);
 	ghes_do_proc(ghes);
 
 out:
@@ -276,6 +395,42 @@
 	return 0;
 }
 
+/* Arm the poll timer; a poll interval of 0 leaves the source disabled. */
+static void ghes_add_timer(struct ghes *ghes)
+{
+	struct acpi_hest_generic *g = ghes->generic;
+	unsigned long delay = msecs_to_jiffies(g->notify.poll_interval);
+
+	if (!g->notify.poll_interval) {
+		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+			   g->header.source_id);
+		return;
+	}
+	ghes->timer.expires = round_jiffies(jiffies + delay); /* absolute */
+	add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(unsigned long data)
+{
+	struct ghes *ghes = (void *)data;
+
+	ghes_proc(ghes);
+	if (!(ghes->flags & GHES_EXITING))
+		ghes_add_timer(ghes);
+}
+
+/*
+ * Handler for the external-interrupt notification type: @data is the
+ * struct ghes registered with request_irq().  Reports IRQ_NONE when
+ * ghes_proc() returns non-zero, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+	struct ghes *ghes = data;
+
+	return ghes_proc(ghes) ? IRQ_NONE : IRQ_HANDLED;
+}
+
 static int ghes_notify_sci(struct notifier_block *this,
 				  unsigned long event, void *data)
 {
@@ -292,10 +447,63 @@
 	return ret;
 }
 
+static int ghes_notify_nmi(struct notifier_block *this,
+				  unsigned long cmd, void *data)
+{
+	struct ghes *ghes, *ghes_global = NULL;
+	int sev, sev_global = -1;
+	int ret = NOTIFY_DONE;
+
+	if (cmd != DIE_NMI)
+		return ret;
+
+	raw_spin_lock(&ghes_nmi_lock);
+	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+		if (ghes_read_estatus(ghes, 1)) {
+			ghes_clear_estatus(ghes);
+			continue;
+		}
+		sev = ghes_severity(ghes->estatus->error_severity);
+		if (sev > sev_global) {
+			sev_global = sev;
+			ghes_global = ghes;
+		}
+		ret = NOTIFY_STOP;
+	}
+
+	if (ret == NOTIFY_DONE)
+		goto out;
+
+	if (sev_global >= GHES_SEV_PANIC) {
+		oops_begin();
+		ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
+		/* reboot to log the error! */
+		if (panic_timeout == 0)
+			panic_timeout = ghes_panic_timeout;
+		panic("Fatal hardware error!");
+	}
+
+	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+		if (!(ghes->flags & GHES_TO_CLEAR))
+			continue;
+		/* Do not print estatus because printk is not NMI safe */
+		ghes_do_proc(ghes);
+		ghes_clear_estatus(ghes);
+	}
+
+out:
+	raw_spin_unlock(&ghes_nmi_lock);
+	return ret;
+}
+
 static struct notifier_block ghes_notifier_sci = {
 	.notifier_call = ghes_notify_sci,
 };
 
+static struct notifier_block ghes_notifier_nmi = {
+	.notifier_call = ghes_notify_nmi,
+};
+
 static int __devinit ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
@@ -306,18 +514,27 @@
 	if (!generic->enabled)
 		return -ENODEV;
 
-	if (generic->error_block_length <
-	    sizeof(struct acpi_hest_generic_status)) {
-		pr_warning(FW_BUG GHES_PFX
-"Invalid error block length: %u for generic hardware error source: %d\n",
-			   generic->error_block_length,
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_NMI:
+		break;
+	case ACPI_HEST_NOTIFY_LOCAL:
+		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
 			   generic->header.source_id);
 		goto err;
+	default:
+		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+			   generic->notify.type, generic->header.source_id);
+		goto err;
 	}
-	if (generic->records_to_preallocate == 0) {
-		pr_warning(FW_BUG GHES_PFX
-"Invalid records to preallocate: %u for generic hardware error source: %d\n",
-			   generic->records_to_preallocate,
+
+	rc = -EIO;
+	if (generic->error_block_length <
+	    sizeof(struct acpi_hest_generic_status)) {
+		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+			   generic->error_block_length,
 			   generic->header.source_id);
 		goto err;
 	}
@@ -327,38 +544,43 @@
 		ghes = NULL;
 		goto err;
 	}
-	if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		ghes->timer.function = ghes_poll_func;
+		ghes->timer.data = (unsigned long)ghes;
+		init_timer_deferrable(&ghes->timer);
+		ghes_add_timer(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+		/* External interrupt vector is GSI */
+		if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
+			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+			       generic->header.source_id);
+			goto err;
+		}
+		if (request_irq(ghes->irq, ghes_irq_func,
+				0, "GHES IRQ", ghes)) {
+			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+			       generic->header.source_id);
+			goto err;
+		}
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
 		mutex_lock(&ghes_list_mutex);
 		if (list_empty(&ghes_sci))
 			register_acpi_hed_notifier(&ghes_notifier_sci);
 		list_add_rcu(&ghes->list, &ghes_sci);
 		mutex_unlock(&ghes_list_mutex);
-	} else {
-		unsigned char *notify = NULL;
-
-		switch (generic->notify.type) {
-		case ACPI_HEST_NOTIFY_POLLED:
-			notify = "POLL";
-			break;
-		case ACPI_HEST_NOTIFY_EXTERNAL:
-		case ACPI_HEST_NOTIFY_LOCAL:
-			notify = "IRQ";
-			break;
-		case ACPI_HEST_NOTIFY_NMI:
-			notify = "NMI";
-			break;
-		}
-		if (notify) {
-			pr_warning(GHES_PFX
-"Generic hardware error source: %d notified via %s is not supported!\n",
-				   generic->header.source_id, notify);
-		} else {
-			pr_warning(FW_WARN GHES_PFX
-"Unknown notification type: %u for generic hardware error source: %d\n",
-			generic->notify.type, generic->header.source_id);
-		}
-		rc = -ENODEV;
-		goto err;
+		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		mutex_lock(&ghes_list_mutex);
+		if (list_empty(&ghes_nmi))
+			register_die_notifier(&ghes_notifier_nmi);
+		list_add_rcu(&ghes->list, &ghes_nmi);
+		mutex_unlock(&ghes_list_mutex);
+		break;
+	default:
+		BUG();
 	}
 	platform_set_drvdata(ghes_dev, ghes);
 
@@ -379,7 +601,14 @@
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
 
+	ghes->flags |= GHES_EXITING;
 	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		del_timer_sync(&ghes->timer);
+		break;
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+		free_irq(ghes->irq, ghes);
+		break;
 	case ACPI_HEST_NOTIFY_SCI:
 		mutex_lock(&ghes_list_mutex);
 		list_del_rcu(&ghes->list);
@@ -387,12 +616,23 @@
 			unregister_acpi_hed_notifier(&ghes_notifier_sci);
 		mutex_unlock(&ghes_list_mutex);
 		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		mutex_lock(&ghes_list_mutex);
+		list_del_rcu(&ghes->list);
+		if (list_empty(&ghes_nmi))
+			unregister_die_notifier(&ghes_notifier_nmi);
+		mutex_unlock(&ghes_list_mutex);
+		/*
+		 * To synchronize with NMI handler, ghes can only be
+		 * freed after NMI handler finishes.
+		 */
+		synchronize_rcu();
+		break;
 	default:
 		BUG();
 		break;
 	}
 
-	synchronize_rcu();
 	ghes_fini(ghes);
 	kfree(ghes);
 
@@ -412,6 +652,8 @@
 
 static int __init ghes_init(void)
 {
+	int rc;
+
 	if (acpi_disabled)
 		return -ENODEV;
 
@@ -420,12 +662,25 @@
 		return -EINVAL;
 	}
 
-	return platform_driver_register(&ghes_platform_driver);
+	rc = ghes_ioremap_init();
+	if (rc)
+		goto err;
+
+	rc = platform_driver_register(&ghes_platform_driver);
+	if (rc)
+		goto err_ioremap_exit;
+
+	return 0;
+err_ioremap_exit:
+	ghes_ioremap_exit();
+err:
+	return rc;
 }
 
 static void __exit ghes_exit(void)
 {
 	platform_driver_unregister(&ghes_platform_driver);
+	ghes_ioremap_exit();	/* pairs with ghes_ioremap_init() in ghes_init(); runs after the driver is unregistered */
 }
 
 module_init(ghes_init);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index bf972f8..3104aaf 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -39,10 +39,12 @@
  * Severity difinition for error_severity in struct cper_record_header
  * and section_severity in struct cper_section_descriptor
  */
-#define CPER_SEV_RECOVERABLE			0x0
-#define CPER_SEV_FATAL				0x1
-#define CPER_SEV_CORRECTED			0x2
-#define CPER_SEV_INFORMATIONAL			0x3
+enum {	/* enumerator order fixes the numeric values; do not reorder */
+	CPER_SEV_RECOVERABLE,	/* 0x0 */
+	CPER_SEV_FATAL,	/* 0x1 */
+	CPER_SEV_CORRECTED,	/* 0x2 */
+	CPER_SEV_INFORMATIONAL,	/* 0x3 */
+};
 
 /*
  * Validation bits difinition for validation_bits in struct
@@ -201,6 +203,47 @@
 	UUID_LE(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F,	\
 		0xDF, 0xAA, 0x84, 0xEC)
 
+#define CPER_PROC_VALID_TYPE			0x0001	/* NOTE(review): CPER_PROC_VALID_* presumably flag valid fields of the generic processor error section - confirm */
+#define CPER_PROC_VALID_ISA			0x0002
+#define CPER_PROC_VALID_ERROR_TYPE		0x0004
+#define CPER_PROC_VALID_OPERATION		0x0008
+#define CPER_PROC_VALID_FLAGS			0x0010
+#define CPER_PROC_VALID_LEVEL			0x0020
+#define CPER_PROC_VALID_VERSION			0x0040
+#define CPER_PROC_VALID_BRAND_INFO		0x0080
+#define CPER_PROC_VALID_ID			0x0100
+#define CPER_PROC_VALID_TARGET_ADDRESS		0x0200
+#define CPER_PROC_VALID_REQUESTOR_ID		0x0400
+#define CPER_PROC_VALID_RESPONDER_ID		0x0800
+#define CPER_PROC_VALID_IP			0x1000
+
+#define CPER_MEM_VALID_ERROR_STATUS		0x0001	/* NOTE(review): CPER_MEM_VALID_* presumably flag valid fields of the memory error section - confirm */
+#define CPER_MEM_VALID_PHYSICAL_ADDRESS		0x0002
+#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK	0x0004
+#define CPER_MEM_VALID_NODE			0x0008
+#define CPER_MEM_VALID_CARD			0x0010
+#define CPER_MEM_VALID_MODULE			0x0020
+#define CPER_MEM_VALID_BANK			0x0040
+#define CPER_MEM_VALID_DEVICE			0x0080
+#define CPER_MEM_VALID_ROW			0x0100
+#define CPER_MEM_VALID_COLUMN			0x0200
+#define CPER_MEM_VALID_BIT_POSITION		0x0400
+#define CPER_MEM_VALID_REQUESTOR_ID		0x0800
+#define CPER_MEM_VALID_RESPONDER_ID		0x1000
+#define CPER_MEM_VALID_TARGET_ID		0x2000
+#define CPER_MEM_VALID_ERROR_TYPE		0x4000
+
+#define CPER_PCIE_VALID_PORT_TYPE		0x0001	/* NOTE(review): CPER_PCIE_VALID_* presumably tested against cper_sec_pcie.validation_bits - confirm */
+#define CPER_PCIE_VALID_VERSION			0x0002
+#define CPER_PCIE_VALID_COMMAND_STATUS		0x0004
+#define CPER_PCIE_VALID_DEVICE_ID		0x0008
+#define CPER_PCIE_VALID_SERIAL_NUMBER		0x0010
+#define CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS	0x0020
+#define CPER_PCIE_VALID_CAPABILITY		0x0040
+#define CPER_PCIE_VALID_AER_INFO		0x0080
+
+#define CPER_PCIE_SLOT_SHIFT			3	/* NOTE(review): presumably the right-shift applied to cper_sec_pcie.device_id.slot - confirm */
+
+
 /*
  * All tables and structs must be byte-packed to match CPER
  * specification, since the tables are provided by the system BIOS
@@ -306,6 +349,41 @@
 	__u8	error_type;
 };
 
+struct cper_sec_pcie {	/* PCIe error section data; declared inside the byte-packed region of this header */
+	__u64		validation_bits;	/* NOTE(review): presumably a mask of CPER_PCIE_VALID_* - confirm */
+	__u32		port_type;
+	struct {	/* 4 bytes in total */
+		__u8	minor;
+		__u8	major;
+		__u8	reserved[2];
+	}		version;
+	__u16		command;
+	__u16		status;
+	__u32		reserved;
+	struct {	/* 16 bytes in total when byte-packed */
+		__u16	vendor_id;
+		__u16	device_id;
+		__u8	class_code[3];
+		__u8	function;
+		__u8	device;
+		__u16	segment;
+		__u8	bus;
+		__u8	secondary_bus;
+		__u16	slot;	/* NOTE(review): presumably decoded with CPER_PCIE_SLOT_SHIFT - confirm */
+		__u8	reserved;
+	}		device_id;
+	struct {	/* 64-bit value split into two 32-bit halves */
+		__u32	lower;
+		__u32	upper;
+	}		serial_number;
+	struct {
+		__u16	secondary_status;
+		__u16	control;
+	}		bridge;
+	__u8	capability[60];	/* raw bytes; not interpreted by this declaration */
+	__u8	aer_info[96];	/* raw bytes; not interpreted by this declaration */
+};
+
 /* Reset to default packing */
 #pragma pack()
 
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a..991bb87 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
 int panic_timeout;
+EXPORT_SYMBOL_GPL(panic_timeout);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 5730ecd..da4e2ad 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/io.h>
+#include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
@@ -90,3 +91,4 @@
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(ioremap_page_range);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eb5cc7d..816f074 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1175,6 +1175,7 @@
 {
 	vunmap_page_range(addr, addr + size);
 }
+EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
 
 /**
  * unmap_kernel_range - unmap kernel VM area and flush cache and TLB