ACPI, APEI, Printk queued error record before panic
Because printk is not safe inside an NMI handler, the recoverable error
records received in the NMI handler are queued to be printked in a
delayed IRQ context via irq_work. If a fatal error occurs after the
recoverable error and before the irq_work is processed, we lose an error
report.
To solve the issue, the queued error records are printked in the NMI
handler if the system is about to panic.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9dcb2d8..aaf3609 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -740,26 +740,34 @@
return ret;
}
-static void ghes_proc_in_irq(struct irq_work *irq_work)
+static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
{
- struct llist_node *llnode, *next, *tail = NULL;
- struct ghes_estatus_node *estatus_node;
- struct acpi_hest_generic *generic;
- struct acpi_hest_generic_status *estatus;
- u32 len, node_len;
+ struct llist_node *next, *tail = NULL; /* tail: head of the already-reversed part */
- /*
- * Because the time order of estatus in list is reversed,
- * revert it back to proper order.
- */
- llnode = llist_del_all(&ghes_estatus_llist);
while (llnode) {
next = llnode->next;
llnode->next = tail;
tail = llnode;
llnode = next;
}
- llnode = tail;
+
+ return tail; /* head of the reversed list; NULL when the input was empty */
+}
+
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+ struct llist_node *llnode, *next;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_nodes_reverse(llnode);
while (llnode) {
next = llnode->next;
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
@@ -779,6 +787,32 @@
}
}
+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len; /* NOTE(review): assigned below but never read here */
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Print the records still queued for irq_work. The list is in
+ * reversed time order, so reverse it back to proper order first.
+ */
+ llnode = llist_nodes_reverse(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = apei_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus); /* NULL: presumably the default prefix -- TODO confirm */
+ llnode = llnode->next; /* nodes are only walked, not released, in this loop */
+ }
+}
+
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes, *ghes_global = NULL;
@@ -804,6 +838,7 @@
if (sev_global >= GHES_SEV_PANIC) {
oops_begin();
+ ghes_print_queued_estatus();
__ghes_print_estatus(KERN_EMERG, ghes_global->generic,
ghes_global->estatus);
/* reboot to log the error! */