/* blob: cf3fdde9337edc7d03bb02482e35d32a2b21d23b [file] [log] [blame] */
/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/edac.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
#include <asm/cputype.h>
#include "edac_core.h"
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
/* Polling interval in ms for EDAC polling mode; read-only module parameter. */
static int poll_msec = 1000;
module_param(poll_msec, int, 0444);
#endif
/* Whether a correctable error should panic the system (runtime-tunable). */
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE
static bool panic_on_ce = 1;
#else
static bool panic_on_ce;
#endif
module_param_named(panic_on_ce, panic_on_ce, bool, 0664);
/* Whether an uncorrectable error should panic (compile-time only). */
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE
#define ARM64_ERP_PANIC_ON_UE 1
#else
#define ARM64_ERP_PANIC_ON_UE 0
#endif
/* Cache-level codes as encoded in ERRXMISC_EL1.LVL (see macros below). */
#define L1 0x0
#define L2 0x1
#define L3 0x2
#define EDAC_CPU "kryo3xx_edac"
/*
 * Field extractors for the ERR<n>STATUS / ERR<n>MISC error-record
 * registers. Bit positions presumably follow the ARMv8 RAS extension
 * layout used by Kryo3xx — TODO confirm against the CPU TRM.
 */
#define KRYO3XX_ERRXSTATUS_VALID(a) ((a >> 30) & 0x1)
#define KRYO3XX_ERRXSTATUS_UE(a) ((a >> 29) & 0x1)
#define KRYO3XX_ERRXSTATUS_SERR(a) (a & 0xFF)
#define KRYO3XX_ERRXMISC_LVL(a) ((a >> 1) & 0x7)
#define KRYO3XX_ERRXMISC_WAY(a) ((a >> 28) & 0xF)
/*
 * Write 0x10f to ERRXCTLR_EL1 (sysreg s3_0_c5_c4_1) on the calling CPU.
 * Presumably enables error detection/reporting bits — confirm against TRM.
 */
static inline void set_errxctlr_el1(void)
{
u64 val = 0x10f;
asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val));
}
/*
 * Write a fixed pattern to ERRXMISC_EL1 (sysreg s3_0_c5_c5_0) on the
 * calling CPU. NOTE(review): presumably primes the overflow/count
 * fields — confirm the 0x7F7F00000000 encoding against the CPU TRM.
 */
static inline void set_errxmisc_overflow(void)
{
u64 val = 0x7F7F00000000ULL;
asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (val));
}
/*
 * Select the active error record via ERRSELR_EL1 (sysreg s3_0_c5_c3_1).
 * Callers in this file use 0 for the per-CPU L1/L2 record and 1 for L3/SCU.
 */
static inline void write_errselr_el1(u64 val)
{
asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val));
}
/* Read ERRXSTATUS_EL1 (sysreg s3_0_c5_c4_2) for the selected error record. */
static inline u64 read_errxstatus_el1(void)
{
u64 val;
asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val));
return val;
}
/* Read ERRXMISC_EL1 (sysreg s3_0_c5_c5_0) for the selected error record. */
static inline u64 read_errxmisc_el1(void)
{
u64 val;
asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val));
return val;
}
/*
 * Write the status value back to ERRXSTATUS_EL1; callers pass the value
 * just read, presumably to write-1-clear the valid/error bits — TODO
 * confirm W1C semantics against the CPU TRM.
 */
static inline void clear_errxstatus_valid(u64 val)
{
asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val));
}
/*
 * Maps an error class to its log message and the EDAC core handler
 * (edac_device_handle_ce or edac_device_handle_ue) used to report it.
 */
struct errors_edac {
const char * const msg;
void (*func)(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
};
/* Indexed by the KRYO3XX_L{1,2,3}_{CE,UE} codes defined below. */
static const struct errors_edac errors[] = {
{"Kryo3xx L1 Correctable Error", edac_device_handle_ce },
{"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue },
{"Kryo3xx L2 Correctable Error", edac_device_handle_ce },
{"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue },
{"L3 Correctable Error", edac_device_handle_ce },
{"L3 Uncorrectable Error", edac_device_handle_ue },
};
/* Indices into the errors[] table above. */
#define KRYO3XX_L1_CE 0
#define KRYO3XX_L1_UE 1
#define KRYO3XX_L2_CE 2
#define KRYO3XX_L2_UE 3
#define KRYO3XX_L3_CE 4
#define KRYO3XX_L3_UE 5
/* SERR syndrome codes found in ERRXSTATUS_EL1[7:0], decoded in dump_err_reg(). */
#define DATA_BUF_ERR 0x2
#define CACHE_DATA_ERR 0x6
#define CACHE_TAG_DIRTY_ERR 0x7
#define TLB_PARITY_ERR_DATA 0x8
#define TLB_PARITY_ERR_TAG 0x9
#define BUS_ERROR 0x12
/* Per-device driver state. */
struct erp_drvdata {
struct edac_device_ctl_info *edev_ctl;
/* Per-CPU back-pointer used as the dev_id cookie for the percpu IRQ. */
struct erp_drvdata __percpu **erp_cpu_drvdata;
struct notifier_block nb_pm;
int ppi;
};
/* Set once in probe; used by IRQ/PM paths that have no drvdata cookie. */
static struct erp_drvdata *panic_handler_drvdata;
/* Serializes ERRSELR_EL1 record selection and the subsequent reads. */
static DEFINE_SPINLOCK(local_handler_lock);
/*
 * on_each_cpu() callback: enable the per-CPU L1/L2 fault PPI on the
 * executing CPU. @info points at the IRQ number.
 */
static void l1_l2_irq_enable(void *info)
{
int irq = *(int *)info;
enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH);
}
/*
 * Look up the IRQ named @propname on @pdev and request it.
 *
 * @percpu selects between a regular threaded IRQ (dev_id = @ed) and a
 * per-CPU PPI, for which a percpu back-pointer cookie is allocated and
 * the IRQ is then enabled on every CPU.
 *
 * Returns 0 on success, -EINVAL on any failure. Failures are logged as
 * "Proceeding anyway" because the caller tolerates individual misses
 * (it only bails if every IRQ request fails).
 */
static int request_erp_irq(struct platform_device *pdev, const char *propname,
const char *desc, irq_handler_t handler,
void *ed, int percpu)
{
int rc;
struct resource *r;
struct erp_drvdata *drv = ed;
r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname);
if (!r) {
pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n",
propname);
goto out;
}
if (!percpu) {
rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL,
handler,
IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
desc,
ed);
if (rc) {
pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
(int) r->start, rc, propname, desc);
goto out;
}
} else {
drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *);
if (!drv->erp_cpu_drvdata) {
pr_err("Failed to allocate percpu erp data\n");
goto out;
}
/* NOTE(review): only the calling CPU's slot is initialized here;
 * presumably the handler only dereferences drv via this cookie
 * indirectly — confirm all CPUs' slots are not read. */
*raw_cpu_ptr(drv->erp_cpu_drvdata) = drv;
rc = request_percpu_irq(r->start, handler, desc,
drv->erp_cpu_drvdata);
if (rc) {
pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
(int) r->start, rc, propname, desc);
goto out_free;
}
drv->ppi = r->start;
on_each_cpu(l1_l2_irq_enable, &(r->start), 1);
}
return 0;
out_free:
free_percpu(drv->erp_cpu_drvdata);
drv->erp_cpu_drvdata = NULL;
out:
return -EINVAL;
}
/*
 * Log the raw error-record registers, decode the SERR syndrome and the
 * failing way, then report the event to the EDAC core via the handler
 * in errors[@errorcode] (which panics on UE/CE per the panic_on_* policy).
 *
 * @errorcode: index into errors[] (KRYO3XX_L{1,2,3}_{CE,UE})
 * @level:     cache level code (L1/L2/L3 as defined above)
 */
static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc,
			struct edac_device_ctl_info *edev_ctl)
{
	edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus);
	edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc);
	edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1);

	switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) {
	case DATA_BUF_ERR:
		edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n");
		break;

	case CACHE_DATA_ERR:
		edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n");
		break;

	case CACHE_TAG_DIRTY_ERR:
		edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n");
		break;

	case TLB_PARITY_ERR_DATA:
		edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n");
		break;

	case TLB_PARITY_ERR_TAG:
		edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB DATA\n");
		/* Fix: was missing a break and fell through to BUS_ERROR,
		 * spuriously logging "Bus Error" for every TLB tag parity
		 * error. */
		break;

	case BUS_ERROR:
		edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
		break;

	default:
		/* Unknown syndrome: still report, but flag the raw code. */
		edac_printk(KERN_CRIT, EDAC_CPU, "Unknown SERR code: 0x%llx\n",
			KRYO3XX_ERRXSTATUS_SERR(errxstatus));
		break;
	}

	if (level == L3)
		edac_printk(KERN_CRIT, EDAC_CPU,
			"Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc));
	else
		/* L1/L2 encode the way in the upper bits of the field. */
		edac_printk(KERN_CRIT, EDAC_CPU,
			"Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2);

	/* Pick up runtime changes made through the module parameter. */
	edev_ctl->panic_on_ce = panic_on_ce;
	errors[errorcode].func(edev_ctl, smp_processor_id(),
				level, errors[errorcode].msg);
}
/*
 * Classify a valid L1/L2 error record and forward it to dump_err_reg()
 * with the matching errors[] index. Levels other than L1/L2 are ignored
 * here (L3 is handled by the separate SCU path).
 */
static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc,
			struct edac_device_ctl_info *edev_ctl)
{
	int lvl = KRYO3XX_ERRXMISC_LVL(errxmisc);
	int uncorrectable = KRYO3XX_ERRXSTATUS_UE(errxstatus);

	if (lvl == L1)
		dump_err_reg(uncorrectable ? KRYO3XX_L1_UE : KRYO3XX_L1_CE,
				L1, errxstatus, errxmisc, edev_ctl);
	else if (lvl == L2)
		dump_err_reg(uncorrectable ? KRYO3XX_L2_UE : KRYO3XX_L2_CE,
				L2, errxstatus, errxmisc, edev_ctl);
}
/*
 * Check the calling CPU's L1/L2 error record (ERRSELR_EL1 = 0) and, if a
 * valid error is latched, decode/report it and clear the record. Runs in
 * IRQ, polling (via smp_call_function_single) and CPU-PM-exit contexts;
 * the spinlock serializes ERRSELR selection against the L3 path.
 * @info is the edac_device_ctl_info to report against.
 */
static void kryo3xx_check_l1_l2_ecc(void *info)
{
struct edac_device_ctl_info *edev_ctl = info;
u64 errxstatus = 0;
u64 errxmisc = 0;
unsigned long flags;
spin_lock_irqsave(&local_handler_lock, flags);
write_errselr_el1(0);
errxstatus = read_errxstatus_el1();
if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) {
errxmisc = read_errxmisc_el1();
edac_printk(KERN_CRIT, EDAC_CPU,
"Kryo3xx CPU%d detected a L1/L2 cache error\n",
smp_processor_id());
kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl);
clear_errxstatus_valid(errxstatus);
}
spin_unlock_irqrestore(&local_handler_lock, flags);
}
/*
 * Return true (and log) when the L3 syndrome is a bus error rather than
 * a cache ECC/parity error.
 */
static bool l3_is_bus_error(u64 errxstatus)
{
	bool is_bus_err = KRYO3XX_ERRXSTATUS_SERR(errxstatus) == BUS_ERROR;

	if (is_bus_err)
		edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");

	return is_bus_err;
}
/*
 * Check the shared L3/SCU error record (ERRSELR_EL1 = 1) and, if a valid
 * L3 error is latched, decode/report it and clear the record. Bus errors
 * are logged separately and optionally panic per the panic_on_ue policy.
 */
static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl)
{
	u64 errxstatus = 0;
	u64 errxmisc = 0;
	unsigned long flags;

	spin_lock_irqsave(&local_handler_lock, flags);
	write_errselr_el1(1);
	errxstatus = read_errxstatus_el1();
	errxmisc = read_errxmisc_el1();

	if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) &&
		KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) {
		if (l3_is_bus_error(errxstatus)) {
			if (edev_ctl->panic_on_ue)
				panic("Causing panic due to Bus Error\n");
			/*
			 * Fix: this path previously returned without
			 * releasing local_handler_lock, leaving the lock
			 * held with interrupts disabled and deadlocking
			 * the next error check on any CPU.
			 */
			goto unlock;
		}
		if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) {
			edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n");
			dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc,
				edev_ctl);
		} else {
			edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n");
			dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc,
				edev_ctl);
		}
		clear_errxstatus_valid(errxstatus);
	}

unlock:
	spin_unlock_irqrestore(&local_handler_lock, flags);
}
/*
 * EDAC edac_check callback for polling mode: sweep the shared L3/SCU
 * record, then each CPU's private L1/L2 record via IPI (wait = 0, i.e.
 * fire-and-forget). Falls back to the globally stashed ctl_info when
 * called with NULL.
 */
void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl)
{
int cpu;
if (edev_ctl == NULL)
edev_ctl = panic_handler_drvdata->edev_ctl;
kryo3xx_check_l3_scu_error(edev_ctl);
for_each_possible_cpu(cpu)
smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc,
edev_ctl, 0);
}
/*
 * Per-CPU L1/L2 fault PPI handler; checks the executing CPU's error
 * record. Uses the global drvdata since the percpu dev_id cookie is not
 * dereferenced here.
 */
static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata)
{
kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
return IRQ_HANDLED;
}
/* L3/SCU fault IRQ handler; @drvdata is the erp_drvdata passed at request. */
static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata)
{
	struct erp_drvdata *drv = drvdata;

	kryo3xx_check_l3_scu_error(drv->edev_ctl);

	return IRQ_HANDLED;
}
/*
 * Program ERRXCTLR/ERRXMISC for the currently selected error record on
 * the executing CPU. Callable directly or as a smp_call callback (@info
 * unused).
 */
static void initialize_registers(void *info)
{
set_errxctlr_el1();
set_errxmisc_overflow();
}
/*
 * Initialize error-record registers: record 0 (L1/L2) on every CPU when
 * @all_cpus (probe path) or just the local CPU (PM-exit path), then
 * record 1 (L3/SCU) locally.
 * NOTE(review): the record-0 selection is done on the calling CPU only;
 * remote CPUs presumably reset to record 0 — confirm ERRSELR state on
 * the IPI'd CPUs.
 */
static void init_regs_on_cpu(bool all_cpus)
{
int cpu;
write_errselr_el1(0);
if (all_cpus) {
for_each_possible_cpu(cpu)
smp_call_function_single(cpu, initialize_registers,
NULL, 1);
} else
initialize_registers(NULL);
write_errselr_el1(1);
initialize_registers(NULL);
}
/*
 * CPU PM notifier: on exit from a low-power state the error-record
 * control registers may have been lost, so reprogram them on this CPU
 * and re-check both the L3/SCU and local L1/L2 records for anything
 * latched meanwhile.
 */
static int kryo3xx_pmu_cpu_pm_notify(struct notifier_block *self,
unsigned long action, void *v)
{
switch (action) {
case CPU_PM_EXIT:
init_regs_on_cpu(false);
kryo3xx_check_l3_scu_error(panic_handler_drvdata->edev_ctl);
kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
break;
}
return NOTIFY_OK;
}
/*
 * Platform probe: program the error-record registers on all CPUs,
 * allocate and register the EDAC device ("cpu"/"L"/"cache" hierarchy,
 * one instance per possible CPU, 3 levels), then request the L1/L2
 * per-CPU PPI and the L3/SCU IRQ. Individual IRQ failures are
 * tolerated; only total failure aborts the probe.
 */
static int kryo3xx_cpu_erp_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct erp_drvdata *drv;
int rc = 0;
int fail = 0;
init_regs_on_cpu(true);
drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
if (!drv)
return -ENOMEM;
drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu",
num_possible_cpus(), "L", 3, 1, NULL, 0,
edac_device_alloc_index());
if (!drv->edev_ctl)
return -ENOMEM;
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors;
drv->edev_ctl->poll_msec = poll_msec;
drv->edev_ctl->defer_work = 1;
#endif
drv->edev_ctl->dev = dev;
drv->edev_ctl->mod_name = dev_name(dev);
drv->edev_ctl->dev_name = dev_name(dev);
drv->edev_ctl->ctl_name = "cache";
drv->edev_ctl->panic_on_ce = panic_on_ce;
drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
drv->nb_pm.notifier_call = kryo3xx_pmu_cpu_pm_notify;
platform_set_drvdata(pdev, drv);
rc = edac_device_add_device(drv->edev_ctl);
if (rc)
goto out_mem;
/* Must be set before IRQs fire: handlers read this global. */
panic_handler_drvdata = drv;
if (request_erp_irq(pdev, "l1-l2-faultirq",
"KRYO3XX L1-L2 ECC FAULTIRQ",
kryo3xx_l1_l2_handler, drv, 1))
fail++;
if (request_erp_irq(pdev, "l3-scu-faultirq",
"KRYO3XX L3-SCU ECC FAULTIRQ",
kryo3xx_l3_scu_handler, drv, 0))
fail++;
/* NOTE(review): if the DT declares zero interrupts this comparison
 * is 0 == 0 and the probe fails even though nothing was requested —
 * presumably the DT always declares both IRQs; verify. */
if (fail == of_irq_count(dev->of_node)) {
pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n");
rc = -ENODEV;
goto out_dev;
}
cpu_pm_register_notifier(&(drv->nb_pm));
return 0;
out_dev:
edac_device_del_device(dev);
out_mem:
edac_device_free_ctl_info(drv->edev_ctl);
return rc;
}
/*
 * Platform remove: tear down everything probe set up — the CPU PM
 * notifier, the per-CPU IRQ cookie (if percpu IRQ setup succeeded), and
 * the EDAC device. The non-percpu IRQ is devm-managed and needs no
 * explicit free.
 */
static int kryo3xx_cpu_erp_remove(struct platform_device *pdev)
{
	struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
	struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;

	/*
	 * Fix: probe registers this notifier but it was never
	 * unregistered, leaving a dangling notifier_block (and calls into
	 * unloaded module code) after remove/unload.
	 */
	cpu_pm_unregister_notifier(&(drv->nb_pm));

	if (drv->erp_cpu_drvdata != NULL) {
		free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata);
		free_percpu(drv->erp_cpu_drvdata);
	}

	edac_device_del_device(edac_ctl->dev);
	edac_device_free_ctl_info(edac_ctl);

	return 0;
}
/* DT match: binds to "arm,arm64-kryo3xx-cpu-erp" nodes. */
static const struct of_device_id kryo3xx_cpu_erp_match_table[] = {
{ .compatible = "arm,arm64-kryo3xx-cpu-erp" },
{ }
};
static struct platform_driver kryo3xx_cpu_erp_driver = {
.probe = kryo3xx_cpu_erp_probe,
.remove = kryo3xx_cpu_erp_remove,
.driver = {
.name = "kryo3xx_cpu_cache_erp",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table),
},
};
/* Module entry: register the platform driver. */
static int __init kryo3xx_cpu_erp_init(void)
{
return platform_driver_register(&kryo3xx_cpu_erp_driver);
}
module_init(kryo3xx_cpu_erp_init);
/* Module exit: unregister; per-device teardown happens in remove(). */
static void __exit kryo3xx_cpu_erp_exit(void)
{
platform_driver_unregister(&kryo3xx_cpu_erp_driver);
}
module_exit(kryo3xx_cpu_erp_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Kryo3xx EDAC driver");