blob: cf3fdde9337edc7d03bb02482e35d32a2b21d23b [file] [log] [blame]
/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
12
13#include <linux/kernel.h>
14#include <linux/edac.h>
15#include <linux/of_device.h>
16#include <linux/platform_device.h>
17#include <linux/smp.h>
18#include <linux/cpu.h>
Kyle Yan73fd1702017-07-18 15:35:13 -070019#include <linux/cpu_pm.h>
Kyle Yanddc44242016-06-20 14:42:14 -070020#include <linux/interrupt.h>
21#include <linux/of_irq.h>
22
23#include <asm/cputype.h>
24
25#include "edac_core.h"
26
27#ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
28static int poll_msec = 1000;
29module_param(poll_msec, int, 0444);
30#endif
31
32#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE
Kyle Yan61068972017-11-08 16:43:13 -080033static bool panic_on_ce = 1;
Kyle Yanddc44242016-06-20 14:42:14 -070034#else
Kyle Yan61068972017-11-08 16:43:13 -080035static bool panic_on_ce;
Kyle Yanddc44242016-06-20 14:42:14 -070036#endif
Kyle Yan61068972017-11-08 16:43:13 -080037module_param_named(panic_on_ce, panic_on_ce, bool, 0664);
Kyle Yanddc44242016-06-20 14:42:14 -070038
39#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE
40#define ARM64_ERP_PANIC_ON_UE 1
41#else
42#define ARM64_ERP_PANIC_ON_UE 0
43#endif
44
45#define L1 0x0
46#define L2 0x1
47#define L3 0x2
48
49#define EDAC_CPU "kryo3xx_edac"
50
/*
 * Field extractors for the values read from the ERRXSTATUS / ERRXMISC
 * registers. Every use of the argument is parenthesized so that an
 * arbitrary expression (e.g. "a | b") can be passed without operator
 * precedence changing the result.
 */
#define KRYO3XX_ERRXSTATUS_VALID(a)	(((a) >> 30) & 0x1)	/* bit 30 */
#define KRYO3XX_ERRXSTATUS_UE(a)	(((a) >> 29) & 0x1)	/* bit 29 */
#define KRYO3XX_ERRXSTATUS_SERR(a)	((a) & 0xFF)		/* bits 7:0 */

#define KRYO3XX_ERRXMISC_LVL(a)		(((a) >> 1) & 0x7)	/* bits 3:1 */
#define KRYO3XX_ERRXMISC_WAY(a)		(((a) >> 28) & 0xF)	/* bits 31:28 */
57
Kyle Yanddc44242016-06-20 14:42:14 -070058static inline void set_errxctlr_el1(void)
59{
Kyle Yane1280772016-10-10 18:32:45 -070060 u64 val = 0x10f;
Kyle Yanddc44242016-06-20 14:42:14 -070061
62 asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val));
63}
64
Kyle Yan7a59b362017-04-11 19:56:45 -070065static inline void set_errxmisc_overflow(void)
66{
Kyle Yan51096c4082017-06-19 17:25:05 -070067 u64 val = 0x7F7F00000000ULL;
Kyle Yan7a59b362017-04-11 19:56:45 -070068
69 asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (val));
70}
71
Kyle Yanddc44242016-06-20 14:42:14 -070072static inline void write_errselr_el1(u64 val)
73{
74 asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val));
75}
76
77static inline u64 read_errxstatus_el1(void)
78{
79 u64 val;
80
81 asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val));
82 return val;
83}
84
85static inline u64 read_errxmisc_el1(void)
86{
87 u64 val;
88
89 asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val));
90 return val;
91}
92
93static inline void clear_errxstatus_valid(u64 val)
94{
Kyle Yanfb736162016-10-05 17:28:20 -070095 asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val));
Kyle Yanddc44242016-06-20 14:42:14 -070096}
97
/* One reportable error kind: the message to log and the callback to invoke. */
struct errors_edac {
	/* Text handed to @func as its msg argument. */
	const char * const msg;
	/* Reporting callback, invoked with the control info, instance and block. */
	void (*func)(struct edac_device_ctl_info *edac_dev,
		     int inst_nr, int block_nr, const char *msg);
};
103
104static const struct errors_edac errors[] = {
105 {"Kryo3xx L1 Correctable Error", edac_device_handle_ce },
106 {"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue },
107 {"Kryo3xx L2 Correctable Error", edac_device_handle_ce },
108 {"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue },
109 {"L3 Correctable Error", edac_device_handle_ce },
110 {"L3 Uncorrectable Error", edac_device_handle_ue },
111};
112
/* Indices into errors[]. */
#define KRYO3XX_L1_CE 0
#define KRYO3XX_L1_UE 1
#define KRYO3XX_L2_CE 2
#define KRYO3XX_L2_UE 3
#define KRYO3XX_L3_CE 4
#define KRYO3XX_L3_UE 5

/* Values matched against the SERR field of the status register. */
#define DATA_BUF_ERR 0x2
#define CACHE_DATA_ERR 0x6
#define CACHE_TAG_DIRTY_ERR 0x7
#define TLB_PARITY_ERR_DATA 0x8
#define TLB_PARITY_ERR_TAG 0x9
#define BUS_ERROR 0x12
Kyle Yanddc44242016-06-20 14:42:14 -0700126
127struct erp_drvdata {
128 struct edac_device_ctl_info *edev_ctl;
Kyle Yane1280772016-10-10 18:32:45 -0700129 struct erp_drvdata __percpu **erp_cpu_drvdata;
Kyle Yan73fd1702017-07-18 15:35:13 -0700130 struct notifier_block nb_pm;
Kyle Yane1280772016-10-10 18:32:45 -0700131 int ppi;
Kyle Yanddc44242016-06-20 14:42:14 -0700132};
133
/* Driver state of the probed device; set in probe once EDAC registration succeeds. */
static struct erp_drvdata *panic_handler_drvdata;

/* Serializes the select/read/clear sequences on the error record registers. */
static DEFINE_SPINLOCK(local_handler_lock);
137
Kyle Yane1280772016-10-10 18:32:45 -0700138static void l1_l2_irq_enable(void *info)
139{
140 int irq = *(int *)info;
141
142 enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH);
143}
144
Kyle Yanddc44242016-06-20 14:42:14 -0700145static int request_erp_irq(struct platform_device *pdev, const char *propname,
146 const char *desc, irq_handler_t handler,
147 void *ed, int percpu)
148{
149 int rc;
150 struct resource *r;
Kyle Yane1280772016-10-10 18:32:45 -0700151 struct erp_drvdata *drv = ed;
Kyle Yanddc44242016-06-20 14:42:14 -0700152
153 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname);
154
155 if (!r) {
156 pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n",
157 propname);
Kyle Yane1280772016-10-10 18:32:45 -0700158 goto out;
Kyle Yanddc44242016-06-20 14:42:14 -0700159 }
160
161 if (!percpu) {
162 rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL,
163 handler,
164 IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
165 desc,
166 ed);
Kyle Yanddc44242016-06-20 14:42:14 -0700167
Kyle Yane1280772016-10-10 18:32:45 -0700168 if (rc) {
169 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
170 (int) r->start, rc, propname, desc);
171 goto out;
172 }
173
174 } else {
175 drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *);
176 if (!drv->erp_cpu_drvdata) {
177 pr_err("Failed to allocate percpu erp data\n");
178 goto out;
179 }
180
181 *raw_cpu_ptr(drv->erp_cpu_drvdata) = drv;
182 rc = request_percpu_irq(r->start, handler, desc,
183 drv->erp_cpu_drvdata);
184
185 if (rc) {
186 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
187 (int) r->start, rc, propname, desc);
188 goto out_free;
189 }
190
191 drv->ppi = r->start;
192 on_each_cpu(l1_l2_irq_enable, &(r->start), 1);
Kyle Yanddc44242016-06-20 14:42:14 -0700193 }
194
195 return 0;
Kyle Yane1280772016-10-10 18:32:45 -0700196
197out_free:
198 free_percpu(drv->erp_cpu_drvdata);
199 drv->erp_cpu_drvdata = NULL;
200out:
201 return -EINVAL;
Kyle Yanddc44242016-06-20 14:42:14 -0700202}
203
204static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc,
205 struct edac_device_ctl_info *edev_ctl)
206{
207 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus);
208 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc);
209 edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1);
210
211 switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) {
212 case DATA_BUF_ERR:
213 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n");
214 break;
215
216 case CACHE_DATA_ERR:
217 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n");
218 break;
219
220 case CACHE_TAG_DIRTY_ERR:
221 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n");
222 break;
223
Kyle Yan51096c4082017-06-19 17:25:05 -0700224 case TLB_PARITY_ERR_DATA:
Kyle Yanddc44242016-06-20 14:42:14 -0700225 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n");
226 break;
227
Kyle Yan51096c4082017-06-19 17:25:05 -0700228 case TLB_PARITY_ERR_TAG:
229 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB DATA\n");
230
Kyle Yanddc44242016-06-20 14:42:14 -0700231 case BUS_ERROR:
232 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
233 break;
234 }
235
236 if (level == L3)
237 edac_printk(KERN_CRIT, EDAC_CPU,
238 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc));
239 else
240 edac_printk(KERN_CRIT, EDAC_CPU,
241 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2);
Kyle Yan61068972017-11-08 16:43:13 -0800242
243 edev_ctl->panic_on_ce = panic_on_ce;
Kyle Yanddc44242016-06-20 14:42:14 -0700244 errors[errorcode].func(edev_ctl, smp_processor_id(),
245 level, errors[errorcode].msg);
246}
247
248static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc,
249 struct edac_device_ctl_info *edev_ctl)
250{
251 switch (KRYO3XX_ERRXMISC_LVL(errxmisc)) {
252 case L1:
253 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
254 dump_err_reg(KRYO3XX_L1_UE, L1, errxstatus, errxmisc,
255 edev_ctl);
256 else
257 dump_err_reg(KRYO3XX_L1_CE, L1, errxstatus, errxmisc,
258 edev_ctl);
259 break;
260
261 case L2:
262 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
263 dump_err_reg(KRYO3XX_L2_UE, L2, errxstatus, errxmisc,
264 edev_ctl);
265 else
266 dump_err_reg(KRYO3XX_L2_CE, L2, errxstatus, errxmisc,
267 edev_ctl);
268 break;
269 }
270
271}
272
273static void kryo3xx_check_l1_l2_ecc(void *info)
274{
275 struct edac_device_ctl_info *edev_ctl = info;
276 u64 errxstatus = 0;
277 u64 errxmisc = 0;
278 unsigned long flags;
279
280 spin_lock_irqsave(&local_handler_lock, flags);
281 write_errselr_el1(0);
282 errxstatus = read_errxstatus_el1();
283 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) {
284 errxmisc = read_errxmisc_el1();
285 edac_printk(KERN_CRIT, EDAC_CPU,
286 "Kryo3xx CPU%d detected a L1/L2 cache error\n",
287 smp_processor_id());
288
289 kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl);
290 clear_errxstatus_valid(errxstatus);
291 }
292 spin_unlock_irqrestore(&local_handler_lock, flags);
293}
294
Kyle Yan51096c4082017-06-19 17:25:05 -0700295static bool l3_is_bus_error(u64 errxstatus)
296{
297 if (KRYO3XX_ERRXSTATUS_SERR(errxstatus) == BUS_ERROR) {
298 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
299 return true;
300 }
301
302 return false;
303}
304
Kyle Yanddc44242016-06-20 14:42:14 -0700305static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl)
306{
307 u64 errxstatus = 0;
308 u64 errxmisc = 0;
309 unsigned long flags;
310
311 spin_lock_irqsave(&local_handler_lock, flags);
312 write_errselr_el1(1);
313 errxstatus = read_errxstatus_el1();
314 errxmisc = read_errxmisc_el1();
315
316 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) &&
317 KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) {
Kyle Yan51096c4082017-06-19 17:25:05 -0700318 if (l3_is_bus_error(errxstatus)) {
319 if (edev_ctl->panic_on_ue)
320 panic("Causing panic due to Bus Error\n");
321 return;
322 }
Kyle Yanddc44242016-06-20 14:42:14 -0700323 if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) {
324 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n");
325 dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc,
326 edev_ctl);
327 } else {
328 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n");
329 dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc,
330 edev_ctl);
331 }
332
333 clear_errxstatus_valid(errxstatus);
334 }
335 spin_unlock_irqrestore(&local_handler_lock, flags);
336}
337
338void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl)
339{
340 int cpu;
341
342 if (edev_ctl == NULL)
343 edev_ctl = panic_handler_drvdata->edev_ctl;
344
345 kryo3xx_check_l3_scu_error(edev_ctl);
346 for_each_possible_cpu(cpu)
347 smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc,
348 edev_ctl, 0);
349}
350
351static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata)
352{
Kyle Yan7a59b362017-04-11 19:56:45 -0700353 kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
Kyle Yanddc44242016-06-20 14:42:14 -0700354 return IRQ_HANDLED;
355}
356
357static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata)
358{
359 struct erp_drvdata *drv = drvdata;
360 struct edac_device_ctl_info *edev_ctl = drv->edev_ctl;
361
362 kryo3xx_check_l3_scu_error(edev_ctl);
363 return IRQ_HANDLED;
364}
365
/*
 * Program the control and misc registers on the CPU this runs on.
 * @info is unused; the signature matches the cross-call callback type.
 */
static void initialize_registers(void *info)
{
	set_errxctlr_el1();
	set_errxmisc_overflow();
}
371
372static void init_regs_on_cpu(bool all_cpus)
373{
374 int cpu;
375
376 write_errselr_el1(0);
377 if (all_cpus) {
378 for_each_possible_cpu(cpu)
379 smp_call_function_single(cpu, initialize_registers,
380 NULL, 1);
381 } else
382 initialize_registers(NULL);
383
384 write_errselr_el1(1);
385 initialize_registers(NULL);
386}
387
Kyle Yan73fd1702017-07-18 15:35:13 -0700388static int kryo3xx_pmu_cpu_pm_notify(struct notifier_block *self,
389 unsigned long action, void *v)
390{
391 switch (action) {
392 case CPU_PM_EXIT:
Kyle Yancbe97e82017-11-06 10:36:23 -0800393 init_regs_on_cpu(false);
Kyle Yan73fd1702017-07-18 15:35:13 -0700394 kryo3xx_check_l3_scu_error(panic_handler_drvdata->edev_ctl);
395 kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
396 break;
397 }
398
399 return NOTIFY_OK;
400}
401
Kyle Yanddc44242016-06-20 14:42:14 -0700402static int kryo3xx_cpu_erp_probe(struct platform_device *pdev)
403{
404 struct device *dev = &pdev->dev;
405 struct erp_drvdata *drv;
406 int rc = 0;
407 int fail = 0;
408
Kyle Yancbe97e82017-11-06 10:36:23 -0800409 init_regs_on_cpu(true);
Kyle Yan7a59b362017-04-11 19:56:45 -0700410
Kyle Yanddc44242016-06-20 14:42:14 -0700411 drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
412
413 if (!drv)
414 return -ENOMEM;
415
416 drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu",
417 num_possible_cpus(), "L", 3, 1, NULL, 0,
418 edac_device_alloc_index());
419
420 if (!drv->edev_ctl)
421 return -ENOMEM;
422
423 #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
424 drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors;
425 drv->edev_ctl->poll_msec = poll_msec;
426 drv->edev_ctl->defer_work = 1;
427 #endif
428
429 drv->edev_ctl->dev = dev;
430 drv->edev_ctl->mod_name = dev_name(dev);
431 drv->edev_ctl->dev_name = dev_name(dev);
432 drv->edev_ctl->ctl_name = "cache";
Kyle Yan61068972017-11-08 16:43:13 -0800433 drv->edev_ctl->panic_on_ce = panic_on_ce;
Kyle Yanddc44242016-06-20 14:42:14 -0700434 drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
Kyle Yan73fd1702017-07-18 15:35:13 -0700435 drv->nb_pm.notifier_call = kryo3xx_pmu_cpu_pm_notify;
Kyle Yanddc44242016-06-20 14:42:14 -0700436 platform_set_drvdata(pdev, drv);
437
438 rc = edac_device_add_device(drv->edev_ctl);
439 if (rc)
440 goto out_mem;
441
442 panic_handler_drvdata = drv;
443
444 if (request_erp_irq(pdev, "l1-l2-faultirq",
445 "KRYO3XX L1-L2 ECC FAULTIRQ",
446 kryo3xx_l1_l2_handler, drv, 1))
447 fail++;
448
449 if (request_erp_irq(pdev, "l3-scu-faultirq",
450 "KRYO3XX L3-SCU ECC FAULTIRQ",
451 kryo3xx_l3_scu_handler, drv, 0))
452 fail++;
453
454 if (fail == of_irq_count(dev->of_node)) {
455 pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n");
456 rc = -ENODEV;
457 goto out_dev;
458 }
459
Kyle Yan73fd1702017-07-18 15:35:13 -0700460 cpu_pm_register_notifier(&(drv->nb_pm));
461
Kyle Yanddc44242016-06-20 14:42:14 -0700462 return 0;
463
464out_dev:
465 edac_device_del_device(dev);
466out_mem:
467 edac_device_free_ctl_info(drv->edev_ctl);
468 return rc;
469}
470
471static int kryo3xx_cpu_erp_remove(struct platform_device *pdev)
472{
473 struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
474 struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;
475
Kyle Yane1280772016-10-10 18:32:45 -0700476
477 if (drv->erp_cpu_drvdata != NULL) {
478 free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata);
479 free_percpu(drv->erp_cpu_drvdata);
480 }
481
Kyle Yanddc44242016-06-20 14:42:14 -0700482 edac_device_del_device(edac_ctl->dev);
483 edac_device_free_ctl_info(edac_ctl);
484
485 return 0;
486}
487
488static const struct of_device_id kryo3xx_cpu_erp_match_table[] = {
489 { .compatible = "arm,arm64-kryo3xx-cpu-erp" },
490 { }
491};
492
493static struct platform_driver kryo3xx_cpu_erp_driver = {
494 .probe = kryo3xx_cpu_erp_probe,
495 .remove = kryo3xx_cpu_erp_remove,
496 .driver = {
497 .name = "kryo3xx_cpu_cache_erp",
498 .owner = THIS_MODULE,
499 .of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table),
500 },
501};
502
503static int __init kryo3xx_cpu_erp_init(void)
504{
505 return platform_driver_register(&kryo3xx_cpu_erp_driver);
506}
507module_init(kryo3xx_cpu_erp_init);
508
509static void __exit kryo3xx_cpu_erp_exit(void)
510{
511 platform_driver_unregister(&kryo3xx_cpu_erp_driver);
512}
513module_exit(kryo3xx_cpu_erp_exit);
514
515MODULE_LICENSE("GPL v2");
516MODULE_DESCRIPTION("Kryo3xx EDAC driver");