/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
12
13#include <linux/kernel.h>
14#include <linux/edac.h>
15#include <linux/of_device.h>
16#include <linux/platform_device.h>
17#include <linux/smp.h>
18#include <linux/cpu.h>
Kyle Yan73fd1702017-07-18 15:35:13 -070019#include <linux/cpu_pm.h>
Kyle Yanddc44242016-06-20 14:42:14 -070020#include <linux/interrupt.h>
21#include <linux/of_irq.h>
22
23#include <asm/cputype.h>
24
25#include "edac_core.h"
26
#if defined(CONFIG_EDAC_KRYO3XX_ARM64_POLL)
/*
 * Polling period in milliseconds, exposed as a read-only (0444) module
 * parameter. The probe routine copies it into the EDAC control info.
 */
static int poll_msec = 1000;
module_param(poll_msec, int, 0444);
#endif /* CONFIG_EDAC_KRYO3XX_ARM64_POLL */
31
/*
 * Compile-time panic policy. Each macro is 1 when the matching Kconfig
 * symbol is defined and 0 otherwise; the probe routine stores them in the
 * EDAC control info (panic_on_ce / panic_on_ue).
 */
#if defined(CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE)
#define ARM64_ERP_PANIC_ON_CE 1
#else
#define ARM64_ERP_PANIC_ON_CE 0
#endif

#if defined(CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE)
#define ARM64_ERP_PANIC_ON_UE 1
#else
#define ARM64_ERP_PANIC_ON_UE 0
#endif
43
/* Cache level identifiers, compared against KRYO3XX_ERRXMISC_LVL(). */
#define L1	0x0
#define L2	0x1
#define L3	0x2

/* Prefix handed to edac_printk() for every message from this driver. */
#define EDAC_CPU	"kryo3xx_edac"
49
/*
 * Field extractors for the error status register value. The argument is
 * parenthesized so that expression arguments (e.g. "a | b") are evaluated
 * as a whole before the shift/mask is applied.
 */
#define KRYO3XX_ERRXSTATUS_VALID(a)	(((a) >> 30) & 0x1)	/* bit 30 */
#define KRYO3XX_ERRXSTATUS_UE(a)	(((a) >> 29) & 0x1)	/* bit 29 */
#define KRYO3XX_ERRXSTATUS_SERR(a)	((a) & 0xFF)		/* bits [7:0] */

/* Field extractors for the error misc register value. */
#define KRYO3XX_ERRXMISC_LVL(a)		(((a) >> 1) & 0x7)	/* bits [3:1] */
#define KRYO3XX_ERRXMISC_WAY(a)		(((a) >> 28) & 0xF)	/* bits [31:28] */
56
Kyle Yanddc44242016-06-20 14:42:14 -070057static inline void set_errxctlr_el1(void)
58{
Kyle Yane1280772016-10-10 18:32:45 -070059 u64 val = 0x10f;
Kyle Yanddc44242016-06-20 14:42:14 -070060
61 asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val));
62}
63
Kyle Yan7a59b362017-04-11 19:56:45 -070064static inline void set_errxmisc_overflow(void)
65{
Kyle Yan51096c4082017-06-19 17:25:05 -070066 u64 val = 0x7F7F00000000ULL;
Kyle Yan7a59b362017-04-11 19:56:45 -070067
68 asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (val));
69}
70
Kyle Yanddc44242016-06-20 14:42:14 -070071static inline void write_errselr_el1(u64 val)
72{
73 asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val));
74}
75
76static inline u64 read_errxstatus_el1(void)
77{
78 u64 val;
79
80 asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val));
81 return val;
82}
83
84static inline u64 read_errxmisc_el1(void)
85{
86 u64 val;
87
88 asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val));
89 return val;
90}
91
92static inline void clear_errxstatus_valid(u64 val)
93{
Kyle Yanfb736162016-10-05 17:28:20 -070094 asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val));
Kyle Yanddc44242016-06-20 14:42:14 -070095}
96
/*
 * One reportable error kind: the message to log and the EDAC core
 * callback used to report it.
 */
struct errors_edac {
	/* Message passed through to @func. */
	const char * const msg;
	/* Reporting callback; receives the EDAC device, instance, block, msg. */
	void (*func)(struct edac_device_ctl_info *edac_dev,
		     int inst_nr, int block_nr, const char *msg);
};
102
103static const struct errors_edac errors[] = {
104 {"Kryo3xx L1 Correctable Error", edac_device_handle_ce },
105 {"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue },
106 {"Kryo3xx L2 Correctable Error", edac_device_handle_ce },
107 {"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue },
108 {"L3 Correctable Error", edac_device_handle_ce },
109 {"L3 Uncorrectable Error", edac_device_handle_ue },
110};
111
/* Indices into errors[]: per cache level, correctable then uncorrectable. */
#define KRYO3XX_L1_CE	0
#define KRYO3XX_L1_UE	1
#define KRYO3XX_L2_CE	2
#define KRYO3XX_L2_UE	3
#define KRYO3XX_L3_CE	4
#define KRYO3XX_L3_UE	5
118
/* Values of the KRYO3XX_ERRXSTATUS_SERR() field decoded by this driver. */
#define DATA_BUF_ERR		0x2
#define CACHE_DATA_ERR		0x6
#define CACHE_TAG_DIRTY_ERR	0x7
#define TLB_PARITY_ERR_DATA	0x8
#define TLB_PARITY_ERR_TAG	0x9
#define BUS_ERROR		0x12
Kyle Yanddc44242016-06-20 14:42:14 -0700125
126struct erp_drvdata {
127 struct edac_device_ctl_info *edev_ctl;
Kyle Yane1280772016-10-10 18:32:45 -0700128 struct erp_drvdata __percpu **erp_cpu_drvdata;
Kyle Yan73fd1702017-07-18 15:35:13 -0700129 struct notifier_block nb_pm;
Kyle Yane1280772016-10-10 18:32:45 -0700130 int ppi;
Kyle Yanddc44242016-06-20 14:42:14 -0700131};
132
133static struct erp_drvdata *panic_handler_drvdata;
134
135static DEFINE_SPINLOCK(local_handler_lock);
136
Kyle Yane1280772016-10-10 18:32:45 -0700137static void l1_l2_irq_enable(void *info)
138{
139 int irq = *(int *)info;
140
141 enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH);
142}
143
Kyle Yanddc44242016-06-20 14:42:14 -0700144static int request_erp_irq(struct platform_device *pdev, const char *propname,
145 const char *desc, irq_handler_t handler,
146 void *ed, int percpu)
147{
148 int rc;
149 struct resource *r;
Kyle Yane1280772016-10-10 18:32:45 -0700150 struct erp_drvdata *drv = ed;
Kyle Yanddc44242016-06-20 14:42:14 -0700151
152 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname);
153
154 if (!r) {
155 pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n",
156 propname);
Kyle Yane1280772016-10-10 18:32:45 -0700157 goto out;
Kyle Yanddc44242016-06-20 14:42:14 -0700158 }
159
160 if (!percpu) {
161 rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL,
162 handler,
163 IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
164 desc,
165 ed);
Kyle Yanddc44242016-06-20 14:42:14 -0700166
Kyle Yane1280772016-10-10 18:32:45 -0700167 if (rc) {
168 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
169 (int) r->start, rc, propname, desc);
170 goto out;
171 }
172
173 } else {
174 drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *);
175 if (!drv->erp_cpu_drvdata) {
176 pr_err("Failed to allocate percpu erp data\n");
177 goto out;
178 }
179
180 *raw_cpu_ptr(drv->erp_cpu_drvdata) = drv;
181 rc = request_percpu_irq(r->start, handler, desc,
182 drv->erp_cpu_drvdata);
183
184 if (rc) {
185 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
186 (int) r->start, rc, propname, desc);
187 goto out_free;
188 }
189
190 drv->ppi = r->start;
191 on_each_cpu(l1_l2_irq_enable, &(r->start), 1);
Kyle Yanddc44242016-06-20 14:42:14 -0700192 }
193
194 return 0;
Kyle Yane1280772016-10-10 18:32:45 -0700195
196out_free:
197 free_percpu(drv->erp_cpu_drvdata);
198 drv->erp_cpu_drvdata = NULL;
199out:
200 return -EINVAL;
Kyle Yanddc44242016-06-20 14:42:14 -0700201}
202
203static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc,
204 struct edac_device_ctl_info *edev_ctl)
205{
206 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus);
207 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc);
208 edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1);
209
210 switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) {
211 case DATA_BUF_ERR:
212 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n");
213 break;
214
215 case CACHE_DATA_ERR:
216 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n");
217 break;
218
219 case CACHE_TAG_DIRTY_ERR:
220 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n");
221 break;
222
Kyle Yan51096c4082017-06-19 17:25:05 -0700223 case TLB_PARITY_ERR_DATA:
Kyle Yanddc44242016-06-20 14:42:14 -0700224 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n");
225 break;
226
Kyle Yan51096c4082017-06-19 17:25:05 -0700227 case TLB_PARITY_ERR_TAG:
228 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB DATA\n");
229
Kyle Yanddc44242016-06-20 14:42:14 -0700230 case BUS_ERROR:
231 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
232 break;
233 }
234
235 if (level == L3)
236 edac_printk(KERN_CRIT, EDAC_CPU,
237 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc));
238 else
239 edac_printk(KERN_CRIT, EDAC_CPU,
240 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2);
241 errors[errorcode].func(edev_ctl, smp_processor_id(),
242 level, errors[errorcode].msg);
243}
244
245static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc,
246 struct edac_device_ctl_info *edev_ctl)
247{
248 switch (KRYO3XX_ERRXMISC_LVL(errxmisc)) {
249 case L1:
250 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
251 dump_err_reg(KRYO3XX_L1_UE, L1, errxstatus, errxmisc,
252 edev_ctl);
253 else
254 dump_err_reg(KRYO3XX_L1_CE, L1, errxstatus, errxmisc,
255 edev_ctl);
256 break;
257
258 case L2:
259 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
260 dump_err_reg(KRYO3XX_L2_UE, L2, errxstatus, errxmisc,
261 edev_ctl);
262 else
263 dump_err_reg(KRYO3XX_L2_CE, L2, errxstatus, errxmisc,
264 edev_ctl);
265 break;
266 }
267
268}
269
270static void kryo3xx_check_l1_l2_ecc(void *info)
271{
272 struct edac_device_ctl_info *edev_ctl = info;
273 u64 errxstatus = 0;
274 u64 errxmisc = 0;
275 unsigned long flags;
276
277 spin_lock_irqsave(&local_handler_lock, flags);
278 write_errselr_el1(0);
279 errxstatus = read_errxstatus_el1();
280 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) {
281 errxmisc = read_errxmisc_el1();
282 edac_printk(KERN_CRIT, EDAC_CPU,
283 "Kryo3xx CPU%d detected a L1/L2 cache error\n",
284 smp_processor_id());
285
286 kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl);
287 clear_errxstatus_valid(errxstatus);
288 }
289 spin_unlock_irqrestore(&local_handler_lock, flags);
290}
291
Kyle Yan51096c4082017-06-19 17:25:05 -0700292static bool l3_is_bus_error(u64 errxstatus)
293{
294 if (KRYO3XX_ERRXSTATUS_SERR(errxstatus) == BUS_ERROR) {
295 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
296 return true;
297 }
298
299 return false;
300}
301
Kyle Yanddc44242016-06-20 14:42:14 -0700302static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl)
303{
304 u64 errxstatus = 0;
305 u64 errxmisc = 0;
306 unsigned long flags;
307
308 spin_lock_irqsave(&local_handler_lock, flags);
309 write_errselr_el1(1);
310 errxstatus = read_errxstatus_el1();
311 errxmisc = read_errxmisc_el1();
312
313 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) &&
314 KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) {
Kyle Yan51096c4082017-06-19 17:25:05 -0700315 if (l3_is_bus_error(errxstatus)) {
316 if (edev_ctl->panic_on_ue)
317 panic("Causing panic due to Bus Error\n");
318 return;
319 }
Kyle Yanddc44242016-06-20 14:42:14 -0700320 if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) {
321 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n");
322 dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc,
323 edev_ctl);
324 } else {
325 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n");
326 dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc,
327 edev_ctl);
328 }
329
330 clear_errxstatus_valid(errxstatus);
331 }
332 spin_unlock_irqrestore(&local_handler_lock, flags);
333}
334
335void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl)
336{
337 int cpu;
338
339 if (edev_ctl == NULL)
340 edev_ctl = panic_handler_drvdata->edev_ctl;
341
342 kryo3xx_check_l3_scu_error(edev_ctl);
343 for_each_possible_cpu(cpu)
344 smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc,
345 edev_ctl, 0);
346}
347
348static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata)
349{
Kyle Yan7a59b362017-04-11 19:56:45 -0700350 kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
Kyle Yanddc44242016-06-20 14:42:14 -0700351 return IRQ_HANDLED;
352}
353
354static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata)
355{
356 struct erp_drvdata *drv = drvdata;
357 struct edac_device_ctl_info *edev_ctl = drv->edev_ctl;
358
359 kryo3xx_check_l3_scu_error(edev_ctl);
360 return IRQ_HANDLED;
361}
362
Kyle Yan73fd1702017-07-18 15:35:13 -0700363static int kryo3xx_pmu_cpu_pm_notify(struct notifier_block *self,
364 unsigned long action, void *v)
365{
366 switch (action) {
367 case CPU_PM_EXIT:
368 kryo3xx_check_l3_scu_error(panic_handler_drvdata->edev_ctl);
369 kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
370 break;
371 }
372
373 return NOTIFY_OK;
374}
375
/*
 * Cross-call callback run on each CPU from the probe routine: programs the
 * error control register and then the misc register on the calling CPU.
 * @info is unused.
 */
static void initialize_registers(void *info)
{
	/* Order is kept as is: control register first, then misc. */
	set_errxctlr_el1();

	set_errxmisc_overflow();
}
381
Kyle Yanddc44242016-06-20 14:42:14 -0700382static int kryo3xx_cpu_erp_probe(struct platform_device *pdev)
383{
384 struct device *dev = &pdev->dev;
385 struct erp_drvdata *drv;
386 int rc = 0;
387 int fail = 0;
Kyle Yan7a59b362017-04-11 19:56:45 -0700388 int cpu;
Kyle Yanddc44242016-06-20 14:42:14 -0700389
Kyle Yan7a59b362017-04-11 19:56:45 -0700390 for_each_possible_cpu(cpu)
391 smp_call_function_single(cpu, initialize_registers, NULL, 1);
392
393
Kyle Yanddc44242016-06-20 14:42:14 -0700394 drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
395
396 if (!drv)
397 return -ENOMEM;
398
399 drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu",
400 num_possible_cpus(), "L", 3, 1, NULL, 0,
401 edac_device_alloc_index());
402
403 if (!drv->edev_ctl)
404 return -ENOMEM;
405
406 #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
407 drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors;
408 drv->edev_ctl->poll_msec = poll_msec;
409 drv->edev_ctl->defer_work = 1;
410 #endif
411
412 drv->edev_ctl->dev = dev;
413 drv->edev_ctl->mod_name = dev_name(dev);
414 drv->edev_ctl->dev_name = dev_name(dev);
415 drv->edev_ctl->ctl_name = "cache";
416 drv->edev_ctl->panic_on_ce = ARM64_ERP_PANIC_ON_CE;
417 drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
Kyle Yan73fd1702017-07-18 15:35:13 -0700418 drv->nb_pm.notifier_call = kryo3xx_pmu_cpu_pm_notify;
Kyle Yanddc44242016-06-20 14:42:14 -0700419 platform_set_drvdata(pdev, drv);
420
421 rc = edac_device_add_device(drv->edev_ctl);
422 if (rc)
423 goto out_mem;
424
425 panic_handler_drvdata = drv;
426
427 if (request_erp_irq(pdev, "l1-l2-faultirq",
428 "KRYO3XX L1-L2 ECC FAULTIRQ",
429 kryo3xx_l1_l2_handler, drv, 1))
430 fail++;
431
432 if (request_erp_irq(pdev, "l3-scu-faultirq",
433 "KRYO3XX L3-SCU ECC FAULTIRQ",
434 kryo3xx_l3_scu_handler, drv, 0))
435 fail++;
436
437 if (fail == of_irq_count(dev->of_node)) {
438 pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n");
439 rc = -ENODEV;
440 goto out_dev;
441 }
442
Kyle Yan73fd1702017-07-18 15:35:13 -0700443 cpu_pm_register_notifier(&(drv->nb_pm));
444
Kyle Yanddc44242016-06-20 14:42:14 -0700445 return 0;
446
447out_dev:
448 edac_device_del_device(dev);
449out_mem:
450 edac_device_free_ctl_info(drv->edev_ctl);
451 return rc;
452}
453
454static int kryo3xx_cpu_erp_remove(struct platform_device *pdev)
455{
456 struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
457 struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;
458
Kyle Yane1280772016-10-10 18:32:45 -0700459
460 if (drv->erp_cpu_drvdata != NULL) {
461 free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata);
462 free_percpu(drv->erp_cpu_drvdata);
463 }
464
Kyle Yanddc44242016-06-20 14:42:14 -0700465 edac_device_del_device(edac_ctl->dev);
466 edac_device_free_ctl_info(edac_ctl);
467
468 return 0;
469}
470
471static const struct of_device_id kryo3xx_cpu_erp_match_table[] = {
472 { .compatible = "arm,arm64-kryo3xx-cpu-erp" },
473 { }
474};
475
476static struct platform_driver kryo3xx_cpu_erp_driver = {
477 .probe = kryo3xx_cpu_erp_probe,
478 .remove = kryo3xx_cpu_erp_remove,
479 .driver = {
480 .name = "kryo3xx_cpu_cache_erp",
481 .owner = THIS_MODULE,
482 .of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table),
483 },
484};
485
486static int __init kryo3xx_cpu_erp_init(void)
487{
488 return platform_driver_register(&kryo3xx_cpu_erp_driver);
489}
490module_init(kryo3xx_cpu_erp_init);
491
492static void __exit kryo3xx_cpu_erp_exit(void)
493{
494 platform_driver_unregister(&kryo3xx_cpu_erp_driver);
495}
496module_exit(kryo3xx_cpu_erp_exit);
497
498MODULE_LICENSE("GPL v2");
499MODULE_DESCRIPTION("Kryo3xx EDAC driver");