/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
12
13#include <linux/kernel.h>
14#include <linux/edac.h>
15#include <linux/of_device.h>
16#include <linux/platform_device.h>
17#include <linux/smp.h>
18#include <linux/cpu.h>
19#include <linux/interrupt.h>
20#include <linux/of_irq.h>
21
22#include <asm/cputype.h>
23
24#include "edac_core.h"
25
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
/* Polling period in milliseconds copied into edev_ctl->poll_msec at probe. */
static int poll_msec = 1000;
module_param(poll_msec, int, 0444);
#endif

/* Build-time defaults copied into edev_ctl->panic_on_ce / panic_on_ue. */
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE
#define ARM64_ERP_PANIC_ON_CE 1
#else
#define ARM64_ERP_PANIC_ON_CE 0
#endif

#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE
#define ARM64_ERP_PANIC_ON_UE 1
#else
#define ARM64_ERP_PANIC_ON_UE 0
#endif

/* Cache level values as produced by KRYO3XX_ERRXMISC_LVL(). */
#define L1 0x0
#define L2 0x1
#define L3 0x2

/* Name passed to edac_printk() for every message of this driver. */
#define EDAC_CPU "kryo3xx_edac"

/*
 * Field extractors for the status register value. The argument is fully
 * parenthesized so that compound expressions (e.g. "a | b") are shifted and
 * masked as a whole rather than being torn apart by operator precedence.
 */
#define KRYO3XX_ERRXSTATUS_VALID(a)	(((a) >> 30) & 0x1)
#define KRYO3XX_ERRXSTATUS_UE(a)	(((a) >> 29) & 0x1)
#define KRYO3XX_ERRXSTATUS_SERR(a)	((a) & 0xFF)

/* Field extractors for the misc register value. */
#define KRYO3XX_ERRXMISC_LVL(a)		(((a) >> 1) & 0x7)
#define KRYO3XX_ERRXMISC_WAY(a)		(((a) >> 28) & 0xF)
55
Kyle Yanddc44242016-06-20 14:42:14 -070056static inline void set_errxctlr_el1(void)
57{
Kyle Yane1280772016-10-10 18:32:45 -070058 u64 val = 0x10f;
Kyle Yanddc44242016-06-20 14:42:14 -070059
60 asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val));
61}
62
Kyle Yan7a59b362017-04-11 19:56:45 -070063static inline void set_errxmisc_overflow(void)
64{
Kyle Yan51096c4082017-06-19 17:25:05 -070065 u64 val = 0x7F7F00000000ULL;
Kyle Yan7a59b362017-04-11 19:56:45 -070066
67 asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (val));
68}
69
Kyle Yanddc44242016-06-20 14:42:14 -070070static inline void write_errselr_el1(u64 val)
71{
72 asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val));
73}
74
75static inline u64 read_errxstatus_el1(void)
76{
77 u64 val;
78
79 asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val));
80 return val;
81}
82
83static inline u64 read_errxmisc_el1(void)
84{
85 u64 val;
86
87 asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val));
88 return val;
89}
90
91static inline void clear_errxstatus_valid(u64 val)
92{
Kyle Yanfb736162016-10-05 17:28:20 -070093 asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val));
Kyle Yanddc44242016-06-20 14:42:14 -070094}
95
96struct errors_edac {
97 const char * const msg;
98 void (*func)(struct edac_device_ctl_info *edac_dev,
99 int inst_nr, int block_nr, const char *msg);
100};
101
102static const struct errors_edac errors[] = {
103 {"Kryo3xx L1 Correctable Error", edac_device_handle_ce },
104 {"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue },
105 {"Kryo3xx L2 Correctable Error", edac_device_handle_ce },
106 {"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue },
107 {"L3 Correctable Error", edac_device_handle_ce },
108 {"L3 Uncorrectable Error", edac_device_handle_ue },
109};
110
111#define KRYO3XX_L1_CE 0
112#define KRYO3XX_L1_UE 1
113#define KRYO3XX_L2_CE 2
114#define KRYO3XX_L2_UE 3
115#define KRYO3XX_L3_CE 4
116#define KRYO3XX_L3_UE 5
117
118#define DATA_BUF_ERR 0x2
119#define CACHE_DATA_ERR 0x6
120#define CACHE_TAG_DIRTY_ERR 0x7
Kyle Yan51096c4082017-06-19 17:25:05 -0700121#define TLB_PARITY_ERR_DATA 0x8
122#define TLB_PARITY_ERR_TAG 0x9
123#define BUS_ERROR 0x12
Kyle Yanddc44242016-06-20 14:42:14 -0700124
125struct erp_drvdata {
126 struct edac_device_ctl_info *edev_ctl;
Kyle Yane1280772016-10-10 18:32:45 -0700127 struct erp_drvdata __percpu **erp_cpu_drvdata;
128 int ppi;
Kyle Yanddc44242016-06-20 14:42:14 -0700129};
130
131static struct erp_drvdata *panic_handler_drvdata;
132
133static DEFINE_SPINLOCK(local_handler_lock);
134
Kyle Yane1280772016-10-10 18:32:45 -0700135static void l1_l2_irq_enable(void *info)
136{
137 int irq = *(int *)info;
138
139 enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH);
140}
141
Kyle Yanddc44242016-06-20 14:42:14 -0700142static int request_erp_irq(struct platform_device *pdev, const char *propname,
143 const char *desc, irq_handler_t handler,
144 void *ed, int percpu)
145{
146 int rc;
147 struct resource *r;
Kyle Yane1280772016-10-10 18:32:45 -0700148 struct erp_drvdata *drv = ed;
Kyle Yanddc44242016-06-20 14:42:14 -0700149
150 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname);
151
152 if (!r) {
153 pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n",
154 propname);
Kyle Yane1280772016-10-10 18:32:45 -0700155 goto out;
Kyle Yanddc44242016-06-20 14:42:14 -0700156 }
157
158 if (!percpu) {
159 rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL,
160 handler,
161 IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
162 desc,
163 ed);
Kyle Yanddc44242016-06-20 14:42:14 -0700164
Kyle Yane1280772016-10-10 18:32:45 -0700165 if (rc) {
166 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
167 (int) r->start, rc, propname, desc);
168 goto out;
169 }
170
171 } else {
172 drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *);
173 if (!drv->erp_cpu_drvdata) {
174 pr_err("Failed to allocate percpu erp data\n");
175 goto out;
176 }
177
178 *raw_cpu_ptr(drv->erp_cpu_drvdata) = drv;
179 rc = request_percpu_irq(r->start, handler, desc,
180 drv->erp_cpu_drvdata);
181
182 if (rc) {
183 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
184 (int) r->start, rc, propname, desc);
185 goto out_free;
186 }
187
188 drv->ppi = r->start;
189 on_each_cpu(l1_l2_irq_enable, &(r->start), 1);
Kyle Yanddc44242016-06-20 14:42:14 -0700190 }
191
192 return 0;
Kyle Yane1280772016-10-10 18:32:45 -0700193
194out_free:
195 free_percpu(drv->erp_cpu_drvdata);
196 drv->erp_cpu_drvdata = NULL;
197out:
198 return -EINVAL;
Kyle Yanddc44242016-06-20 14:42:14 -0700199}
200
201static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc,
202 struct edac_device_ctl_info *edev_ctl)
203{
204 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus);
205 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc);
206 edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1);
207
208 switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) {
209 case DATA_BUF_ERR:
210 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n");
211 break;
212
213 case CACHE_DATA_ERR:
214 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n");
215 break;
216
217 case CACHE_TAG_DIRTY_ERR:
218 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n");
219 break;
220
Kyle Yan51096c4082017-06-19 17:25:05 -0700221 case TLB_PARITY_ERR_DATA:
Kyle Yanddc44242016-06-20 14:42:14 -0700222 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n");
223 break;
224
Kyle Yan51096c4082017-06-19 17:25:05 -0700225 case TLB_PARITY_ERR_TAG:
226 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB DATA\n");
227
Kyle Yanddc44242016-06-20 14:42:14 -0700228 case BUS_ERROR:
229 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
230 break;
231 }
232
233 if (level == L3)
234 edac_printk(KERN_CRIT, EDAC_CPU,
235 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc));
236 else
237 edac_printk(KERN_CRIT, EDAC_CPU,
238 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2);
239 errors[errorcode].func(edev_ctl, smp_processor_id(),
240 level, errors[errorcode].msg);
241}
242
243static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc,
244 struct edac_device_ctl_info *edev_ctl)
245{
246 switch (KRYO3XX_ERRXMISC_LVL(errxmisc)) {
247 case L1:
248 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
249 dump_err_reg(KRYO3XX_L1_UE, L1, errxstatus, errxmisc,
250 edev_ctl);
251 else
252 dump_err_reg(KRYO3XX_L1_CE, L1, errxstatus, errxmisc,
253 edev_ctl);
254 break;
255
256 case L2:
257 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
258 dump_err_reg(KRYO3XX_L2_UE, L2, errxstatus, errxmisc,
259 edev_ctl);
260 else
261 dump_err_reg(KRYO3XX_L2_CE, L2, errxstatus, errxmisc,
262 edev_ctl);
263 break;
264 }
265
266}
267
268static void kryo3xx_check_l1_l2_ecc(void *info)
269{
270 struct edac_device_ctl_info *edev_ctl = info;
271 u64 errxstatus = 0;
272 u64 errxmisc = 0;
273 unsigned long flags;
274
275 spin_lock_irqsave(&local_handler_lock, flags);
276 write_errselr_el1(0);
277 errxstatus = read_errxstatus_el1();
278 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) {
279 errxmisc = read_errxmisc_el1();
280 edac_printk(KERN_CRIT, EDAC_CPU,
281 "Kryo3xx CPU%d detected a L1/L2 cache error\n",
282 smp_processor_id());
283
284 kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl);
285 clear_errxstatus_valid(errxstatus);
286 }
287 spin_unlock_irqrestore(&local_handler_lock, flags);
288}
289
Kyle Yan51096c4082017-06-19 17:25:05 -0700290static bool l3_is_bus_error(u64 errxstatus)
291{
292 if (KRYO3XX_ERRXSTATUS_SERR(errxstatus) == BUS_ERROR) {
293 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
294 return true;
295 }
296
297 return false;
298}
299
Kyle Yanddc44242016-06-20 14:42:14 -0700300static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl)
301{
302 u64 errxstatus = 0;
303 u64 errxmisc = 0;
304 unsigned long flags;
305
306 spin_lock_irqsave(&local_handler_lock, flags);
307 write_errselr_el1(1);
308 errxstatus = read_errxstatus_el1();
309 errxmisc = read_errxmisc_el1();
310
311 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) &&
312 KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) {
Kyle Yan51096c4082017-06-19 17:25:05 -0700313 if (l3_is_bus_error(errxstatus)) {
314 if (edev_ctl->panic_on_ue)
315 panic("Causing panic due to Bus Error\n");
316 return;
317 }
Kyle Yanddc44242016-06-20 14:42:14 -0700318 if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) {
319 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n");
320 dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc,
321 edev_ctl);
322 } else {
323 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n");
324 dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc,
325 edev_ctl);
326 }
327
328 clear_errxstatus_valid(errxstatus);
329 }
330 spin_unlock_irqrestore(&local_handler_lock, flags);
331}
332
333void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl)
334{
335 int cpu;
336
337 if (edev_ctl == NULL)
338 edev_ctl = panic_handler_drvdata->edev_ctl;
339
340 kryo3xx_check_l3_scu_error(edev_ctl);
341 for_each_possible_cpu(cpu)
342 smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc,
343 edev_ctl, 0);
344}
345
346static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata)
347{
Kyle Yan7a59b362017-04-11 19:56:45 -0700348 kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl);
Kyle Yanddc44242016-06-20 14:42:14 -0700349 return IRQ_HANDLED;
350}
351
352static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata)
353{
354 struct erp_drvdata *drv = drvdata;
355 struct edac_device_ctl_info *edev_ctl = drv->edev_ctl;
356
357 kryo3xx_check_l3_scu_error(edev_ctl);
358 return IRQ_HANDLED;
359}
360
/*
 * Cross-call callback used by probe on every possible CPU: program the
 * error control register and then the misc register with their fixed
 * start-up values. @info is unused.
 */
static void initialize_registers(void *info)
{
	/* Control register first, then the misc register. */
	set_errxctlr_el1();
	set_errxmisc_overflow();
}
366
Kyle Yanddc44242016-06-20 14:42:14 -0700367static int kryo3xx_cpu_erp_probe(struct platform_device *pdev)
368{
369 struct device *dev = &pdev->dev;
370 struct erp_drvdata *drv;
371 int rc = 0;
372 int fail = 0;
Kyle Yan7a59b362017-04-11 19:56:45 -0700373 int cpu;
Kyle Yanddc44242016-06-20 14:42:14 -0700374
Kyle Yan7a59b362017-04-11 19:56:45 -0700375 for_each_possible_cpu(cpu)
376 smp_call_function_single(cpu, initialize_registers, NULL, 1);
377
378
Kyle Yanddc44242016-06-20 14:42:14 -0700379 drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
380
381 if (!drv)
382 return -ENOMEM;
383
384 drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu",
385 num_possible_cpus(), "L", 3, 1, NULL, 0,
386 edac_device_alloc_index());
387
388 if (!drv->edev_ctl)
389 return -ENOMEM;
390
391 #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
392 drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors;
393 drv->edev_ctl->poll_msec = poll_msec;
394 drv->edev_ctl->defer_work = 1;
395 #endif
396
397 drv->edev_ctl->dev = dev;
398 drv->edev_ctl->mod_name = dev_name(dev);
399 drv->edev_ctl->dev_name = dev_name(dev);
400 drv->edev_ctl->ctl_name = "cache";
401 drv->edev_ctl->panic_on_ce = ARM64_ERP_PANIC_ON_CE;
402 drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
403 platform_set_drvdata(pdev, drv);
404
405 rc = edac_device_add_device(drv->edev_ctl);
406 if (rc)
407 goto out_mem;
408
409 panic_handler_drvdata = drv;
410
411 if (request_erp_irq(pdev, "l1-l2-faultirq",
412 "KRYO3XX L1-L2 ECC FAULTIRQ",
413 kryo3xx_l1_l2_handler, drv, 1))
414 fail++;
415
416 if (request_erp_irq(pdev, "l3-scu-faultirq",
417 "KRYO3XX L3-SCU ECC FAULTIRQ",
418 kryo3xx_l3_scu_handler, drv, 0))
419 fail++;
420
421 if (fail == of_irq_count(dev->of_node)) {
422 pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n");
423 rc = -ENODEV;
424 goto out_dev;
425 }
426
427 return 0;
428
429out_dev:
430 edac_device_del_device(dev);
431out_mem:
432 edac_device_free_ctl_info(drv->edev_ctl);
433 return rc;
434}
435
436static int kryo3xx_cpu_erp_remove(struct platform_device *pdev)
437{
438 struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
439 struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;
440
Kyle Yane1280772016-10-10 18:32:45 -0700441
442 if (drv->erp_cpu_drvdata != NULL) {
443 free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata);
444 free_percpu(drv->erp_cpu_drvdata);
445 }
446
Kyle Yanddc44242016-06-20 14:42:14 -0700447 edac_device_del_device(edac_ctl->dev);
448 edac_device_free_ctl_info(edac_ctl);
449
450 return 0;
451}
452
453static const struct of_device_id kryo3xx_cpu_erp_match_table[] = {
454 { .compatible = "arm,arm64-kryo3xx-cpu-erp" },
455 { }
456};
457
458static struct platform_driver kryo3xx_cpu_erp_driver = {
459 .probe = kryo3xx_cpu_erp_probe,
460 .remove = kryo3xx_cpu_erp_remove,
461 .driver = {
462 .name = "kryo3xx_cpu_cache_erp",
463 .owner = THIS_MODULE,
464 .of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table),
465 },
466};
467
468static int __init kryo3xx_cpu_erp_init(void)
469{
470 return platform_driver_register(&kryo3xx_cpu_erp_driver);
471}
472module_init(kryo3xx_cpu_erp_init);
473
474static void __exit kryo3xx_cpu_erp_exit(void)
475{
476 platform_driver_unregister(&kryo3xx_cpu_erp_driver);
477}
478module_exit(kryo3xx_cpu_erp_exit);
479
480MODULE_LICENSE("GPL v2");
481MODULE_DESCRIPTION("Kryo3xx EDAC driver");