/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
12
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/edac.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>

#include <asm/cputype.h>

#include "edac_core.h"
25
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
/* Polling period in milliseconds; copied into the EDAC control block at probe. */
static int poll_msec = 1000;
module_param(poll_msec, int, 0444);
#endif

/* Compile-time panic policy, copied into the EDAC control block at probe. */
#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE
#define ARM64_ERP_PANIC_ON_CE 1
#else
#define ARM64_ERP_PANIC_ON_CE 0
#endif

#ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE
#define ARM64_ERP_PANIC_ON_UE 1
#else
#define ARM64_ERP_PANIC_ON_UE 0
#endif

/* Cache level values, as compared against KRYO3XX_ERRXMISC_LVL(). */
#define L1 0x0
#define L2 0x1
#define L3 0x2

/* Prefix passed to edac_printk() by this driver. */
#define EDAC_CPU "kryo3xx_edac"

/*
 * Field extractors for the error status and misc register values read by
 * this driver.  Every use of the argument is parenthesized so that a
 * compound expression (for example "a | b") is evaluated as a whole before
 * the shift/mask is applied.
 */
#define KRYO3XX_ERRXSTATUS_VALID(a)	(((a) >> 30) & 0x1)
#define KRYO3XX_ERRXSTATUS_UE(a)	(((a) >> 29) & 0x1)
#define KRYO3XX_ERRXSTATUS_SERR(a)	((a) & 0xFF)

#define KRYO3XX_ERRXMISC_LVL(a)		(((a) >> 1) & 0x7)
#define KRYO3XX_ERRXMISC_WAY(a)		(((a) >> 28) & 0xF)
55
Kyle Yanddc44242016-06-20 14:42:14 -070056static inline void set_errxctlr_el1(void)
57{
Kyle Yane1280772016-10-10 18:32:45 -070058 u64 val = 0x10f;
Kyle Yanddc44242016-06-20 14:42:14 -070059
60 asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val));
61}
62
63static inline void write_errselr_el1(u64 val)
64{
65 asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val));
66}
67
68static inline u64 read_errxstatus_el1(void)
69{
70 u64 val;
71
72 asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val));
73 return val;
74}
75
76static inline u64 read_errxmisc_el1(void)
77{
78 u64 val;
79
80 asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val));
81 return val;
82}
83
84static inline void clear_errxstatus_valid(u64 val)
85{
Kyle Yanfb736162016-10-05 17:28:20 -070086 asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val));
Kyle Yanddc44242016-06-20 14:42:14 -070087}
88
89struct errors_edac {
90 const char * const msg;
91 void (*func)(struct edac_device_ctl_info *edac_dev,
92 int inst_nr, int block_nr, const char *msg);
93};
94
95static const struct errors_edac errors[] = {
96 {"Kryo3xx L1 Correctable Error", edac_device_handle_ce },
97 {"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue },
98 {"Kryo3xx L2 Correctable Error", edac_device_handle_ce },
99 {"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue },
100 {"L3 Correctable Error", edac_device_handle_ce },
101 {"L3 Uncorrectable Error", edac_device_handle_ue },
102};
103
104#define KRYO3XX_L1_CE 0
105#define KRYO3XX_L1_UE 1
106#define KRYO3XX_L2_CE 2
107#define KRYO3XX_L2_UE 3
108#define KRYO3XX_L3_CE 4
109#define KRYO3XX_L3_UE 5
110
111#define DATA_BUF_ERR 0x2
112#define CACHE_DATA_ERR 0x6
113#define CACHE_TAG_DIRTY_ERR 0x7
114#define TLB_PARITY_ERR 0x8
115#define BUS_ERROR 0x18
116
117struct erp_drvdata {
118 struct edac_device_ctl_info *edev_ctl;
Kyle Yane1280772016-10-10 18:32:45 -0700119 struct erp_drvdata __percpu **erp_cpu_drvdata;
120 int ppi;
Kyle Yanddc44242016-06-20 14:42:14 -0700121};
122
123static struct erp_drvdata *panic_handler_drvdata;
124
125static DEFINE_SPINLOCK(local_handler_lock);
126
Kyle Yane1280772016-10-10 18:32:45 -0700127static void l1_l2_irq_enable(void *info)
128{
129 int irq = *(int *)info;
130
131 enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH);
132}
133
Kyle Yanddc44242016-06-20 14:42:14 -0700134static int request_erp_irq(struct platform_device *pdev, const char *propname,
135 const char *desc, irq_handler_t handler,
136 void *ed, int percpu)
137{
138 int rc;
139 struct resource *r;
Kyle Yane1280772016-10-10 18:32:45 -0700140 struct erp_drvdata *drv = ed;
Kyle Yanddc44242016-06-20 14:42:14 -0700141
142 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname);
143
144 if (!r) {
145 pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n",
146 propname);
Kyle Yane1280772016-10-10 18:32:45 -0700147 goto out;
Kyle Yanddc44242016-06-20 14:42:14 -0700148 }
149
150 if (!percpu) {
151 rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL,
152 handler,
153 IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
154 desc,
155 ed);
Kyle Yanddc44242016-06-20 14:42:14 -0700156
Kyle Yane1280772016-10-10 18:32:45 -0700157 if (rc) {
158 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
159 (int) r->start, rc, propname, desc);
160 goto out;
161 }
162
163 } else {
164 drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *);
165 if (!drv->erp_cpu_drvdata) {
166 pr_err("Failed to allocate percpu erp data\n");
167 goto out;
168 }
169
170 *raw_cpu_ptr(drv->erp_cpu_drvdata) = drv;
171 rc = request_percpu_irq(r->start, handler, desc,
172 drv->erp_cpu_drvdata);
173
174 if (rc) {
175 pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n",
176 (int) r->start, rc, propname, desc);
177 goto out_free;
178 }
179
180 drv->ppi = r->start;
181 on_each_cpu(l1_l2_irq_enable, &(r->start), 1);
Kyle Yanddc44242016-06-20 14:42:14 -0700182 }
183
184 return 0;
Kyle Yane1280772016-10-10 18:32:45 -0700185
186out_free:
187 free_percpu(drv->erp_cpu_drvdata);
188 drv->erp_cpu_drvdata = NULL;
189out:
190 return -EINVAL;
Kyle Yanddc44242016-06-20 14:42:14 -0700191}
192
193static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc,
194 struct edac_device_ctl_info *edev_ctl)
195{
196 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus);
197 edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc);
198 edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1);
199
200 switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) {
201 case DATA_BUF_ERR:
202 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n");
203 break;
204
205 case CACHE_DATA_ERR:
206 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n");
207 break;
208
209 case CACHE_TAG_DIRTY_ERR:
210 edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n");
211 break;
212
213 case TLB_PARITY_ERR:
214 edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n");
215 break;
216
217 case BUS_ERROR:
218 edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n");
219 break;
220 }
221
222 if (level == L3)
223 edac_printk(KERN_CRIT, EDAC_CPU,
224 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc));
225 else
226 edac_printk(KERN_CRIT, EDAC_CPU,
227 "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2);
228 errors[errorcode].func(edev_ctl, smp_processor_id(),
229 level, errors[errorcode].msg);
230}
231
232static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc,
233 struct edac_device_ctl_info *edev_ctl)
234{
235 switch (KRYO3XX_ERRXMISC_LVL(errxmisc)) {
236 case L1:
237 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
238 dump_err_reg(KRYO3XX_L1_UE, L1, errxstatus, errxmisc,
239 edev_ctl);
240 else
241 dump_err_reg(KRYO3XX_L1_CE, L1, errxstatus, errxmisc,
242 edev_ctl);
243 break;
244
245 case L2:
246 if (KRYO3XX_ERRXSTATUS_UE(errxstatus))
247 dump_err_reg(KRYO3XX_L2_UE, L2, errxstatus, errxmisc,
248 edev_ctl);
249 else
250 dump_err_reg(KRYO3XX_L2_CE, L2, errxstatus, errxmisc,
251 edev_ctl);
252 break;
253 }
254
255}
256
257static void kryo3xx_check_l1_l2_ecc(void *info)
258{
259 struct edac_device_ctl_info *edev_ctl = info;
260 u64 errxstatus = 0;
261 u64 errxmisc = 0;
262 unsigned long flags;
263
264 spin_lock_irqsave(&local_handler_lock, flags);
265 write_errselr_el1(0);
266 errxstatus = read_errxstatus_el1();
267 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) {
268 errxmisc = read_errxmisc_el1();
269 edac_printk(KERN_CRIT, EDAC_CPU,
270 "Kryo3xx CPU%d detected a L1/L2 cache error\n",
271 smp_processor_id());
272
273 kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl);
274 clear_errxstatus_valid(errxstatus);
275 }
276 spin_unlock_irqrestore(&local_handler_lock, flags);
277}
278
279static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl)
280{
281 u64 errxstatus = 0;
282 u64 errxmisc = 0;
283 unsigned long flags;
284
285 spin_lock_irqsave(&local_handler_lock, flags);
286 write_errselr_el1(1);
287 errxstatus = read_errxstatus_el1();
288 errxmisc = read_errxmisc_el1();
289
290 if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) &&
291 KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) {
292 if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) {
293 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n");
294 dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc,
295 edev_ctl);
296 } else {
297 edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n");
298 dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc,
299 edev_ctl);
300 }
301
302 clear_errxstatus_valid(errxstatus);
303 }
304 spin_unlock_irqrestore(&local_handler_lock, flags);
305}
306
307void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl)
308{
309 int cpu;
310
311 if (edev_ctl == NULL)
312 edev_ctl = panic_handler_drvdata->edev_ctl;
313
314 kryo3xx_check_l3_scu_error(edev_ctl);
315 for_each_possible_cpu(cpu)
316 smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc,
317 edev_ctl, 0);
318}
319
320static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata)
321{
Kyle Yane1280772016-10-10 18:32:45 -0700322 struct erp_drvdata *drv = *(struct erp_drvdata **)(drvdata);
Kyle Yanddc44242016-06-20 14:42:14 -0700323
Kyle Yane1280772016-10-10 18:32:45 -0700324 kryo3xx_check_l1_l2_ecc(drv->edev_ctl);
Kyle Yanddc44242016-06-20 14:42:14 -0700325 return IRQ_HANDLED;
326}
327
328static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata)
329{
330 struct erp_drvdata *drv = drvdata;
331 struct edac_device_ctl_info *edev_ctl = drv->edev_ctl;
332
333 kryo3xx_check_l3_scu_error(edev_ctl);
334 return IRQ_HANDLED;
335}
336
337static int kryo3xx_cpu_erp_probe(struct platform_device *pdev)
338{
339 struct device *dev = &pdev->dev;
340 struct erp_drvdata *drv;
341 int rc = 0;
342 int fail = 0;
343
344 set_errxctlr_el1();
345 drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
346
347 if (!drv)
348 return -ENOMEM;
349
350 drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu",
351 num_possible_cpus(), "L", 3, 1, NULL, 0,
352 edac_device_alloc_index());
353
354 if (!drv->edev_ctl)
355 return -ENOMEM;
356
357 #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL
358 drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors;
359 drv->edev_ctl->poll_msec = poll_msec;
360 drv->edev_ctl->defer_work = 1;
361 #endif
362
363 drv->edev_ctl->dev = dev;
364 drv->edev_ctl->mod_name = dev_name(dev);
365 drv->edev_ctl->dev_name = dev_name(dev);
366 drv->edev_ctl->ctl_name = "cache";
367 drv->edev_ctl->panic_on_ce = ARM64_ERP_PANIC_ON_CE;
368 drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
369 platform_set_drvdata(pdev, drv);
370
371 rc = edac_device_add_device(drv->edev_ctl);
372 if (rc)
373 goto out_mem;
374
375 panic_handler_drvdata = drv;
376
377 if (request_erp_irq(pdev, "l1-l2-faultirq",
378 "KRYO3XX L1-L2 ECC FAULTIRQ",
379 kryo3xx_l1_l2_handler, drv, 1))
380 fail++;
381
382 if (request_erp_irq(pdev, "l3-scu-faultirq",
383 "KRYO3XX L3-SCU ECC FAULTIRQ",
384 kryo3xx_l3_scu_handler, drv, 0))
385 fail++;
386
387 if (fail == of_irq_count(dev->of_node)) {
388 pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n");
389 rc = -ENODEV;
390 goto out_dev;
391 }
392
393 return 0;
394
395out_dev:
396 edac_device_del_device(dev);
397out_mem:
398 edac_device_free_ctl_info(drv->edev_ctl);
399 return rc;
400}
401
402static int kryo3xx_cpu_erp_remove(struct platform_device *pdev)
403{
404 struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
405 struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;
406
Kyle Yane1280772016-10-10 18:32:45 -0700407
408 if (drv->erp_cpu_drvdata != NULL) {
409 free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata);
410 free_percpu(drv->erp_cpu_drvdata);
411 }
412
Kyle Yanddc44242016-06-20 14:42:14 -0700413 edac_device_del_device(edac_ctl->dev);
414 edac_device_free_ctl_info(edac_ctl);
415
416 return 0;
417}
418
419static const struct of_device_id kryo3xx_cpu_erp_match_table[] = {
420 { .compatible = "arm,arm64-kryo3xx-cpu-erp" },
421 { }
422};
423
424static struct platform_driver kryo3xx_cpu_erp_driver = {
425 .probe = kryo3xx_cpu_erp_probe,
426 .remove = kryo3xx_cpu_erp_remove,
427 .driver = {
428 .name = "kryo3xx_cpu_cache_erp",
429 .owner = THIS_MODULE,
430 .of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table),
431 },
432};
433
434static int __init kryo3xx_cpu_erp_init(void)
435{
436 return platform_driver_register(&kryo3xx_cpu_erp_driver);
437}
438module_init(kryo3xx_cpu_erp_init);
439
440static void __exit kryo3xx_cpu_erp_exit(void)
441{
442 platform_driver_unregister(&kryo3xx_cpu_erp_driver);
443}
444module_exit(kryo3xx_cpu_erp_exit);
445
446MODULE_LICENSE("GPL v2");
447MODULE_DESCRIPTION("Kryo3xx EDAC driver");