blob: 669246056812e8d361040fb4ce6f098574460768 [file] [log] [blame]
/*
 * APM X-Gene SoC EDAC (error detection and correction)
 *
 * Copyright (c) 2015, Applied Micro Circuits Corporation
 * Author: Feng Kan <fkan@apm.com>
 *         Loc Ho <lho@apm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
21
22#include <linux/ctype.h>
23#include <linux/edac.h>
24#include <linux/interrupt.h>
25#include <linux/mfd/syscon.h>
26#include <linux/module.h>
27#include <linux/of.h>
28#include <linux/of_address.h>
29#include <linux/regmap.h>
30
Borislav Petkov09bd1b42015-09-22 13:13:46 +020031#include "edac_module.h"
Loc Ho0d442932015-05-22 17:32:59 -060032
/* Module name reported to the EDAC core (mci->mod_name / edac_dev->mod_name) */
#define EDAC_MOD_STR			"xgene_edac"

/*
 * Global error configuration status registers (CSR).
 *
 * The register offsets below are added to the mapped PCP CSR base by the
 * xgene_edac_pcp_*() accessors. Each run of BIT() masks is kept directly
 * under the status/mask register pair it is defined alongside.
 */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)

#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)

#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014
56
57struct xgene_edac {
58 struct device *dev;
59 struct regmap *csw_map;
60 struct regmap *mcba_map;
61 struct regmap *mcbb_map;
62 struct regmap *efuse_map;
Loc Ho4d67e3c2016-01-22 13:47:04 -070063 struct regmap *rb_map;
Loc Ho0d442932015-05-22 17:32:59 -060064 void __iomem *pcp_csr;
65 spinlock_t lock;
Loc Ho93474732015-09-23 17:40:59 -070066 struct dentry *dfs;
Loc Ho0d442932015-05-22 17:32:59 -060067
68 struct list_head mcus;
69 struct list_head pmds;
Loc Ho93474732015-09-23 17:40:59 -070070 struct list_head l3s;
Loc Hof864b792015-09-23 17:41:00 -070071 struct list_head socs;
Loc Ho0d442932015-05-22 17:32:59 -060072
73 struct mutex mc_lock;
74 int mc_active_mask;
75 int mc_registered_mask;
76};
77
78static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
79{
80 *val = readl(edac->pcp_csr + reg);
81}
82
83static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
84 u32 bits_mask)
85{
86 u32 val;
87
88 spin_lock(&edac->lock);
89 val = readl(edac->pcp_csr + reg);
90 val &= ~bits_mask;
91 writel(val, edac->pcp_csr + reg);
92 spin_unlock(&edac->lock);
93}
94
95static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
96 u32 bits_mask)
97{
98 u32 val;
99
100 spin_lock(&edac->lock);
101 val = readl(edac->pcp_csr + reg);
102 val |= bits_mask;
103 writel(val, edac->pcp_csr + reg);
104 spin_unlock(&edac->lock);
105}
106
/* Memory controller error CSR */

/*
 * The per-rank registers (names ending in 0) are replicated for each rank:
 * rank N is accessed at <offset> + N * MCU_RANK_STRIDE.
 */
#define MCU_MAX_RANK				8
#define MCU_RANK_STRIDE				0x40

/* MCU-wide error control and status */
#define MCUGECR					0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK		BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK		BIT(1)
#define  MCU_GECR_CINTREN_MASK			BIT(2)
#define  MUC_GECR_MCUADDRERREN_MASK		BIT(9)
#define MCUGESR					0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK		BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK			BIT(3)

/* Per-rank error status, location and count */
#define MCUESRR0				0x0314
#define  MCU_ESRR_MULTUCERR_MASK		BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK		BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK		BIT(1)
#define  MCU_ESRR_CERR_MASK			BIT(0)
#define MCUESRRA0				0x0318
#define MCUEBLRR0				0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)		((src) & 0x00000007)
#define MCUERCRR0				0x0320
#define  MCU_ERCRR_ERRROW_RD(src)		(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)		((src) & 0x00000FFF)
#define MCUSBECNT0				0x0324
#define  MCU_SBECNT_COUNT(src)			((src) & 0xFFFF)

/* Register read through the CSW regmap */
#define CSW_CSWCR				0x0000
#define  CSW_CSWCR_DUALMCB_MASK			BIT(0)

/* Register read through the MCB A / MCB B regmaps */
#define MCBADDRMR				0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK		BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK		BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK		BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK		BIT(0)
142
143struct xgene_edac_mc_ctx {
144 struct list_head next;
145 char *name;
146 struct mem_ctl_info *mci;
147 struct xgene_edac *edac;
148 void __iomem *mcu_csr;
149 u32 mcu_id;
150};
151
152static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
153 const char __user *data,
154 size_t count, loff_t *ppos)
155{
156 struct mem_ctl_info *mci = file->private_data;
157 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
158 int i;
159
160 for (i = 0; i < MCU_MAX_RANK; i++) {
161 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
162 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
163 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
164 }
165 return count;
166}
167
168static const struct file_operations xgene_edac_mc_debug_inject_fops = {
169 .open = simple_open,
170 .write = xgene_edac_mc_err_inject_write,
171 .llseek = generic_file_llseek,
172};
173
174static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
175{
176 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
177 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200178
Loc Ho0d442932015-05-22 17:32:59 -0600179 if (!mci->debugfs)
180 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200181
182 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
183 &xgene_edac_mc_debug_inject_fops);
Loc Ho0d442932015-05-22 17:32:59 -0600184}
185
186static void xgene_edac_mc_check(struct mem_ctl_info *mci)
187{
188 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
189 unsigned int pcp_hp_stat;
190 unsigned int pcp_lp_stat;
191 u32 reg;
192 u32 rank;
193 u32 bank;
194 u32 count;
195 u32 col_row;
196
197 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
198 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
199 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
200 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
201 (MCU_CORR_ERR_MASK & pcp_lp_stat)))
202 return;
203
204 for (rank = 0; rank < MCU_MAX_RANK; rank++) {
205 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
206
207 /* Detect uncorrectable memory error */
208 if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
209 MCU_ESRR_BACKUCERR_MASK)) {
210 /* Detected uncorrectable memory error */
211 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
212 "MCU uncorrectable error at rank %d\n", rank);
213
214 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
215 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
216 }
217
218 /* Detect correctable memory error */
219 if (reg & MCU_ESRR_CERR_MASK) {
220 bank = readl(ctx->mcu_csr + MCUEBLRR0 +
221 rank * MCU_RANK_STRIDE);
222 col_row = readl(ctx->mcu_csr + MCUERCRR0 +
223 rank * MCU_RANK_STRIDE);
224 count = readl(ctx->mcu_csr + MCUSBECNT0 +
225 rank * MCU_RANK_STRIDE);
226 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
227 "MCU correctable error at rank %d bank %d column %d row %d count %d\n",
228 rank, MCU_EBLRR_ERRBANK_RD(bank),
229 MCU_ERCRR_ERRCOL_RD(col_row),
230 MCU_ERCRR_ERRROW_RD(col_row),
231 MCU_SBECNT_COUNT(count));
232
233 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
234 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
235 }
236
237 /* Clear all error registers */
238 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
239 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
240 writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
241 rank * MCU_RANK_STRIDE);
242 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
243 }
244
245 /* Detect memory controller error */
246 reg = readl(ctx->mcu_csr + MCUGESR);
247 if (reg) {
248 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
249 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
250 "MCU address miss-match error\n");
251 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
252 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
253 "MCU address multi-match error\n");
254
255 writel(reg, ctx->mcu_csr + MCUGESR);
256 }
257}
258
259static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
260{
261 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
262 unsigned int val;
263
264 if (edac_op_state != EDAC_OPSTATE_INT)
265 return;
266
267 mutex_lock(&ctx->edac->mc_lock);
268
269 /*
270 * As there is only single bit for enable error and interrupt mask,
271 * we must only enable top level interrupt after all MCUs are
272 * registered. Otherwise, if there is an error and the corresponding
273 * MCU has not registered, the interrupt will never get cleared. To
274 * determine all MCU have registered, we will keep track of active
275 * MCUs and registered MCUs.
276 */
277 if (enable) {
278 /* Set registered MCU bit */
279 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
280
281 /* Enable interrupt after all active MCU registered */
282 if (ctx->edac->mc_registered_mask ==
283 ctx->edac->mc_active_mask) {
284 /* Enable memory controller top level interrupt */
285 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
286 MCU_UNCORR_ERR_MASK |
287 MCU_CTL_ERR_MASK);
288 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
289 MCU_CORR_ERR_MASK);
290 }
291
292 /* Enable MCU interrupt and error reporting */
293 val = readl(ctx->mcu_csr + MCUGECR);
294 val |= MCU_GECR_DEMANDUCINTREN_MASK |
295 MCU_GECR_BACKUCINTREN_MASK |
296 MCU_GECR_CINTREN_MASK |
297 MUC_GECR_MCUADDRERREN_MASK;
298 writel(val, ctx->mcu_csr + MCUGECR);
299 } else {
300 /* Disable MCU interrupt */
301 val = readl(ctx->mcu_csr + MCUGECR);
302 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
303 MCU_GECR_BACKUCINTREN_MASK |
304 MCU_GECR_CINTREN_MASK |
305 MUC_GECR_MCUADDRERREN_MASK);
306 writel(val, ctx->mcu_csr + MCUGECR);
307
308 /* Disable memory controller top level interrupt */
309 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
310 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
311 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
312 MCU_CORR_ERR_MASK);
313
314 /* Clear registered MCU bit */
315 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
316 }
317
318 mutex_unlock(&ctx->edac->mc_lock);
319}
320
321static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
322{
323 unsigned int reg;
324 u32 mcu_mask;
325
326 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
327 return 0;
328
329 if (reg & CSW_CSWCR_DUALMCB_MASK) {
330 /*
331 * Dual MCB active - Determine if all 4 active or just MCU0
332 * and MCU2 active
333 */
334 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
335 return 0;
336 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
337 } else {
338 /*
339 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
340 * active
341 */
342 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
343 return 0;
344 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
345 }
346
347 /* Save active MC mask if hasn't set already */
348 if (!ctx->edac->mc_active_mask)
349 ctx->edac->mc_active_mask = mcu_mask;
350
351 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
352}
353
354static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
355{
356 struct mem_ctl_info *mci;
357 struct edac_mc_layer layers[2];
358 struct xgene_edac_mc_ctx tmp_ctx;
359 struct xgene_edac_mc_ctx *ctx;
360 struct resource res;
361 int rc;
362
363 memset(&tmp_ctx, 0, sizeof(tmp_ctx));
364 tmp_ctx.edac = edac;
365
366 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
367 return -ENOMEM;
368
369 rc = of_address_to_resource(np, 0, &res);
370 if (rc < 0) {
371 dev_err(edac->dev, "no MCU resource address\n");
372 goto err_group;
373 }
374 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
375 if (IS_ERR(tmp_ctx.mcu_csr)) {
376 dev_err(edac->dev, "unable to map MCU resource\n");
377 rc = PTR_ERR(tmp_ctx.mcu_csr);
378 goto err_group;
379 }
380
381 /* Ignore non-active MCU */
382 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
383 dev_err(edac->dev, "no memory-controller property\n");
384 rc = -ENODEV;
385 goto err_group;
386 }
387 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
388 rc = -ENODEV;
389 goto err_group;
390 }
391
392 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
393 layers[0].size = 4;
394 layers[0].is_virt_csrow = true;
395 layers[1].type = EDAC_MC_LAYER_CHANNEL;
396 layers[1].size = 2;
397 layers[1].is_virt_csrow = false;
398 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
399 sizeof(*ctx));
400 if (!mci) {
401 rc = -ENOMEM;
402 goto err_group;
403 }
404
405 ctx = mci->pvt_info;
406 *ctx = tmp_ctx; /* Copy over resource value */
407 ctx->name = "xgene_edac_mc_err";
408 ctx->mci = mci;
409 mci->pdev = &mci->dev;
410 mci->ctl_name = ctx->name;
411 mci->dev_name = ctx->name;
412
413 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
414 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
415 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
416 mci->edac_cap = EDAC_FLAG_SECDED;
417 mci->mod_name = EDAC_MOD_STR;
418 mci->mod_ver = "0.1";
419 mci->ctl_page_to_phys = NULL;
420 mci->scrub_cap = SCRUB_FLAG_HW_SRC;
421 mci->scrub_mode = SCRUB_HW_SRC;
422
423 if (edac_op_state == EDAC_OPSTATE_POLL)
424 mci->edac_check = xgene_edac_mc_check;
425
426 if (edac_mc_add_mc(mci)) {
427 dev_err(edac->dev, "edac_mc_add_mc failed\n");
428 rc = -EINVAL;
429 goto err_free;
430 }
431
432 xgene_edac_mc_create_debugfs_node(mci);
433
434 list_add(&ctx->next, &edac->mcus);
435
436 xgene_edac_mc_irq_ctl(mci, true);
437
438 devres_remove_group(edac->dev, xgene_edac_mc_add);
439
440 dev_info(edac->dev, "X-Gene EDAC MC registered\n");
441 return 0;
442
443err_free:
444 edac_mc_free(mci);
445err_group:
446 devres_release_group(edac->dev, xgene_edac_mc_add);
447 return rc;
448}
449
450static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
451{
452 xgene_edac_mc_irq_ctl(mcu->mci, false);
453 edac_mc_del_mc(&mcu->mci->dev);
454 edac_mc_free(mcu->mci);
455 return 0;
456}
457
/* CPU L1/L2 error CSR */

/*
 * Layout of a PMD register block: each CPU's registers sit at
 * cpu * CPU_CSR_STRIDE, and the *_PAGE values select a page inside the
 * block. The *_PAGE_OFFSET values are offsets within a page.
 */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

/* Per-CPU page (CPU_MEMERR_CPU_PAGE): ICF registers */
#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)

/* Per-CPU page: LSU registers */
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)

/* Per-CPU page: MMU registers */
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)

/* Per-CPU page: registers written by the L1 error injection handler */
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

/* L2 error page (CPU_MEMERR_L2C_PAGE) */
#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
/* Register written by the L2 error injection handler */
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/* L2C page (CPU_L2C_PAGE): request time-out registers */
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
515
516/*
517 * Processor Module Domain (PMD) context - Context for a pair of processsors.
518 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
519 * its own L1 cache.
520 */
521struct xgene_edac_pmd_ctx {
522 struct list_head next;
523 struct device ddev;
524 char *name;
525 struct xgene_edac *edac;
526 struct edac_device_ctl_info *edac_dev;
527 void __iomem *pmd_csr;
528 u32 pmd;
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600529 int version;
Loc Ho0d442932015-05-22 17:32:59 -0600530};
531
532static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
533 int cpu_idx)
534{
535 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
536 void __iomem *pg_f;
537 u32 val;
538
539 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
540
541 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
Loc Ho93474732015-09-23 17:40:59 -0700542 if (!val)
543 goto chk_lsu;
544 dev_err(edac_dev->dev,
545 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
546 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
547 MEMERR_CPU_ICFESR_ERRWAY_RD(val),
548 MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
549 MEMERR_CPU_ICFESR_ERRINFO_RD(val));
550 if (val & MEMERR_CPU_ICFESR_CERR_MASK)
551 dev_err(edac_dev->dev, "One or more correctable error\n");
552 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
553 dev_err(edac_dev->dev, "Multiple correctable error\n");
554 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
555 case 1:
556 dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
557 break;
558 case 2:
559 dev_err(edac_dev->dev, "Way select multiple hit\n");
560 break;
561 case 3:
562 dev_err(edac_dev->dev, "Physical tag parity error\n");
563 break;
564 case 4:
565 case 5:
566 dev_err(edac_dev->dev, "L1 data parity error\n");
567 break;
568 case 6:
569 dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
570 break;
Loc Ho0d442932015-05-22 17:32:59 -0600571 }
572
Loc Ho93474732015-09-23 17:40:59 -0700573 /* Clear any HW errors */
574 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
Loc Ho0d442932015-05-22 17:32:59 -0600575
Loc Ho93474732015-09-23 17:40:59 -0700576 if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
577 MEMERR_CPU_ICFESR_MULTCERR_MASK))
Loc Ho0d442932015-05-22 17:32:59 -0600578 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
Loc Ho93474732015-09-23 17:40:59 -0700579
580chk_lsu:
581 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
582 if (!val)
583 goto chk_mmu;
584 dev_err(edac_dev->dev,
585 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
586 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
587 MEMERR_CPU_LSUESR_ERRWAY_RD(val),
588 MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
589 MEMERR_CPU_LSUESR_ERRINFO_RD(val));
590 if (val & MEMERR_CPU_LSUESR_CERR_MASK)
591 dev_err(edac_dev->dev, "One or more correctable error\n");
592 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
593 dev_err(edac_dev->dev, "Multiple correctable error\n");
594 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
595 case 0:
596 dev_err(edac_dev->dev, "Load tag error\n");
597 break;
598 case 1:
599 dev_err(edac_dev->dev, "Load data error\n");
600 break;
601 case 2:
602 dev_err(edac_dev->dev, "WSL multihit error\n");
603 break;
604 case 3:
605 dev_err(edac_dev->dev, "Store tag error\n");
606 break;
607 case 4:
608 dev_err(edac_dev->dev,
609 "DTB multihit from load pipeline error\n");
610 break;
611 case 5:
612 dev_err(edac_dev->dev,
613 "DTB multihit from store pipeline error\n");
614 break;
Loc Ho0d442932015-05-22 17:32:59 -0600615 }
Loc Ho93474732015-09-23 17:40:59 -0700616
617 /* Clear any HW errors */
618 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
619
620 if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
621 MEMERR_CPU_LSUESR_MULTCERR_MASK))
622 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
623
624chk_mmu:
625 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
626 if (!val)
627 return;
628 dev_err(edac_dev->dev,
629 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
630 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
631 MEMERR_CPU_MMUESR_ERRWAY_RD(val),
632 MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
633 MEMERR_CPU_MMUESR_ERRINFO_RD(val),
634 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
635 if (val & MEMERR_CPU_MMUESR_CERR_MASK)
636 dev_err(edac_dev->dev, "One or more correctable error\n");
637 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
638 dev_err(edac_dev->dev, "Multiple correctable error\n");
639 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
640 case 0:
641 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
642 break;
643 case 1:
644 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
645 break;
646 case 2:
647 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
648 break;
649 case 3:
650 dev_err(edac_dev->dev, "TMO operation single bank error\n");
651 break;
652 case 4:
653 dev_err(edac_dev->dev, "Stage 2 UTB error\n");
654 break;
655 case 5:
656 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
657 break;
658 case 6:
659 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
660 break;
661 case 7:
662 dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
663 break;
664 }
665
666 /* Clear any HW errors */
667 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
668
669 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
Loc Ho0d442932015-05-22 17:32:59 -0600670}
671
672static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
673{
674 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
675 void __iomem *pg_d;
676 void __iomem *pg_e;
677 u32 val_hi;
678 u32 val_lo;
679 u32 val;
680
681 /* Check L2 */
682 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
683 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
Loc Ho93474732015-09-23 17:40:59 -0700684 if (!val)
685 goto chk_l2c;
686 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
687 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
688 dev_err(edac_dev->dev,
689 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
690 ctx->pmd, val, val_hi, val_lo);
691 dev_err(edac_dev->dev,
692 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
693 MEMERR_L2C_L2ESR_ERRSYN_RD(val),
694 MEMERR_L2C_L2ESR_ERRWAY_RD(val),
695 MEMERR_L2C_L2ESR_ERRCPU_RD(val),
696 MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
697 MEMERR_L2C_L2ESR_ERRACTION_RD(val));
Loc Ho0d442932015-05-22 17:32:59 -0600698
Loc Ho93474732015-09-23 17:40:59 -0700699 if (val & MEMERR_L2C_L2ESR_ERR_MASK)
700 dev_err(edac_dev->dev, "One or more correctable error\n");
701 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
702 dev_err(edac_dev->dev, "Multiple correctable error\n");
703 if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
704 dev_err(edac_dev->dev, "One or more uncorrectable error\n");
705 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
706 dev_err(edac_dev->dev, "Multiple uncorrectable error\n");
Loc Ho0d442932015-05-22 17:32:59 -0600707
Loc Ho93474732015-09-23 17:40:59 -0700708 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
709 case 0:
710 dev_err(edac_dev->dev, "Outbound SDB parity error\n");
711 break;
712 case 1:
713 dev_err(edac_dev->dev, "Inbound SDB parity error\n");
714 break;
715 case 2:
716 dev_err(edac_dev->dev, "Tag ECC error\n");
717 break;
718 case 3:
719 dev_err(edac_dev->dev, "Data ECC error\n");
720 break;
Loc Ho0d442932015-05-22 17:32:59 -0600721 }
722
Loc Ho93474732015-09-23 17:40:59 -0700723 /* Clear any HW errors */
724 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
725
726 if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
727 MEMERR_L2C_L2ESR_MULTICERR_MASK))
728 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
729 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
730 MEMERR_L2C_L2ESR_MULTUCERR_MASK))
731 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
732
733chk_l2c:
Loc Ho0d442932015-05-22 17:32:59 -0600734 /* Check if any memory request timed out on L2 cache */
735 pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
736 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
737 if (val) {
738 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
739 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
740 dev_err(edac_dev->dev,
741 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
742 ctx->pmd, val, val_hi, val_lo);
743 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
744 }
745}
746
747static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
748{
749 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
750 unsigned int pcp_hp_stat;
751 int i;
752
753 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
754 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
755 return;
756
757 /* Check CPU L1 error */
758 for (i = 0; i < MAX_CPU_PER_PMD; i++)
759 xgene_edac_pmd_l1_check(edac_dev, i);
760
761 /* Check CPU L2 error */
762 xgene_edac_pmd_l2_check(edac_dev);
763}
764
765static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
766 int cpu)
767{
768 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
769 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
770 CPU_MEMERR_CPU_PAGE;
771
772 /*
773 * Enable CPU memory error:
774 * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
775 */
776 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
777 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
778 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
779}
780
Loc Ho0d442932015-05-22 17:32:59 -0600781static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
782{
783 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
784 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
785 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
786
787 /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
788 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
789 /* Configure L2C HW request time out feature if supported */
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600790 if (ctx->version > 1)
Loc Ho0d442932015-05-22 17:32:59 -0600791 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
792}
793
794static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
795 bool enable)
796{
797 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
798 int i;
799
800 /* Enable PMD error interrupt */
801 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
802 if (enable)
803 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
804 PMD0_MERR_MASK << ctx->pmd);
805 else
806 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
807 PMD0_MERR_MASK << ctx->pmd);
808 }
809
810 if (enable) {
811 xgene_edac_pmd_hw_cfg(edac_dev);
812
813 /* Two CPUs per a PMD */
814 for (i = 0; i < MAX_CPU_PER_PMD; i++)
815 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
816 }
817}
818
819static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
820 const char __user *data,
821 size_t count, loff_t *ppos)
822{
823 struct edac_device_ctl_info *edac_dev = file->private_data;
824 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
825 void __iomem *cpux_pg_f;
826 int i;
827
828 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
829 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
830 CPU_MEMERR_CPU_PAGE;
831
832 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
833 MEMERR_CPU_ICFESR_CERR_MASK,
834 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
835 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
836 MEMERR_CPU_LSUESR_CERR_MASK,
837 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
838 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
839 MEMERR_CPU_MMUESR_CERR_MASK,
840 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
841 }
842 return count;
843}
844
845static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
846 const char __user *data,
847 size_t count, loff_t *ppos)
848{
849 struct edac_device_ctl_info *edac_dev = file->private_data;
850 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
851 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
852
853 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
854 MEMERR_L2C_L2ESR_MULTICERR_MASK |
855 MEMERR_L2C_L2ESR_UCERR_MASK |
856 MEMERR_L2C_L2ESR_ERR_MASK,
857 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
858 return count;
859}
860
861static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
862 {
863 .open = simple_open,
864 .write = xgene_edac_pmd_l1_inject_ctrl_write,
865 .llseek = generic_file_llseek, },
866 {
867 .open = simple_open,
868 .write = xgene_edac_pmd_l2_inject_ctrl_write,
869 .llseek = generic_file_llseek, },
870 { }
871};
872
Loc Ho93474732015-09-23 17:40:59 -0700873static void
874xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
Loc Ho0d442932015-05-22 17:32:59 -0600875{
876 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200877 struct dentry *dbgfs_dir;
Loc Ho93474732015-09-23 17:40:59 -0700878 char name[10];
Loc Ho0d442932015-05-22 17:32:59 -0600879
Loc Ho93474732015-09-23 17:40:59 -0700880 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
Loc Ho0d442932015-05-22 17:32:59 -0600881 return;
882
Loc Ho9bc1c0c2015-09-24 10:38:07 -0700883 snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200884 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
885 if (!dbgfs_dir)
Loc Ho0d442932015-05-22 17:32:59 -0600886 return;
887
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200888 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
889 &xgene_edac_pmd_debug_inject_fops[0]);
890 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
891 &xgene_edac_pmd_debug_inject_fops[1]);
Loc Ho0d442932015-05-22 17:32:59 -0600892}
893
/*
 * Tell whether a PMD is available according to the efuse word.
 *
 * A set bit at position @pmd in @efuse marks that PMD as disabled.
 *
 * @efuse: efuse value, one "disabled" bit per PMD
 * @pmd:   PMD index; callers pass a device tree value, so it is not trusted
 *         to be in range
 *
 * Returns 1 if the PMD is available, 0 if its efuse bit is set or if @pmd
 * is outside the bit range of @efuse. Shifting by such an index would be
 * undefined behaviour, so it is rejected up front.
 */
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	if (pmd < 0 || pmd >= (int)(8 * sizeof(efuse)))
		return 0;

	/* Unsigned right shift: well defined for every index 0..31 */
	return ((efuse >> pmd) & 1) ? 0 : 1;
}
898
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600899static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
900 int version)
Loc Ho0d442932015-05-22 17:32:59 -0600901{
902 struct edac_device_ctl_info *edac_dev;
903 struct xgene_edac_pmd_ctx *ctx;
904 struct resource res;
905 char edac_name[10];
906 u32 pmd;
907 int rc;
908 u32 val;
909
910 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
911 return -ENOMEM;
912
913 /* Determine if this PMD is disabled */
914 if (of_property_read_u32(np, "pmd-controller", &pmd)) {
915 dev_err(edac->dev, "no pmd-controller property\n");
916 rc = -ENODEV;
917 goto err_group;
918 }
919 rc = regmap_read(edac->efuse_map, 0, &val);
920 if (rc)
921 goto err_group;
922 if (!xgene_edac_pmd_available(val, pmd)) {
923 rc = -ENODEV;
924 goto err_group;
925 }
926
Loc Ho9bc1c0c2015-09-24 10:38:07 -0700927 snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
Loc Ho0d442932015-05-22 17:32:59 -0600928 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
929 edac_name, 1, "l2c", 1, 2, NULL,
930 0, edac_device_alloc_index());
931 if (!edac_dev) {
932 rc = -ENOMEM;
933 goto err_group;
934 }
935
936 ctx = edac_dev->pvt_info;
937 ctx->name = "xgene_pmd_err";
938 ctx->pmd = pmd;
939 ctx->edac = edac;
940 ctx->edac_dev = edac_dev;
941 ctx->ddev = *edac->dev;
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600942 ctx->version = version;
Loc Ho0d442932015-05-22 17:32:59 -0600943 edac_dev->dev = &ctx->ddev;
944 edac_dev->ctl_name = ctx->name;
945 edac_dev->dev_name = ctx->name;
946 edac_dev->mod_name = EDAC_MOD_STR;
947
948 rc = of_address_to_resource(np, 0, &res);
949 if (rc < 0) {
950 dev_err(edac->dev, "no PMD resource address\n");
951 goto err_free;
952 }
953 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
954 if (IS_ERR(ctx->pmd_csr)) {
955 dev_err(edac->dev,
956 "devm_ioremap_resource failed for PMD resource address\n");
957 rc = PTR_ERR(ctx->pmd_csr);
958 goto err_free;
959 }
960
961 if (edac_op_state == EDAC_OPSTATE_POLL)
962 edac_dev->edac_check = xgene_edac_pmd_check;
963
964 xgene_edac_pmd_create_debugfs_nodes(edac_dev);
965
966 rc = edac_device_add_device(edac_dev);
967 if (rc > 0) {
968 dev_err(edac->dev, "edac_device_add_device failed\n");
969 rc = -ENOMEM;
970 goto err_free;
971 }
972
973 if (edac_op_state == EDAC_OPSTATE_INT)
974 edac_dev->op_state = OP_RUNNING_INTERRUPT;
975
976 list_add(&ctx->next, &edac->pmds);
977
978 xgene_edac_pmd_hw_ctl(edac_dev, 1);
979
980 devres_remove_group(edac->dev, xgene_edac_pmd_add);
981
982 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
983 return 0;
984
985err_free:
986 edac_device_free_ctl_info(edac_dev);
987err_group:
988 devres_release_group(edac->dev, xgene_edac_pmd_add);
989 return rc;
990}
991
992static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
993{
994 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
995
996 xgene_edac_pmd_hw_ctl(edac_dev, 0);
997 edac_device_del_device(edac_dev->dev);
998 edac_device_free_ctl_info(edac_dev);
999 return 0;
1000}
1001
/*
 * L3 Error device
 *
 * Register offsets are relative to the L3 controller CSR base. The
 * field-extraction macros parenthesize their argument so that they stay
 * correct when handed an expression rather than a plain variable.
 */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		(((src) & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		(((src) & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		(((src) & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		(((src) & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		(((src) & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		((src) & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		((src) & 0x0000000F)
1028
1029struct xgene_edac_dev_ctx {
1030 struct list_head next;
1031 struct device ddev;
1032 char *name;
1033 struct xgene_edac *edac;
1034 struct edac_device_ctl_info *edac_dev;
1035 int edac_idx;
1036 void __iomem *dev_csr;
1037 int version;
1038};
1039
1040/*
1041 * Version 1 of the L3 controller has broken single bit correctable logic for
1042 * certain error syndromes. Log them as uncorrectable in that case.
1043 */
1044static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1045{
1046 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1047 switch (L3C_ELR_ERRSYN(l3celr)) {
1048 case 0x13C:
1049 case 0x0B4:
1050 case 0x007:
1051 case 0x00D:
1052 case 0x00E:
1053 case 0x019:
1054 case 0x01A:
1055 case 0x01C:
1056 case 0x04E:
1057 case 0x041:
1058 return true;
1059 }
Loc Ho4d67e3c2016-01-22 13:47:04 -07001060 } else if (L3C_ELR_ERRWAY(l3celr) == 9)
Loc Ho93474732015-09-23 17:40:59 -07001061 return true;
1062
1063 return false;
1064}
1065
1066static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
1067{
1068 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1069 u32 l3cesr;
1070 u32 l3celr;
1071 u32 l3caelr;
1072 u32 l3cbelr;
1073
1074 l3cesr = readl(ctx->dev_csr + L3C_ESR);
1075 if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
1076 return;
1077
1078 if (l3cesr & L3C_ESR_UCERR_MASK)
1079 dev_err(edac_dev->dev, "L3C uncorrectable error\n");
1080 if (l3cesr & L3C_ESR_CERR_MASK)
1081 dev_warn(edac_dev->dev, "L3C correctable error\n");
1082
1083 l3celr = readl(ctx->dev_csr + L3C_ELR);
1084 l3caelr = readl(ctx->dev_csr + L3C_AELR);
1085 l3cbelr = readl(ctx->dev_csr + L3C_BELR);
1086 if (l3cesr & L3C_ESR_MULTIHIT_MASK)
1087 dev_err(edac_dev->dev, "L3C multiple hit error\n");
1088 if (l3cesr & L3C_ESR_UCEVICT_MASK)
1089 dev_err(edac_dev->dev,
1090 "L3C dropped eviction of line with error\n");
1091 if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
1092 dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
1093 if (l3cesr & L3C_ESR_DATATAG_MASK)
1094 dev_err(edac_dev->dev,
1095 "L3C data error syndrome 0x%X group 0x%X\n",
1096 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
1097 else
1098 dev_err(edac_dev->dev,
1099 "L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
1100 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
1101 L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
1102 /*
1103 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
1104 * Address [37:6] in l3caelr. Lower 6 bits are zero.
1105 */
1106 dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
1107 L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
1108 (l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
1109 dev_err(edac_dev->dev,
1110 "L3C error status register value 0x%X\n", l3cesr);
1111
1112 /* Clear L3C error interrupt */
1113 writel(0, ctx->dev_csr + L3C_ESR);
1114
1115 if (ctx->version <= 1 &&
1116 xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
1117 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1118 return;
1119 }
1120 if (l3cesr & L3C_ESR_CERR_MASK)
1121 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1122 if (l3cesr & L3C_ESR_UCERR_MASK)
1123 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1124}
1125
1126static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
1127 bool enable)
1128{
1129 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1130 u32 val;
1131
1132 val = readl(ctx->dev_csr + L3C_ECR);
1133 val |= L3C_UCERREN | L3C_CERREN;
1134 /* On disable, we just disable interrupt but keep error enabled */
1135 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1136 if (enable)
1137 val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
1138 else
1139 val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
1140 }
1141 writel(val, ctx->dev_csr + L3C_ECR);
1142
1143 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1144 /* Enable/disable L3 error top level interrupt */
1145 if (enable) {
1146 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1147 L3C_UNCORR_ERR_MASK);
1148 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1149 L3C_CORR_ERR_MASK);
1150 } else {
1151 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1152 L3C_UNCORR_ERR_MASK);
1153 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1154 L3C_CORR_ERR_MASK);
1155 }
1156 }
1157}
1158
1159static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1160 const char __user *data,
1161 size_t count, loff_t *ppos)
1162{
1163 struct edac_device_ctl_info *edac_dev = file->private_data;
1164 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1165
1166 /* Generate all errors */
1167 writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1168 return count;
1169}
1170
1171static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1172 .open = simple_open,
1173 .write = xgene_edac_l3_inject_ctrl_write,
1174 .llseek = generic_file_llseek
1175};
1176
1177static void
1178xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1179{
1180 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1181 struct dentry *dbgfs_dir;
1182 char name[10];
1183
1184 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1185 return;
1186
1187 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1188 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1189 if (!dbgfs_dir)
1190 return;
1191
1192 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1193 &xgene_edac_l3_debug_inject_fops);
1194}
1195
1196static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
1197 int version)
1198{
1199 struct edac_device_ctl_info *edac_dev;
1200 struct xgene_edac_dev_ctx *ctx;
1201 struct resource res;
1202 void __iomem *dev_csr;
1203 int edac_idx;
1204 int rc = 0;
1205
1206 if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
1207 return -ENOMEM;
1208
1209 rc = of_address_to_resource(np, 0, &res);
1210 if (rc < 0) {
1211 dev_err(edac->dev, "no L3 resource address\n");
1212 goto err_release_group;
1213 }
1214 dev_csr = devm_ioremap_resource(edac->dev, &res);
1215 if (IS_ERR(dev_csr)) {
1216 dev_err(edac->dev,
1217 "devm_ioremap_resource failed for L3 resource address\n");
1218 rc = PTR_ERR(dev_csr);
1219 goto err_release_group;
1220 }
1221
1222 edac_idx = edac_device_alloc_index();
1223 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1224 "l3c", 1, "l3c", 1, 0, NULL, 0,
1225 edac_idx);
1226 if (!edac_dev) {
1227 rc = -ENOMEM;
1228 goto err_release_group;
1229 }
1230
1231 ctx = edac_dev->pvt_info;
1232 ctx->dev_csr = dev_csr;
1233 ctx->name = "xgene_l3_err";
1234 ctx->edac_idx = edac_idx;
1235 ctx->edac = edac;
1236 ctx->edac_dev = edac_dev;
1237 ctx->ddev = *edac->dev;
1238 ctx->version = version;
1239 edac_dev->dev = &ctx->ddev;
1240 edac_dev->ctl_name = ctx->name;
1241 edac_dev->dev_name = ctx->name;
1242 edac_dev->mod_name = EDAC_MOD_STR;
1243
1244 if (edac_op_state == EDAC_OPSTATE_POLL)
1245 edac_dev->edac_check = xgene_edac_l3_check;
1246
1247 xgene_edac_l3_create_debugfs_nodes(edac_dev);
1248
1249 rc = edac_device_add_device(edac_dev);
1250 if (rc > 0) {
1251 dev_err(edac->dev, "failed edac_device_add_device()\n");
1252 rc = -ENOMEM;
1253 goto err_ctl_free;
1254 }
1255
1256 if (edac_op_state == EDAC_OPSTATE_INT)
1257 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1258
1259 list_add(&ctx->next, &edac->l3s);
1260
1261 xgene_edac_l3_hw_init(edac_dev, 1);
1262
1263 devres_remove_group(edac->dev, xgene_edac_l3_add);
1264
1265 dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
1266 return 0;
1267
1268err_ctl_free:
1269 edac_device_free_ctl_info(edac_dev);
1270err_release_group:
1271 devres_release_group(edac->dev, xgene_edac_l3_add);
1272 return rc;
1273}
1274
1275static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1276{
1277 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1278
1279 xgene_edac_l3_hw_init(edac_dev, 0);
1280 edac_device_del_device(l3->edac->dev);
1281 edac_device_free_ctl_info(edac_dev);
1282 return 0;
1283}
1284
/* SoC error device */

/* IOB AXI slave 0 transaction error registers */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000C
#define  REQTYPE_RD(src)		((src) & BIT(0))
#define  ERRADDRH_RD(src)		(((src) & 0xFFC00000) >> 22)

/* IOB AXI slave 1 transaction error registers */
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001C

/* IOB processing agent (PA) transaction error status */
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)

/* IOB bridge agent (BA) transaction error registers */
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003C
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xFFC00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040

/* XGIC transaction error registers */
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03FFFFFF)

/* IOB global memory error status and error address registers */
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083C
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084C
1355
/* IO Bus Registers (accessed through the register bus regmap) */
#define RBCSR				0x0000
#define  STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define  AGENT_OFFLINE_ERR_MASK		BIT(30)
#define  UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define  WORD_ALIGNED_ERR_MASK		BIT(28)
#define  PAGE_ACCESS_ERR_MASK		BIT(27)
#define  WRITE_ACCESS_MASK		BIT(26)
#define  RBERRADDR_RD(src)		((src) & 0x03FFFFFF)
1366
/*
 * Names of the SoC memory parity error sources for version 1 hardware.
 * The array index is the bit position in the memory error status word
 * that xgene_edac_soc_check() decodes.
 */
static const char * const soc_mem_err_v1[] = {
	[0]  = "10GbE0",
	[1]  = "10GbE1",
	[2]  = "Security",
	[3]  = "SATA45",
	[4]  = "SATA23/ETH23",
	[5]  = "SATA01/ETH01",
	[6]  = "USB1",
	[7]  = "USB0",
	[8]  = "QML",
	[9]  = "QM0",
	[10] = "QM1 (XGbE01)",
	[11] = "PCIE4",
	[12] = "PCIE3",
	[13] = "PCIE2",
	[14] = "PCIE1",
	[15] = "PCIE0",
	[16] = "CTX Manager",
	[17] = "OCM",
	[18] = "1GbE",
	[19] = "CLE",
	[20] = "AHBC",
	[21] = "PktDMA",
	[22] = "GFC",
	[23] = "MSLIM",
	[24] = "10GbE2",
	[25] = "10GbE3",
	[26] = "QM2 (XGbE23)",
	[27] = "IOB",
	[28] = "unknown",
	[29] = "unknown",
	[30] = "unknown",
	[31] = "unknown",
};
1401
1402static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
1403{
1404 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1405 u32 err_addr_lo;
1406 u32 err_addr_hi;
1407 u32 reg;
1408 u32 info;
1409
1410 /* GIC transaction error interrupt */
1411 reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
1412 if (!reg)
1413 goto chk_iob_err;
1414 dev_err(edac_dev->dev, "XGIC transaction error\n");
1415 if (reg & RD_ACCESS_ERR_MASK)
1416 dev_err(edac_dev->dev, "XGIC read size error\n");
1417 if (reg & M_RD_ACCESS_ERR_MASK)
1418 dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
1419 if (reg & WR_ACCESS_ERR_MASK)
1420 dev_err(edac_dev->dev, "XGIC write size error\n");
1421 if (reg & M_WR_ACCESS_ERR_MASK)
1422 dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
1423 info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
1424 dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
1425 info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
1426 info);
1427 writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
1428
1429chk_iob_err:
1430 /* IOB memory error */
1431 reg = readl(ctx->dev_csr + GLBL_ERR_STS);
1432 if (!reg)
1433 return;
1434 if (reg & SEC_ERR_MASK) {
1435 err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
1436 err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
1437 dev_err(edac_dev->dev,
1438 "IOB single-bit correctable memory at 0x%08X.%08X error\n",
1439 err_addr_lo, err_addr_hi);
1440 writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
1441 writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
1442 }
1443 if (reg & MSEC_ERR_MASK) {
1444 err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
1445 err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
1446 dev_err(edac_dev->dev,
1447 "IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
1448 err_addr_lo, err_addr_hi);
1449 writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
1450 writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
1451 }
1452 if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
1453 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1454
1455 if (reg & DED_ERR_MASK) {
1456 err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
1457 err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
1458 dev_err(edac_dev->dev,
1459 "IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1460 err_addr_lo, err_addr_hi);
1461 writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
1462 writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
1463 }
1464 if (reg & MDED_ERR_MASK) {
1465 err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
1466 err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
1467 dev_err(edac_dev->dev,
1468 "Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1469 err_addr_lo, err_addr_hi);
1470 writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
1471 writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
1472 }
1473 if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
1474 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1475}
1476
1477static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
1478{
1479 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1480 u32 err_addr_lo;
1481 u32 err_addr_hi;
1482 u32 reg;
1483
Loc Ho4d67e3c2016-01-22 13:47:04 -07001484 /* If the register bus resource isn't available, just skip it */
1485 if (!ctx->edac->rb_map)
1486 goto rb_skip;
1487
1488 /*
1489 * Check RB access errors
1490 * 1. Out of range
1491 * 2. Un-implemented page
1492 * 3. Un-aligned access
1493 * 4. Offline slave IP
1494 */
1495 if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
1496 return;
1497 if (reg & STICKYERR_MASK) {
1498 bool write;
1499 u32 address;
1500
1501 dev_err(edac_dev->dev, "IOB bus access error(s)\n");
1502 if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
1503 return;
1504 write = reg & WRITE_ACCESS_MASK ? 1 : 0;
1505 address = RBERRADDR_RD(reg);
1506 if (reg & AGENT_OFFLINE_ERR_MASK)
1507 dev_err(edac_dev->dev,
1508 "IOB bus %s access to offline agent error\n",
1509 write ? "write" : "read");
1510 if (reg & UNIMPL_RBPAGE_ERR_MASK)
1511 dev_err(edac_dev->dev,
1512 "IOB bus %s access to unimplemented page error\n",
1513 write ? "write" : "read");
1514 if (reg & WORD_ALIGNED_ERR_MASK)
1515 dev_err(edac_dev->dev,
1516 "IOB bus %s word aligned access error\n",
1517 write ? "write" : "read");
1518 if (reg & PAGE_ACCESS_ERR_MASK)
1519 dev_err(edac_dev->dev,
1520 "IOB bus %s to page out of range access error\n",
1521 write ? "write" : "read");
1522 if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
1523 return;
1524 if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
1525 return;
1526 }
1527rb_skip:
1528
Loc Hof864b792015-09-23 17:41:00 -07001529 /* IOB Bridge agent transaction error interrupt */
1530 reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
1531 if (!reg)
1532 return;
1533
1534 dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
1535 if (reg & WRERR_RESP_MASK)
1536 dev_err(edac_dev->dev, "IOB BA write response error\n");
1537 if (reg & M_WRERR_RESP_MASK)
1538 dev_err(edac_dev->dev,
1539 "Multiple IOB BA write response error\n");
1540 if (reg & XGIC_POISONED_REQ_MASK)
1541 dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
1542 if (reg & M_XGIC_POISONED_REQ_MASK)
1543 dev_err(edac_dev->dev,
1544 "Multiple IOB BA XGIC poisoned write error\n");
1545 if (reg & RBM_POISONED_REQ_MASK)
1546 dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
1547 if (reg & M_RBM_POISONED_REQ_MASK)
1548 dev_err(edac_dev->dev,
1549 "Multiple IOB BA RBM poisoned write error\n");
1550 if (reg & WDATA_CORRUPT_MASK)
1551 dev_err(edac_dev->dev, "IOB BA write error\n");
1552 if (reg & M_WDATA_CORRUPT_MASK)
1553 dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
1554 if (reg & TRANS_CORRUPT_MASK)
1555 dev_err(edac_dev->dev, "IOB BA transaction error\n");
1556 if (reg & M_TRANS_CORRUPT_MASK)
1557 dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
1558 if (reg & RIDRAM_CORRUPT_MASK)
1559 dev_err(edac_dev->dev,
1560 "IOB BA RDIDRAM read transaction ID error\n");
1561 if (reg & M_RIDRAM_CORRUPT_MASK)
1562 dev_err(edac_dev->dev,
1563 "Multiple IOB BA RDIDRAM read transaction ID error\n");
1564 if (reg & WIDRAM_CORRUPT_MASK)
1565 dev_err(edac_dev->dev,
1566 "IOB BA RDIDRAM write transaction ID error\n");
1567 if (reg & M_WIDRAM_CORRUPT_MASK)
1568 dev_err(edac_dev->dev,
1569 "Multiple IOB BA RDIDRAM write transaction ID error\n");
1570 if (reg & ILLEGAL_ACCESS_MASK)
1571 dev_err(edac_dev->dev,
1572 "IOB BA XGIC/RB illegal access error\n");
1573 if (reg & M_ILLEGAL_ACCESS_MASK)
1574 dev_err(edac_dev->dev,
1575 "Multiple IOB BA XGIC/RB illegal access error\n");
1576
1577 err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
1578 err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
1579 dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
1580 REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
1581 ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1582 if (reg & WRERR_RESP_MASK)
1583 dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
1584 readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
1585 writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
1586}
1587
1588static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
1589{
1590 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1591 u32 err_addr_lo;
1592 u32 err_addr_hi;
1593 u32 reg;
1594
1595 /* IOB Processing agent transaction error interrupt */
1596 reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
1597 if (!reg)
1598 goto chk_iob_axi0;
Colin Ian King4bd035e2017-02-23 00:26:09 +00001599 dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
Loc Hof864b792015-09-23 17:41:00 -07001600 if (reg & IOBPA_RDATA_CORRUPT_MASK)
1601 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
1602 if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
1603 dev_err(edac_dev->dev,
Colin Ian King81761702016-11-14 23:11:04 +00001604 "Multiple IOB PA read data RAM error\n");
Loc Hof864b792015-09-23 17:41:00 -07001605 if (reg & IOBPA_WDATA_CORRUPT_MASK)
1606 dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
1607 if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
1608 dev_err(edac_dev->dev,
Colin Ian King81761702016-11-14 23:11:04 +00001609 "Multiple IOB PA write data RAM error\n");
Loc Hof864b792015-09-23 17:41:00 -07001610 if (reg & IOBPA_TRANS_CORRUPT_MASK)
1611 dev_err(edac_dev->dev, "IOB PA transaction error\n");
1612 if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
Colin Ian King81761702016-11-14 23:11:04 +00001613 dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
Loc Hof864b792015-09-23 17:41:00 -07001614 if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
1615 dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
1616 if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
1617 dev_err(edac_dev->dev,
1618 "Multiple IOB PA transaction ID RAM error\n");
1619 writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);
1620
1621chk_iob_axi0:
1622 /* IOB AXI0 Error */
1623 reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1624 if (!reg)
1625 goto chk_iob_axi1;
1626 err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
1627 err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
1628 dev_err(edac_dev->dev,
1629 "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1630 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1631 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1632 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1633 writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1634
1635chk_iob_axi1:
1636 /* IOB AXI1 Error */
1637 reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1638 if (!reg)
1639 return;
1640 err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
1641 err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
1642 dev_err(edac_dev->dev,
1643 "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1644 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1645 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1646 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1647 writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1648}
1649
1650static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1651{
1652 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1653 const char * const *soc_mem_err = NULL;
1654 u32 pcp_hp_stat;
1655 u32 pcp_lp_stat;
1656 u32 reg;
1657 int i;
1658
1659 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1660 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1661 xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
1662 if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1663 IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1664 (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1665 return;
1666
1667 if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1668 xgene_edac_iob_gic_report(edac_dev);
1669
1670 if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1671 xgene_edac_rb_report(edac_dev);
1672
1673 if (pcp_hp_stat & IOB_PA_ERR_MASK)
1674 xgene_edac_pa_report(edac_dev);
1675
1676 if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1677 dev_info(edac_dev->dev,
1678 "CSW switch trace correctable memory parity error\n");
1679 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1680 }
1681
1682 if (!reg)
1683 return;
1684 if (ctx->version == 1)
1685 soc_mem_err = soc_mem_err_v1;
1686 if (!soc_mem_err) {
1687 dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1688 reg);
1689 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1690 return;
1691 }
1692 for (i = 0; i < 31; i++) {
1693 if (reg & (1 << i)) {
1694 dev_err(edac_dev->dev, "%s memory parity error\n",
1695 soc_mem_err[i]);
1696 edac_device_handle_ue(edac_dev, 0, 0,
1697 edac_dev->ctl_name);
1698 }
1699 }
1700}
1701
1702static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
1703 bool enable)
1704{
1705 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1706
1707 /* Enable SoC IP error interrupt */
1708 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1709 if (enable) {
1710 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1711 IOB_PA_ERR_MASK |
1712 IOB_BA_ERR_MASK |
1713 IOB_XGIC_ERR_MASK |
1714 IOB_RB_ERR_MASK);
1715 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1716 CSW_SWITCH_TRACE_ERR_MASK);
1717 } else {
1718 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1719 IOB_PA_ERR_MASK |
1720 IOB_BA_ERR_MASK |
1721 IOB_XGIC_ERR_MASK |
1722 IOB_RB_ERR_MASK);
1723 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1724 CSW_SWITCH_TRACE_ERR_MASK);
1725 }
1726
1727 writel(enable ? 0x0 : 0xFFFFFFFF,
1728 ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
1729 writel(enable ? 0x0 : 0xFFFFFFFF,
1730 ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
1731 writel(enable ? 0x0 : 0xFFFFFFFF,
1732 ctx->dev_csr + XGICTRANSERRINTMSK);
1733
1734 xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
1735 enable ? 0x0 : 0xFFFFFFFF);
1736 }
1737}
1738
1739static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
1740 int version)
1741{
1742 struct edac_device_ctl_info *edac_dev;
1743 struct xgene_edac_dev_ctx *ctx;
1744 void __iomem *dev_csr;
1745 struct resource res;
1746 int edac_idx;
1747 int rc;
1748
1749 if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
1750 return -ENOMEM;
1751
1752 rc = of_address_to_resource(np, 0, &res);
1753 if (rc < 0) {
1754 dev_err(edac->dev, "no SoC resource address\n");
1755 goto err_release_group;
1756 }
1757 dev_csr = devm_ioremap_resource(edac->dev, &res);
1758 if (IS_ERR(dev_csr)) {
1759 dev_err(edac->dev,
1760 "devm_ioremap_resource failed for soc resource address\n");
1761 rc = PTR_ERR(dev_csr);
1762 goto err_release_group;
1763 }
1764
1765 edac_idx = edac_device_alloc_index();
1766 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1767 "SOC", 1, "SOC", 1, 2, NULL, 0,
1768 edac_idx);
1769 if (!edac_dev) {
1770 rc = -ENOMEM;
1771 goto err_release_group;
1772 }
1773
1774 ctx = edac_dev->pvt_info;
1775 ctx->dev_csr = dev_csr;
1776 ctx->name = "xgene_soc_err";
1777 ctx->edac_idx = edac_idx;
1778 ctx->edac = edac;
1779 ctx->edac_dev = edac_dev;
1780 ctx->ddev = *edac->dev;
1781 ctx->version = version;
1782 edac_dev->dev = &ctx->ddev;
1783 edac_dev->ctl_name = ctx->name;
1784 edac_dev->dev_name = ctx->name;
1785 edac_dev->mod_name = EDAC_MOD_STR;
1786
1787 if (edac_op_state == EDAC_OPSTATE_POLL)
1788 edac_dev->edac_check = xgene_edac_soc_check;
1789
1790 rc = edac_device_add_device(edac_dev);
1791 if (rc > 0) {
1792 dev_err(edac->dev, "failed edac_device_add_device()\n");
1793 rc = -ENOMEM;
1794 goto err_ctl_free;
1795 }
1796
1797 if (edac_op_state == EDAC_OPSTATE_INT)
1798 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1799
1800 list_add(&ctx->next, &edac->socs);
1801
1802 xgene_edac_soc_hw_init(edac_dev, 1);
1803
1804 devres_remove_group(edac->dev, xgene_edac_soc_add);
1805
1806 dev_info(edac->dev, "X-Gene EDAC SoC registered\n");
1807
1808 return 0;
1809
1810err_ctl_free:
1811 edac_device_free_ctl_info(edac_dev);
1812err_release_group:
1813 devres_release_group(edac->dev, xgene_edac_soc_add);
1814 return rc;
1815}
1816
1817static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1818{
1819 struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1820
1821 xgene_edac_soc_hw_init(edac_dev, 0);
1822 edac_device_del_device(soc->edac->dev);
1823 edac_device_free_ctl_info(edac_dev);
1824 return 0;
1825}
1826
Loc Ho0d442932015-05-22 17:32:59 -06001827static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1828{
1829 struct xgene_edac *ctx = dev_id;
1830 struct xgene_edac_pmd_ctx *pmd;
Loc Ho93474732015-09-23 17:40:59 -07001831 struct xgene_edac_dev_ctx *node;
Loc Ho0d442932015-05-22 17:32:59 -06001832 unsigned int pcp_hp_stat;
1833 unsigned int pcp_lp_stat;
1834
1835 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1836 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1837 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1838 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1839 (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1840 struct xgene_edac_mc_ctx *mcu;
1841
Loc Ho93474732015-09-23 17:40:59 -07001842 list_for_each_entry(mcu, &ctx->mcus, next)
Loc Ho0d442932015-05-22 17:32:59 -06001843 xgene_edac_mc_check(mcu->mci);
Loc Ho0d442932015-05-22 17:32:59 -06001844 }
1845
1846 list_for_each_entry(pmd, &ctx->pmds, next) {
1847 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1848 xgene_edac_pmd_check(pmd->edac_dev);
1849 }
1850
Loc Ho93474732015-09-23 17:40:59 -07001851 list_for_each_entry(node, &ctx->l3s, next)
1852 xgene_edac_l3_check(node->edac_dev);
1853
Loc Hof864b792015-09-23 17:41:00 -07001854 list_for_each_entry(node, &ctx->socs, next)
1855 xgene_edac_soc_check(node->edac_dev);
1856
Loc Ho0d442932015-05-22 17:32:59 -06001857 return IRQ_HANDLED;
1858}
1859
1860static int xgene_edac_probe(struct platform_device *pdev)
1861{
1862 struct xgene_edac *edac;
1863 struct device_node *child;
1864 struct resource *res;
1865 int rc;
1866
1867 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1868 if (!edac)
1869 return -ENOMEM;
1870
1871 edac->dev = &pdev->dev;
1872 platform_set_drvdata(pdev, edac);
1873 INIT_LIST_HEAD(&edac->mcus);
1874 INIT_LIST_HEAD(&edac->pmds);
Loc Ho93474732015-09-23 17:40:59 -07001875 INIT_LIST_HEAD(&edac->l3s);
Loc Hof864b792015-09-23 17:41:00 -07001876 INIT_LIST_HEAD(&edac->socs);
Loc Ho0d442932015-05-22 17:32:59 -06001877 spin_lock_init(&edac->lock);
1878 mutex_init(&edac->mc_lock);
1879
1880 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1881 "regmap-csw");
1882 if (IS_ERR(edac->csw_map)) {
1883 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1884 rc = PTR_ERR(edac->csw_map);
1885 goto out_err;
1886 }
1887
1888 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1889 "regmap-mcba");
1890 if (IS_ERR(edac->mcba_map)) {
1891 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1892 rc = PTR_ERR(edac->mcba_map);
1893 goto out_err;
1894 }
1895
1896 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1897 "regmap-mcbb");
1898 if (IS_ERR(edac->mcbb_map)) {
1899 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1900 rc = PTR_ERR(edac->mcbb_map);
1901 goto out_err;
1902 }
1903 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1904 "regmap-efuse");
1905 if (IS_ERR(edac->efuse_map)) {
1906 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1907 rc = PTR_ERR(edac->efuse_map);
1908 goto out_err;
1909 }
1910
Loc Ho4d67e3c2016-01-22 13:47:04 -07001911 /*
1912 * NOTE: The register bus resource is optional for compatibility
1913 * reason.
1914 */
1915 edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1916 "regmap-rb");
1917 if (IS_ERR(edac->rb_map)) {
1918 dev_warn(edac->dev, "missing syscon regmap rb\n");
1919 edac->rb_map = NULL;
1920 }
1921
Loc Ho0d442932015-05-22 17:32:59 -06001922 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1923 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1924 if (IS_ERR(edac->pcp_csr)) {
1925 dev_err(&pdev->dev, "no PCP resource address\n");
1926 rc = PTR_ERR(edac->pcp_csr);
1927 goto out_err;
1928 }
1929
1930 if (edac_op_state == EDAC_OPSTATE_INT) {
1931 int irq;
1932 int i;
1933
1934 for (i = 0; i < 3; i++) {
1935 irq = platform_get_irq(pdev, i);
1936 if (irq < 0) {
1937 dev_err(&pdev->dev, "No IRQ resource\n");
1938 rc = -EINVAL;
1939 goto out_err;
1940 }
1941 rc = devm_request_irq(&pdev->dev, irq,
1942 xgene_edac_isr, IRQF_SHARED,
1943 dev_name(&pdev->dev), edac);
1944 if (rc) {
1945 dev_err(&pdev->dev,
1946 "Could not request IRQ %d\n", irq);
1947 goto out_err;
1948 }
1949 }
1950 }
1951
Loc Ho93474732015-09-23 17:40:59 -07001952 edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1953
Loc Ho0d442932015-05-22 17:32:59 -06001954 for_each_child_of_node(pdev->dev.of_node, child) {
1955 if (!of_device_is_available(child))
1956 continue;
1957 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1958 xgene_edac_mc_add(edac, child);
1959 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
Arnd Bergmann451bb7f2015-06-01 16:09:35 -06001960 xgene_edac_pmd_add(edac, child, 1);
1961 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1962 xgene_edac_pmd_add(edac, child, 2);
Loc Ho93474732015-09-23 17:40:59 -07001963 if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1964 xgene_edac_l3_add(edac, child, 1);
1965 if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1966 xgene_edac_l3_add(edac, child, 2);
Loc Hof864b792015-09-23 17:41:00 -07001967 if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1968 xgene_edac_soc_add(edac, child, 0);
1969 if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1970 xgene_edac_soc_add(edac, child, 1);
Loc Ho0d442932015-05-22 17:32:59 -06001971 }
1972
1973 return 0;
1974
1975out_err:
1976 return rc;
1977}
1978
1979static int xgene_edac_remove(struct platform_device *pdev)
1980{
1981 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1982 struct xgene_edac_mc_ctx *mcu;
1983 struct xgene_edac_mc_ctx *temp_mcu;
1984 struct xgene_edac_pmd_ctx *pmd;
1985 struct xgene_edac_pmd_ctx *temp_pmd;
Loc Ho93474732015-09-23 17:40:59 -07001986 struct xgene_edac_dev_ctx *node;
1987 struct xgene_edac_dev_ctx *temp_node;
Loc Ho0d442932015-05-22 17:32:59 -06001988
Loc Ho93474732015-09-23 17:40:59 -07001989 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
Loc Ho0d442932015-05-22 17:32:59 -06001990 xgene_edac_mc_remove(mcu);
Loc Ho0d442932015-05-22 17:32:59 -06001991
Loc Ho93474732015-09-23 17:40:59 -07001992 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
Loc Ho0d442932015-05-22 17:32:59 -06001993 xgene_edac_pmd_remove(pmd);
Loc Ho93474732015-09-23 17:40:59 -07001994
1995 list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1996 xgene_edac_l3_remove(node);
1997
Loc Hof864b792015-09-23 17:41:00 -07001998 list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1999 xgene_edac_soc_remove(node);
2000
Loc Ho0d442932015-05-22 17:32:59 -06002001 return 0;
2002}
2003
2004static const struct of_device_id xgene_edac_of_match[] = {
2005 { .compatible = "apm,xgene-edac" },
2006 {},
2007};
2008MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
2009
2010static struct platform_driver xgene_edac_driver = {
2011 .probe = xgene_edac_probe,
2012 .remove = xgene_edac_remove,
2013 .driver = {
2014 .name = "xgene-edac",
Loc Ho0d442932015-05-22 17:32:59 -06002015 .of_match_table = xgene_edac_of_match,
2016 },
2017};
2018
2019static int __init xgene_edac_init(void)
2020{
2021 int rc;
2022
2023 /* Make sure error reporting method is sane */
2024 switch (edac_op_state) {
2025 case EDAC_OPSTATE_POLL:
2026 case EDAC_OPSTATE_INT:
2027 break;
2028 default:
2029 edac_op_state = EDAC_OPSTATE_INT;
2030 break;
2031 }
2032
2033 rc = platform_driver_register(&xgene_edac_driver);
2034 if (rc) {
2035 edac_printk(KERN_ERR, EDAC_MOD_STR,
2036 "EDAC fails to register\n");
2037 goto reg_failed;
2038 }
2039
2040 return 0;
2041
2042reg_failed:
2043 return rc;
2044}
2045module_init(xgene_edac_init);
2046
2047static void __exit xgene_edac_exit(void)
2048{
2049 platform_driver_unregister(&xgene_edac_driver);
2050}
2051module_exit(xgene_edac_exit);
2052
2053MODULE_LICENSE("GPL");
2054MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
2055MODULE_DESCRIPTION("APM X-Gene EDAC driver");
2056module_param(edac_op_state, int, 0444);
2057MODULE_PARM_DESC(edac_op_state,
2058 "EDAC error reporting state: 0=Poll, 2=Interrupt");