blob: aee03c21632a2648014ba0875f98843e61cf9d7a [file] [log] [blame]
Loc Ho0d442932015-05-22 17:32:59 -06001/*
2 * APM X-Gene SoC EDAC (error detection and correction)
3 *
4 * Copyright (c) 2015, Applied Micro Circuits Corporation
5 * Author: Feng Kan <fkan@apm.com>
6 * Loc Ho <lho@apm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <linux/ctype.h>
23#include <linux/edac.h>
24#include <linux/interrupt.h>
25#include <linux/mfd/syscon.h>
26#include <linux/module.h>
27#include <linux/of.h>
28#include <linux/of_address.h>
29#include <linux/regmap.h>
30
31#include "edac_core.h"
Borislav Petkov09bd1b42015-09-22 13:13:46 +020032#include "edac_module.h"
Loc Ho0d442932015-05-22 17:32:59 -060033
34#define EDAC_MOD_STR "xgene_edac"
35
36/* Global error configuration status registers (CSR) */
37#define PCPHPERRINTSTS 0x0000
38#define PCPHPERRINTMSK 0x0004
39#define MCU_CTL_ERR_MASK BIT(12)
40#define IOB_PA_ERR_MASK BIT(11)
41#define IOB_BA_ERR_MASK BIT(10)
42#define IOB_XGIC_ERR_MASK BIT(9)
43#define IOB_RB_ERR_MASK BIT(8)
44#define L3C_UNCORR_ERR_MASK BIT(5)
45#define MCU_UNCORR_ERR_MASK BIT(4)
46#define PMD3_MERR_MASK BIT(3)
47#define PMD2_MERR_MASK BIT(2)
48#define PMD1_MERR_MASK BIT(1)
49#define PMD0_MERR_MASK BIT(0)
50#define PCPLPERRINTSTS 0x0008
51#define PCPLPERRINTMSK 0x000C
52#define CSW_SWITCH_TRACE_ERR_MASK BIT(2)
53#define L3C_CORR_ERR_MASK BIT(1)
54#define MCU_CORR_ERR_MASK BIT(0)
55#define MEMERRINTSTS 0x0010
56#define MEMERRINTMSK 0x0014
57
/*
 * Top-level X-Gene EDAC context shared by all MCU/PMD/L3 sub-devices.
 */
struct xgene_edac {
	struct device *dev;
	struct regmap *csw_map;		/* CPU switch CSRs (CSW_CSWCR) */
	struct regmap *mcba_map;	/* memory controller bridge A CSRs */
	struct regmap *mcbb_map;	/* memory controller bridge B CSRs */
	struct regmap *efuse_map;	/* efuse CSRs (PMD disable bits) */
	void __iomem *pcp_csr;		/* PCP global error CSR base */
	spinlock_t lock;		/* serializes RMW of the PCP CSRs */
	struct dentry *dfs;		/* parent debugfs dir for sub-nodes */

	struct list_head mcus;		/* registered MCU contexts */
	struct list_head pmds;		/* registered PMD contexts */
	struct list_head l3s;		/* registered L3 contexts */

	struct mutex mc_lock;		/* protects the two masks below */
	int mc_active_mask;		/* MCUs active per board config */
	int mc_registered_mask;		/* MCUs registered with EDAC so far */
};
76
77static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
78{
79 *val = readl(edac->pcp_csr + reg);
80}
81
82static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
83 u32 bits_mask)
84{
85 u32 val;
86
87 spin_lock(&edac->lock);
88 val = readl(edac->pcp_csr + reg);
89 val &= ~bits_mask;
90 writel(val, edac->pcp_csr + reg);
91 spin_unlock(&edac->lock);
92}
93
94static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
95 u32 bits_mask)
96{
97 u32 val;
98
99 spin_lock(&edac->lock);
100 val = readl(edac->pcp_csr + reg);
101 val |= bits_mask;
102 writel(val, edac->pcp_csr + reg);
103 spin_unlock(&edac->lock);
104}
105
106/* Memory controller error CSR */
107#define MCU_MAX_RANK 8
108#define MCU_RANK_STRIDE 0x40
109
110#define MCUGECR 0x0110
111#define MCU_GECR_DEMANDUCINTREN_MASK BIT(0)
112#define MCU_GECR_BACKUCINTREN_MASK BIT(1)
113#define MCU_GECR_CINTREN_MASK BIT(2)
114#define MUC_GECR_MCUADDRERREN_MASK BIT(9)
115#define MCUGESR 0x0114
116#define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7)
117#define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6)
118#define MCU_GESR_PHYP_ERR_MASK BIT(3)
119#define MCUESRR0 0x0314
120#define MCU_ESRR_MULTUCERR_MASK BIT(3)
121#define MCU_ESRR_BACKUCERR_MASK BIT(2)
122#define MCU_ESRR_DEMANDUCERR_MASK BIT(1)
123#define MCU_ESRR_CERR_MASK BIT(0)
124#define MCUESRRA0 0x0318
125#define MCUEBLRR0 0x031c
126#define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0)
127#define MCUERCRR0 0x0320
128#define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16)
129#define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF)
130#define MCUSBECNT0 0x0324
131#define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF)
132
133#define CSW_CSWCR 0x0000
134#define CSW_CSWCR_DUALMCB_MASK BIT(0)
135
136#define MCBADDRMR 0x0000
137#define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3)
138#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)
139#define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1)
140#define MCBADDRMR_ADDRESS_MODE_MASK BIT(0)
141
/* Per-memory-controller (MCU) context. */
struct xgene_edac_mc_ctx {
	struct list_head next;		/* link on xgene_edac->mcus */
	char *name;			/* ctl/dev name handed to EDAC core */
	struct mem_ctl_info *mci;	/* owning EDAC MC instance */
	struct xgene_edac *edac;	/* parent EDAC context */
	void __iomem *mcu_csr;		/* this MCU's CSR base */
	u32 mcu_id;			/* MCU index (bit in active mask) */
};
150
/*
 * debugfs "inject_ctrl" write handler: arm error injection by writing
 * all MCU error bits into the MCUESRRA register of every rank.
 * The written data itself is ignored; any write triggers the injection.
 * Always consumes the full write (@count).
 */
static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
					      const char __user *data,
					      size_t count, loff_t *ppos)
{
	struct mem_ctl_info *mci = file->private_data;
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	int i;

	for (i = 0; i < MCU_MAX_RANK; i++) {
		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
	}
	return count;
}
166
/* File ops for the per-MCU "inject_ctrl" debugfs node (write triggers). */
static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};
172
173static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
174{
175 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
176 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200177
Loc Ho0d442932015-05-22 17:32:59 -0600178 if (!mci->debugfs)
179 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200180
181 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
182 &xgene_edac_mc_debug_inject_fops);
Loc Ho0d442932015-05-22 17:32:59 -0600183}
184
/*
 * Check one MCU for pending errors: report uncorrectable and
 * correctable rank errors to the EDAC core, log address-match errors,
 * and clear the hardware status registers.
 */
static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	/* Bail out early unless an MCU error is pending at the top level */
	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			/* Detected uncorrectable memory error */
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			/* Bank, column/row, and single-bit count registers */
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		/* Write the read status back to clear it */
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address miss-match error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}
257
/*
 * Enable or disable error reporting for one MCU and, when appropriate,
 * the shared top-level PCP interrupt. No-op unless the driver runs in
 * interrupt mode.
 */
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	/* mc_lock guards mc_registered_mask/mc_active_mask */
	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * As there is only single bit for enable error and interrupt mask,
	 * we must only enable top level interrupt after all MCUs are
	 * registered. Otherwise, if there is an error and the corresponding
	 * MCU has not registered, the interrupt will never get cleared. To
	 * determine all MCU have registered, we will keep track of active
	 * MCUs and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCU registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MUC_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MUC_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}
319
320static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
321{
322 unsigned int reg;
323 u32 mcu_mask;
324
325 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
326 return 0;
327
328 if (reg & CSW_CSWCR_DUALMCB_MASK) {
329 /*
330 * Dual MCB active - Determine if all 4 active or just MCU0
331 * and MCU2 active
332 */
333 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
334 return 0;
335 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
336 } else {
337 /*
338 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
339 * active
340 */
341 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
342 return 0;
343 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
344 }
345
346 /* Save active MC mask if hasn't set already */
347 if (!ctx->edac->mc_active_mask)
348 ctx->edac->mc_active_mask = mcu_mask;
349
350 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
351}
352
/*
 * Probe and register one memory controller (MCU) described by @np.
 * Inactive MCUs are rejected with -ENODEV. All partial setup is rolled
 * back through the devres group on error.
 */
static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	/* Open a devres group so failures below can release everything */
	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	/* 4 virtual chip-select rows x 2 channels per MCU */
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = "0.1";
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	/* Polling mode hooks the periodic check; INT mode uses the IRQ path */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	/* Turn on error reporting (and IRQs, in interrupt mode) */
	xgene_edac_mc_irq_ctl(mci, true);

	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}
448
/* Tear down one MCU: mask its interrupts and unregister from EDAC. */
static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
{
	xgene_edac_mc_irq_ctl(mcu->mci, false);
	edac_mc_del_mc(&mcu->mci->dev);
	edac_mc_free(mcu->mci);
	return 0;	/* always succeeds */
}
456
457/* CPU L1/L2 error CSR */
458#define MAX_CPU_PER_PMD 2
459#define CPU_CSR_STRIDE 0x00100000
460#define CPU_L2C_PAGE 0x000D0000
461#define CPU_MEMERR_L2C_PAGE 0x000E0000
462#define CPU_MEMERR_CPU_PAGE 0x000F0000
463
464#define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000
465#define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004
466#define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
467#define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
468#define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
469#define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
470#define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2)
471#define MEMERR_CPU_ICFESR_CERR_MASK BIT(0)
472#define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c
473#define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
474#define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
475#define MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
476#define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
477#define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2)
478#define MEMERR_CPU_LSUESR_CERR_MASK BIT(0)
479#define MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008
480#define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010
481#define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014
482#define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
483#define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16)
484#define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
485#define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7)
486#define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
487#define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2)
488#define MEMERR_CPU_MMUESR_CERR_MASK BIT(0)
489#define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804
490#define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c
491#define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814
492
493#define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000
494#define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004
495#define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24)
496#define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18)
497#define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17)
498#define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13)
499#define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10)
500#define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8)
501#define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3)
502#define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2)
503#define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1)
504#define MEMERR_L2C_L2ESR_ERR_MASK BIT(0)
505#define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008
506#define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010
507#define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c
508#define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014
509#define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1)
510#define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0)
511#define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018
512#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c
513#define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804
514
515/*
516 * Processor Module Domain (PMD) context - Context for a pair of processsors.
517 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
518 * its own L1 cache.
519 */
struct xgene_edac_pmd_ctx {
	struct list_head next;		/* link on xgene_edac->pmds */
	struct device ddev;		/* device handed to the EDAC core */
	char *name;			/* ctl/dev name handed to EDAC core */
	struct xgene_edac *edac;	/* parent EDAC context */
	struct edac_device_ctl_info *edac_dev;	/* owning EDAC device */
	void __iomem *pmd_csr;		/* this PMD's CSR base */
	u32 pmd;			/* PMD index */
	int version;			/* controller version; >1 has L2C RTO */
};
530
/*
 * Check one CPU's L1 memory error status registers (ICF, LSU, and MMU
 * blocks in turn): log the decoded error, write the status back to
 * clear it, and report correctable errors to the EDAC core.
 */
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	/* Instruction cache fetch (ICF) error status */
	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	/* Load/store unit (LSU) error status */
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	/* Memory management unit (MMU) error status */
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	/* MMU errors are reported as correctable unconditionally here */
	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}
670
/*
 * Check the PMD's shared L2 cache error status and L2 request time-out
 * status: log the decoded error, clear the HW status, and report
 * CE/UE events to the EDAC core.
 */
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	/* Error address is split across low/high registers */
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable error\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check if any memory request timed out on L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}
745
746static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
747{
748 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
749 unsigned int pcp_hp_stat;
750 int i;
751
752 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
753 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
754 return;
755
756 /* Check CPU L1 error */
757 for (i = 0; i < MAX_CPU_PER_PMD; i++)
758 xgene_edac_pmd_l1_check(edac_dev, i);
759
760 /* Check CPU L2 error */
761 xgene_edac_pmd_l2_check(edac_dev);
762}
763
/*
 * Enable L1 memory error reporting for one CPU (@cpu) of this PMD by
 * writing the ICF/LSU/MMU error control registers.
 */
static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error:
	 *  MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
	 * NOTE(review): the 0x...301/0x...101 values are undocumented here;
	 * meaning of individual bits should be confirmed against the SoC
	 * register reference.
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}
779
/*
 * Enable PMD-level (L2C) memory error reporting; on controller
 * versions > 1, also configure the L2C HW request time-out feature.
 */
static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}
792
/*
 * Enable or disable error reporting for one PMD. The top-level PCP
 * interrupt mask is touched only when running in interrupt mode; on
 * enable, the L2C and per-CPU error reporting HW is also configured.
 */
static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	int i;

	/* Enable PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
	}

	if (enable) {
		xgene_edac_pmd_hw_cfg(edac_dev);

		/* Two CPUs per a PMD */
		for (i = 0; i < MAX_CPU_PER_PMD; i++)
			xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
	}
}
817
/*
 * debugfs "l1_inject_ctrl" write handler: trigger L1 correctable error
 * injection on both CPUs of this PMD via the ICF/LSU/MMU "A" registers.
 * The written data itself is ignored; any write triggers the injection.
 */
static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *cpux_pg_f;
	int i;

	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
			    CPU_MEMERR_CPU_PAGE;

		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
		       MEMERR_CPU_ICFESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
		       MEMERR_CPU_LSUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
		       MEMERR_CPU_MMUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
	}
	return count;
}
843
/*
 * debugfs "l2_inject_ctrl" write handler: trigger L2 error injection
 * for this PMD via the L2ESRA register. The written data itself is
 * ignored; any write triggers the injection.
 */
static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
	       MEMERR_L2C_L2ESR_UCERR_MASK |
	       MEMERR_L2C_L2ESR_ERR_MASK,
	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
	return count;
}
859
/* debugfs ops: [0] = L1 inject node, [1] = L2 inject node, then sentinel. */
static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};
871
Loc Ho93474732015-09-23 17:40:59 -0700872static void
873xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
Loc Ho0d442932015-05-22 17:32:59 -0600874{
875 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200876 struct dentry *dbgfs_dir;
Loc Ho93474732015-09-23 17:40:59 -0700877 char name[10];
Loc Ho0d442932015-05-22 17:32:59 -0600878
Loc Ho93474732015-09-23 17:40:59 -0700879 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
Loc Ho0d442932015-05-22 17:32:59 -0600880 return;
881
Loc Ho0d442932015-05-22 17:32:59 -0600882 sprintf(name, "PMD%d", ctx->pmd);
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200883 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
884 if (!dbgfs_dir)
Loc Ho0d442932015-05-22 17:32:59 -0600885 return;
886
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200887 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
888 &xgene_edac_pmd_debug_inject_fops[0]);
889 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
890 &xgene_edac_pmd_debug_inject_fops[1]);
Loc Ho0d442932015-05-22 17:32:59 -0600891}
892
893static int xgene_edac_pmd_available(u32 efuse, int pmd)
894{
895 return (efuse & (1 << pmd)) ? 0 : 1;
896}
897
/*
 * Probe and register one processor module domain (PMD) described by
 * @np. PMDs fused off in the efuse are skipped with -ENODEV. All
 * partial setup is rolled back through the devres group on error.
 */
static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	/*
	 * NOTE(review): pmd is a u32, so "l2c%d" could exceed the 10-byte
	 * edac_name buffer for very large values — consider snprintf();
	 * in practice pmd is a small index.
	 */
	sprintf(edac_name, "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2, NULL,
					      0, edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	/* Polling mode hooks the periodic check; INT mode uses the IRQ path */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	/* Turn on HW error reporting for this PMD */
	xgene_edac_pmd_hw_ctl(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}
990
/*
 * Tear down one PMD EDAC device: quiesce the hardware, then unregister
 * and free the control structure (which also frees the pmd context,
 * since it lives in pvt_info).
 */
static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
{
	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;

	xgene_edac_pmd_hw_ctl(edac_dev, 0);
	edac_device_del_device(edac_dev->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}
1000
/* L3 Error device */
#define L3C_ESR				(0x0A * 4)	/* error status */
#define L3C_ESR_DATATAG_MASK		BIT(9)	/* set: data error, clear: tag */
#define L3C_ESR_MULTIHIT_MASK		BIT(8)	/* multiple hit */
#define L3C_ESR_UCEVICT_MASK		BIT(6)	/* dropped eviction of bad line */
#define L3C_ESR_MULTIUCERR_MASK		BIT(5)	/* multiple uncorrectable errors */
#define L3C_ESR_MULTICERR_MASK		BIT(4)	/* multiple correctable errors */
#define L3C_ESR_UCERR_MASK		BIT(3)	/* uncorrectable error latched */
#define L3C_ESR_CERR_MASK		BIT(2)	/* correctable error latched */
#define L3C_ESR_UCERRINTR_MASK		BIT(1)
#define L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)	/* error control */
#define L3C_ECR_UCINTREN		BIT(3)	/* uncorrectable error intr enable */
#define L3C_ECR_CINTREN			BIT(2)	/* correctable error intr enable */
#define L3C_UCERREN			BIT(1)	/* uncorrectable error detection */
#define L3C_CERREN			BIT(0)	/* correctable error detection */
#define L3C_ELR				(0x0C * 4)	/* error log */
#define L3C_ELR_ERRSYN(src)		((src & 0xFF800000) >> 23)
#define L3C_ELR_ERRWAY(src)		((src & 0x007E0000) >> 17)
#define L3C_ELR_AGENTID(src)		((src & 0x0001E000) >> 13)
#define L3C_ELR_ERRGRP(src)		((src & 0x00000F00) >> 8)
#define L3C_ELR_OPTYPE(src)		((src & 0x000000F0) >> 4)
#define L3C_ELR_PADDRHIGH(src)		(src & 0x0000000F)
#define L3C_AELR			(0x0D * 4)	/* error address */
#define L3C_BELR			(0x0E * 4)	/* error bank */
#define L3C_BELR_BANK(src)		(src & 0x0000000F)
1027
/* Per EDAC-device (e.g. L3) driver context, stored in edac_dev->pvt_info */
struct xgene_edac_dev_ctx {
	struct list_head next;			/* link in parent's device list */
	struct device ddev;			/* copy of parent dev, registered with EDAC */
	char *name;				/* ctl/dev name reported to EDAC */
	struct xgene_edac *edac;		/* parent driver context */
	struct edac_device_ctl_info *edac_dev;	/* owning EDAC control structure */
	int edac_idx;				/* index from edac_device_alloc_index() */
	void __iomem *dev_csr;			/* mapped CSR region of the block */
	int version;				/* hardware revision (1 or 2) */
};
1038
1039/*
1040 * Version 1 of the L3 controller has broken single bit correctable logic for
1041 * certain error syndromes. Log them as uncorrectable in that case.
1042 */
1043static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1044{
1045 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1046 switch (L3C_ELR_ERRSYN(l3celr)) {
1047 case 0x13C:
1048 case 0x0B4:
1049 case 0x007:
1050 case 0x00D:
1051 case 0x00E:
1052 case 0x019:
1053 case 0x01A:
1054 case 0x01C:
1055 case 0x04E:
1056 case 0x041:
1057 return true;
1058 }
1059 } else if (L3C_ELR_ERRSYN(l3celr) == 9)
1060 return true;
1061
1062 return false;
1063}
1064
/*
 * Decode and report a pending L3 cache error.
 *
 * Reads the error status (ESR), log (ELR), address (AELR) and bank (BELR)
 * registers, logs the details, clears the latched status by writing 0 to
 * ESR, then updates the EDAC CE/UE counters.
 */
static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	/* Nothing latched - bail out before touching the log registers */
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	/* Capture the log registers before the status is cleared below */
	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	/* DATATAG set means the error hit the data array, clear means tag */
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 * Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	/*
	 * On v1 hardware certain "correctable" syndromes are really
	 * uncorrectable; account them as UE and skip the normal paths so
	 * the same event is not counted twice.
	 */
	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
1124
1125static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
1126 bool enable)
1127{
1128 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1129 u32 val;
1130
1131 val = readl(ctx->dev_csr + L3C_ECR);
1132 val |= L3C_UCERREN | L3C_CERREN;
1133 /* On disable, we just disable interrupt but keep error enabled */
1134 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1135 if (enable)
1136 val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
1137 else
1138 val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
1139 }
1140 writel(val, ctx->dev_csr + L3C_ECR);
1141
1142 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1143 /* Enable/disable L3 error top level interrupt */
1144 if (enable) {
1145 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1146 L3C_UNCORR_ERR_MASK);
1147 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1148 L3C_CORR_ERR_MASK);
1149 } else {
1150 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1151 L3C_UNCORR_ERR_MASK);
1152 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1153 L3C_CORR_ERR_MASK);
1154 }
1155 }
1156}
1157
1158static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1159 const char __user *data,
1160 size_t count, loff_t *ppos)
1161{
1162 struct edac_device_ctl_info *edac_dev = file->private_data;
1163 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1164
1165 /* Generate all errors */
1166 writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1167 return count;
1168}
1169
1170static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1171 .open = simple_open,
1172 .write = xgene_edac_l3_inject_ctrl_write,
1173 .llseek = generic_file_llseek
1174};
1175
1176static void
1177xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1178{
1179 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1180 struct dentry *dbgfs_dir;
1181 char name[10];
1182
1183 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1184 return;
1185
1186 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1187 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1188 if (!dbgfs_dir)
1189 return;
1190
1191 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1192 &xgene_edac_l3_debug_inject_fops);
1193}
1194
/*
 * Register the L3 cache controller as an EDAC device.
 *
 * @edac:    parent driver context
 * @np:      DT node providing the L3C CSR resource
 * @version: 1 for "apm,xgene-edac-l3", 2 for the -v2 compatible
 *
 * Returns 0 on success or a negative errno.  devm allocations made here
 * are collected in a devres group so the error path releases them at once.
 */
static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Shallow copy of the parent device gives EDAC a unique dev pointer */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	/* NOTE(review): a positive return is treated as failure, matching
	 * the PMD path - confirm against edac_device_add_device(). */
	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	/* Turn on error detection and (in interrupt mode) interrupts */
	xgene_edac_l3_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}
1273
1274static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1275{
1276 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1277
1278 xgene_edac_l3_hw_init(edac_dev, 0);
1279 edac_device_del_device(l3->edac->dev);
1280 edac_device_free_ctl_info(edac_dev);
1281 return 0;
1282}
1283
/*
 * Top level PCP error interrupt handler.  Reads the high and low priority
 * status registers and dispatches to the MCU, PMD and L3 checkers.
 */
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	/* MCU errors can surface in either priority register */
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	/* Each PMD owns one status bit, starting at PMD0_MERR_MASK */
	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	/* L3 checker re-reads its own status, so no per-node bit test here */
	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	/* NOTE(review): IRQ_HANDLED is returned even when no status bit was
	 * set; the lines are requested IRQF_SHARED - confirm this is OK. */
	return IRQ_HANDLED;
}
1313
/*
 * Bind the top level "apm,xgene-edac" platform device: resolve the syscon
 * regmaps, map the PCP CSR region, request the shared error interrupts
 * (interrupt mode only), create the debugfs root and register an EDAC
 * device for every enabled child node.
 */
static int xgene_edac_probe(struct platform_device *pdev)
{
	struct xgene_edac *edac;
	struct device_node *child;
	struct resource *res;
	int rc;

	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
	if (!edac)
		return -ENOMEM;

	edac->dev = &pdev->dev;
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->mcus);
	INIT_LIST_HEAD(&edac->pmds);
	INIT_LIST_HEAD(&edac->l3s);
	spin_lock_init(&edac->lock);
	mutex_init(&edac->mc_lock);

	/* All four syscon regmaps are mandatory */
	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							"regmap-csw");
	if (IS_ERR(edac->csw_map)) {
		dev_err(edac->dev, "unable to get syscon regmap csw\n");
		rc = PTR_ERR(edac->csw_map);
		goto out_err;
	}

	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcba");
	if (IS_ERR(edac->mcba_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
		rc = PTR_ERR(edac->mcba_map);
		goto out_err;
	}

	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcbb");
	if (IS_ERR(edac->mcbb_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
		rc = PTR_ERR(edac->mcbb_map);
		goto out_err;
	}
	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							  "regmap-efuse");
	if (IS_ERR(edac->efuse_map)) {
		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
		rc = PTR_ERR(edac->efuse_map);
		goto out_err;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(edac->pcp_csr)) {
		dev_err(&pdev->dev, "no PCP resource address\n");
		rc = PTR_ERR(edac->pcp_csr);
		goto out_err;
	}

	if (edac_op_state == EDAC_OPSTATE_INT) {
		int irq;
		int i;

		/* Three PCP error interrupt lines share the same handler */
		for (i = 0; i < 3; i++) {
			irq = platform_get_irq(pdev, i);
			if (irq < 0) {
				dev_err(&pdev->dev, "No IRQ resource\n");
				rc = -EINVAL;
				goto out_err;
			}
			rc = devm_request_irq(&pdev->dev, irq,
					      xgene_edac_isr, IRQF_SHARED,
					      dev_name(&pdev->dev), edac);
			if (rc) {
				dev_err(&pdev->dev,
					"Could not request IRQ %d\n", irq);
				goto out_err;
			}
		}
	}

	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

	/* Failures in the child add helpers are logged there; not fatal */
	for_each_child_of_node(pdev->dev.of_node, child) {
		if (!of_device_is_available(child))
			continue;
		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
			xgene_edac_mc_add(edac, child);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
			xgene_edac_pmd_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
			xgene_edac_pmd_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
			xgene_edac_l3_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
			xgene_edac_l3_add(edac, child, 2);
	}

	return 0;

out_err:
	return rc;
}
1416
/* Driver unbind: tear down every registered MCU, PMD and L3 EDAC device. */
static int xgene_edac_remove(struct platform_device *pdev)
{
	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
	struct xgene_edac_mc_ctx *mcu;
	struct xgene_edac_mc_ctx *temp_mcu;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_pmd_ctx *temp_pmd;
	struct xgene_edac_dev_ctx *node;
	struct xgene_edac_dev_ctx *temp_node;

	/* _safe iteration: each remove helper frees the entry it is given */
	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
		xgene_edac_mc_remove(mcu);

	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
		xgene_edac_pmd_remove(pmd);

	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
		xgene_edac_l3_remove(node);

	/* NOTE(review): edac->dfs created in probe is not removed here -
	 * confirm whether the debugfs dir should be torn down on unbind. */
	return 0;
}
1438
/* Only the top level node is matched; children are walked in probe */
static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1444
/* Platform driver glue binding probe/remove to the "xgene-edac" device */
static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};
1453
1454static int __init xgene_edac_init(void)
1455{
1456 int rc;
1457
1458 /* Make sure error reporting method is sane */
1459 switch (edac_op_state) {
1460 case EDAC_OPSTATE_POLL:
1461 case EDAC_OPSTATE_INT:
1462 break;
1463 default:
1464 edac_op_state = EDAC_OPSTATE_INT;
1465 break;
1466 }
1467
1468 rc = platform_driver_register(&xgene_edac_driver);
1469 if (rc) {
1470 edac_printk(KERN_ERR, EDAC_MOD_STR,
1471 "EDAC fails to register\n");
1472 goto reg_failed;
1473 }
1474
1475 return 0;
1476
1477reg_failed:
1478 return rc;
1479}
1480module_init(xgene_edac_init);
1481
/* Module exit: unregister the platform driver (undoes xgene_edac_init). */
static void __exit xgene_edac_exit(void)
{
	platform_driver_unregister(&xgene_edac_driver);
}
module_exit(xgene_edac_exit);
1487
1488MODULE_LICENSE("GPL");
1489MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
1490MODULE_DESCRIPTION("APM X-Gene EDAC driver");
1491module_param(edac_op_state, int, 0444);
1492MODULE_PARM_DESC(edac_op_state,
1493 "EDAC error reporting state: 0=Poll, 2=Interrupt");