blob: 3f8c890d79c67b040c59577e279f0146739d68fe [file] [log] [blame]
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor family Sandy Bridge.
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License version 2 only.
8 *
9 * Copyright (c) 2011 by:
10 * Mauro Carvalho Chehab <mchehab@redhat.com>
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/edac.h>
20#include <linux/mmzone.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020021#include <linux/smp.h>
22#include <linux/bitmap.h>
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -030023#include <linux/math64.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020024#include <asm/processor.h>
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -020025#include <asm/mce.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020026
27#include "edac_core.h"
28
/*
 * Static vars
 *
 * sbridge_edac_list collects every struct sbridge_dev created by
 * alloc_sbridge_dev(); get_sbridge_dev() and get_mci_for_node_id()
 * walk it to find an entry by bus number or node id.
 */
static LIST_HEAD(sbridge_edac_list);
static DEFINE_MUTEX(sbridge_edac_lock);
static int probed;
33
34/*
35 * Alter this version for the module when modifications are made
36 */
37#define SBRIDGE_REVISION " Ver: 1.0.0 "
38#define EDAC_MOD_STR "sbridge_edac"
39
/*
 * Debug macros
 *
 * Both wrappers forward to the EDAC core print helpers and always pass
 * "sbridge" as the prefix argument, so callers only supply the level,
 * the format string and its arguments.
 */
#define sbridge_printk(level, fmt, arg...)			\
	edac_printk(level, "sbridge", fmt, ##arg)

#define sbridge_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
48
/*
 * Extract the bit field that spans bits <lo>..<hi> (both inclusive) of
 * register value <v>. The field is returned right-justified and, because
 * the mask is built from 1ULL, the result has type unsigned long long.
 */
#define GET_BITFIELD(v, lo, hi)	\
	(((v) >> (lo)) & ((1ULL << ((hi) - (lo) + 1)) - 1))
54
55/*
56 * sbridge Memory Controller Registers
57 */
58
/*
 * FIXME: For now, the IDs are ordered by device/function, as that makes
 * the driver's development process easier. This table should be
 * moved to pci_ids.h when submitted upstream.
 */
64#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
65#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
66#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
67#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */
68#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */
69#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */
70#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */
71#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */
72#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */
73#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */
74#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */
75
	/*
	 * Currently unused, but these will be needed in future
	 * implementations, as they hold the error counters.
	 */
80#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */
81#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */
82#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */
83#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */
84
/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
/*
 * Config-space offsets of the SAD DRAM rule registers. The driver reads
 * them through pvt->pci_sad0; entry N pairs with interleave_list[N].
 */
static const u32 dram_rule[] = {
	0x80, 0x88, 0x90, 0x98, 0xa0,
	0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
};
#define MAX_SAD     ARRAY_SIZE(dram_rule)

/*
 * Field accessors for a DRAM rule register value.
 * SAD_LIMIT places the 20-bit field at address bits 45:26 and sets the
 * 26 bits below it, giving an inclusive upper address.
 */
#define SAD_LIMIT(reg)		((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
#define DRAM_ATTR(reg)		GET_BITFIELD(reg, 2,  3)
#define INTERLEAVE_MODE(reg)	GET_BITFIELD(reg, 1,  1)
#define DRAM_RULE_ENABLE(reg)	GET_BITFIELD(reg, 0,  0)
96
97static char *get_dram_attr(u32 reg)
98{
99 switch(DRAM_ATTR(reg)) {
100 case 0:
101 return "DRAM";
102 case 1:
103 return "MMCFG";
104 case 2:
105 return "NXM";
106 default:
107 return "unknown";
108 }
109}
110
/*
 * Config-space offsets of the SAD interleave list registers, read through
 * pvt->pci_sad0. Entry N belongs to the DRAM rule at dram_rule[N].
 */
static const u32 interleave_list[] = {
	0x84, 0x8c, 0x94, 0x9c, 0xa4,
	0xac, 0xb4, 0xbc, 0xc4, 0xcc,
};
#define MAX_INTERLEAVE	ARRAY_SIZE(interleave_list)

/*
 * The eight 3-bit package slots of an interleave list register.
 * Two slots share each byte: bits 0-2 and bits 3-5.
 */
#define SAD_PKG0(reg)		GET_BITFIELD(reg, 0, 2)
#define SAD_PKG1(reg)		GET_BITFIELD(reg, 3, 5)
#define SAD_PKG2(reg)		GET_BITFIELD(reg, 8, 10)
#define SAD_PKG3(reg)		GET_BITFIELD(reg, 11, 13)
#define SAD_PKG4(reg)		GET_BITFIELD(reg, 16, 18)
#define SAD_PKG5(reg)		GET_BITFIELD(reg, 19, 21)
#define SAD_PKG6(reg)		GET_BITFIELD(reg, 24, 26)
#define SAD_PKG7(reg)		GET_BITFIELD(reg, 27, 29)
125
126static inline int sad_pkg(u32 reg, int interleave)
127{
128 switch (interleave) {
129 case 0:
130 return SAD_PKG0(reg);
131 case 1:
132 return SAD_PKG1(reg);
133 case 2:
134 return SAD_PKG2(reg);
135 case 3:
136 return SAD_PKG3(reg);
137 case 4:
138 return SAD_PKG4(reg);
139 case 5:
140 return SAD_PKG5(reg);
141 case 6:
142 return SAD_PKG6(reg);
143 case 7:
144 return SAD_PKG7(reg);
145 default:
146 return -EINVAL;
147 }
148}
149
/* Devices 12 Function 7 */

/* Register offsets read through pvt->pci_sad1 in get_memory_layout() */
#define TOLM		0x80
#define TOHM		0x84

/*
 * Turn the raw TOLM/TOHM register values into inclusive upper addresses.
 * NOTE(review): GET_TOLM shifts its field to bit 28 and GET_TOHM to bit 25,
 * yet both fill in 0x3ffffff (26 low bits) - confirm the intended
 * granularity against the datasheet.
 */
#define GET_TOLM(reg)		((GET_BITFIELD(reg, 0,  3) << 28) | 0x3ffffff)
#define GET_TOHM(reg)		((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)

/* Device 13 Function 6 */

/* Both registers below are read through pvt->pci_br in get_dimm_config() */
#define SAD_TARGET	0xf0

#define SOURCE_ID(reg)		GET_BITFIELD(reg, 9, 11)

#define SAD_CONTROL	0xf4

#define NODE_ID(reg)		GET_BITFIELD(reg, 0, 2)
167
/* Device 14 function 0 */

/* Config-space offsets of the TAD DRAM rule registers (read via pvt->pci_ha0) */
static const u32 tad_dram_rule[] = {
	0x40, 0x44, 0x48, 0x4c,
	0x50, 0x54, 0x58, 0x5c,
	0x60, 0x64, 0x68, 0x6c,
};
#define MAX_TAD	ARRAY_SIZE(tad_dram_rule)

/*
 * Field accessors for a TAD DRAM rule register value.
 * TAD_LIMIT shifts its field up by 26 bits and sets the 26 bits below it,
 * giving an inclusive upper address. TAD_TGT0..3 are the four 2-bit
 * channel targets.
 */
#define TAD_LIMIT(reg)		((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
#define TAD_SOCK(reg)		GET_BITFIELD(reg, 10, 11)
#define TAD_CH(reg)		GET_BITFIELD(reg,  8,  9)
#define TAD_TGT3(reg)		GET_BITFIELD(reg,  6,  7)
#define TAD_TGT2(reg)		GET_BITFIELD(reg,  4,  5)
#define TAD_TGT1(reg)		GET_BITFIELD(reg,  2,  3)
#define TAD_TGT0(reg)		GET_BITFIELD(reg,  0,  1)

/* Device 15, function 0 */

/* Register offset; single-bit flags decoded from its value follow */
#define MCMTR			0x7c

#define IS_ECC_ENABLED(mcmtr)		GET_BITFIELD(mcmtr, 2, 2)
#define IS_LOCKSTEP_ENABLED(mcmtr)	GET_BITFIELD(mcmtr, 1, 1)
#define IS_CLOSE_PG(mcmtr)		GET_BITFIELD(mcmtr, 0, 0)

/* Device 15, function 1 */

/* Register offset read through pvt->pci_ras in get_dimm_config() */
#define RASENABLES		0xac
#define IS_MIRROR_ENABLED(reg)		GET_BITFIELD(reg, 0, 0)
/* Device 15, functions 2-5 */

/*
 * Config-space offsets of the per-DIMM MTR registers, read through
 * pvt->pci_tad[channel]; one entry per DIMM slot of a channel.
 */
static const int mtr_regs[] = {
	0x80, 0x84, 0x88,
};

/* Field accessors for an MTR register value */
#define RANK_DISABLE(mtr)		GET_BITFIELD(mtr, 16, 19)
#define IS_DIMM_PRESENT(mtr)		GET_BITFIELD(mtr, 14, 14)
#define RANK_CNT_BITS(mtr)		GET_BITFIELD(mtr, 12, 13)
#define RANK_WIDTH_BITS(mtr)		GET_BITFIELD(mtr, 2, 4)
#define COL_WIDTH_BITS(mtr)		GET_BITFIELD(mtr, 0, 1)

/*
 * Config-space offsets of the per-channel TAD offset registers; indexed
 * with the same TAD number used for tad_dram_rule[].
 */
static const u32 tad_ch_nilv_offset[] = {
	0x90, 0x94, 0x98, 0x9c,
	0xa0, 0xa4, 0xa8, 0xac,
	0xb0, 0xb4, 0xb8, 0xbc,
};
#define CHN_IDX_OFFSET(reg)		GET_BITFIELD(reg, 28, 29)
#define TAD_OFFSET(reg)			(GET_BITFIELD(reg,  6, 25) << 26)

/* Config-space offsets of the RIR wayness/limit registers, one per range */
static const u32 rir_way_limit[] = {
	0x108, 0x10c, 0x110, 0x114, 0x118,
};
#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)

/*
 * Field accessors for a RIR wayness/limit register value.
 * RIR_LIMIT shifts its field up by 29 bits and sets the 29 bits below it.
 */
#define IS_RIR_VALID(reg)	GET_BITFIELD(reg, 31, 31)
#define RIR_WAY(reg)		GET_BITFIELD(reg, 28, 29)
#define RIR_LIMIT(reg)		((GET_BITFIELD(reg,  1, 10) << 29)| 0x1fffffff)

/* Second dimension of rir_offset[]; get_memory_layout() uses 1 << RIR_WAY() ways */
#define MAX_RIR_WAY	8

/* Config-space offsets of the RIR interleave registers: [range][way] */
static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
	{ 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
	{ 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
	{ 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
	{ 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
	{ 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
};

/* Field accessors for a RIR interleave register value */
#define RIR_RNK_TGT(reg)		GET_BITFIELD(reg, 16, 19)
#define RIR_OFFSET(reg)		GET_BITFIELD(reg,  2, 14)
239
/* Device 16, functions 2-7 */

/*
 * FIXME: Implement the error count reads directly
 */

/*
 * Offsets of the corrected-error count registers. Each register value
 * packs two 15-bit counters plus one overflow bit apiece, split into an
 * "odd" half (bits 16-31) and an "even" half (bits 0-15) by the macros
 * below.
 */
static const u32 correrrcnt[] = {
	0x104, 0x108, 0x10c, 0x110,
};

#define RANK_ODD_OV(reg)		GET_BITFIELD(reg, 31, 31)
#define RANK_ODD_ERR_CNT(reg)		GET_BITFIELD(reg, 16, 30)
#define RANK_EVEN_OV(reg)		GET_BITFIELD(reg, 15, 15)
#define RANK_EVEN_ERR_CNT(reg)		GET_BITFIELD(reg,  0, 14)

/* Offsets of the corrected-error threshold registers (same odd/even split) */
static const u32 correrrthrsld[] = {
	0x11c, 0x120, 0x124, 0x128,
};

#define RANK_ODD_ERR_THRSLD(reg)	GET_BITFIELD(reg, 16, 30)
#define RANK_EVEN_ERR_THRSLD(reg)	GET_BITFIELD(reg,  0, 14)


/* Device 17, function 0 */

/* Register offset read through pvt->pci_ddrio to detect registered DIMMs */
#define SB_RANK_CFG_A		0x0328

#define IS_RDIMM_ENABLED(reg)		GET_BITFIELD(reg, 11, 11)
268
269/*
270 * sbridge structs
271 */
272
273#define NUM_CHANNELS 4
274#define MAX_DIMMS 3 /* Max DIMMS per channel */
275
/* Per-controller register data kept by get_dimm_config() */
struct sbridge_info {
	u32	mcmtr;		/* value read from the MCMTR register */
	u32	rankcfgr;	/* config-space offset used for the pci_ddrio read */
};
280
/* Per-channel population counters */
struct sbridge_channel {
	u32		ranks;
	u32		dimms;	/* incremented by get_dimm_config() for each present DIMM */
};
285
/* One PCI function the driver looks for; filled in with PCI_DESCR() */
struct pci_id_descr {
	int			dev;		/* PCI device (slot) number */
	int			func;		/* PCI function number */
	int 			dev_id;		/* PCI device ID */
	int			optional;	/* non-zero for the entry flagged optional in the table */
};
292
/* A descriptor array together with its element count */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;	/* number of entries in descr[] */
};
297
/* One entry of sbridge_edac_list, keyed by PCI bus number */
struct sbridge_dev {
	struct list_head	list;		/* link in sbridge_edac_list */
	u8			bus, mc;	/* bus: lookup key; mc: printed as "mc#" in debug output */
	u8			node_id, source_id;	/* decoded from SAD_CONTROL / SAD_TARGET */
	struct pci_dev		**pdev;		/* array of n_devs pointers; unset slots are NULL */
	int			n_devs;
	struct mem_ctl_info	*mci;		/* returned by get_mci_for_node_id() */
};
306
/* Driver private data, reached through mci->pvt_info */
struct sbridge_pvt {
	/* PCI functions whose config space the driver reads */
	struct pci_dev		*pci_ta, *pci_ddrio, *pci_ras;
	struct pci_dev		*pci_sad0, *pci_sad1, *pci_ha0;
	struct pci_dev		*pci_br;
	struct pci_dev		*pci_tad[NUM_CHANNELS];	/* one per memory channel */

	struct sbridge_dev	*sbridge_dev;

	struct sbridge_info	info;
	struct sbridge_channel	channel[NUM_CHANNELS];

	/* Memory type detection */
	bool			is_mirrored, is_lockstep, is_close_pg;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* Memory description: limits decoded by GET_TOLM() / GET_TOHM() */
	u64			tolm, tohm;
};
334
/* Designated-initializer body for one struct pci_id_descr entry */
#define PCI_DESCR(device, function, device_id, opt)	\
	.dev = (device),				\
	.func = (function),				\
	.dev_id = (device_id),				\
	.optional = opt

/*
 * PCI functions that make up one Sandy Bridge memory controller.
 * Only the DDRIO entry is flagged optional; get_dimm_config() copes with
 * a missing pci_ddrio by reporting the memory type as unknown.
 */
static const struct pci_id_descr pci_dev_descr_sbridge[] = {
		/* Processor Home Agent */
	{ PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0)	},

		/* Memory controller */
	{ PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0)	},
	{ PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0)	},
	{ PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0)	},
	{ PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0)	},
	{ PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0)	},
	{ PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0)	},
	{ PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1)	},

		/* System Address Decoder */
	{ PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0)		},
	{ PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0)		},

		/* Broadcast Registers */
	{ PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)		},
};
361
/* Build a struct pci_id_table entry from a descriptor array A */
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* Supported descriptor sets; the all-zero entry marks the end of the list */
static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
	{0,}			/* 0 terminated list. */
};
367
/*
 *	pci_device_id	table for which devices we are looking for
 *
 *	Only the memory controller TA function (device 15, function 0) is
 *	listed here.
 */
static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
	{0,}			/* 0 terminated list. */
};
375
376
377/****************************************************************************
David Mackey15ed1032012-04-17 11:30:52 -0700378 Ancillary status routines
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200379 ****************************************************************************/
380
381static inline int numrank(u32 mtr)
382{
383 int ranks = (1 << RANK_CNT_BITS(mtr));
384
385 if (ranks > 4) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300386 edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
387 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200388 return -EINVAL;
389 }
390
391 return ranks;
392}
393
394static inline int numrow(u32 mtr)
395{
396 int rows = (RANK_WIDTH_BITS(mtr) + 12);
397
398 if (rows < 13 || rows > 18) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300399 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n",
400 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200401 return -EINVAL;
402 }
403
404 return 1 << rows;
405}
406
407static inline int numcol(u32 mtr)
408{
409 int cols = (COL_WIDTH_BITS(mtr) + 10);
410
411 if (cols > 12) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300412 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n",
413 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200414 return -EINVAL;
415 }
416
417 return 1 << cols;
418}
419
420static struct sbridge_dev *get_sbridge_dev(u8 bus)
421{
422 struct sbridge_dev *sbridge_dev;
423
424 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
425 if (sbridge_dev->bus == bus)
426 return sbridge_dev;
427 }
428
429 return NULL;
430}
431
432static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
433 const struct pci_id_table *table)
434{
435 struct sbridge_dev *sbridge_dev;
436
437 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
438 if (!sbridge_dev)
439 return NULL;
440
441 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
442 GFP_KERNEL);
443 if (!sbridge_dev->pdev) {
444 kfree(sbridge_dev);
445 return NULL;
446 }
447
448 sbridge_dev->bus = bus;
449 sbridge_dev->n_devs = table->n_devs;
450 list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
451
452 return sbridge_dev;
453}
454
/*
 * Undo alloc_sbridge_dev(): unlink the entry from sbridge_edac_list, then
 * free its pci_dev pointer array and the entry itself.
 */
static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
{
	list_del(&sbridge_dev->list);
	kfree(sbridge_dev->pdev);
	kfree(sbridge_dev);
}
461
462/****************************************************************************
463 Memory check routines
464 ****************************************************************************/
465static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
466 unsigned func)
467{
468 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
469 int i;
470
471 if (!sbridge_dev)
472 return NULL;
473
474 for (i = 0; i < sbridge_dev->n_devs; i++) {
475 if (!sbridge_dev->pdev[i])
476 continue;
477
478 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
479 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300480 edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
481 bus, slot, func, sbridge_dev->pdev[i]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200482 return sbridge_dev->pdev[i];
483 }
484 }
485
486 return NULL;
487}
488
489/**
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300490 * check_if_ecc_is_active() - Checks if ECC is active
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200491 * bus: Device bus
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200492 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300493static int check_if_ecc_is_active(const u8 bus)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200494{
495 struct pci_dev *pdev = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200496 u32 mcmtr;
497
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200498 pdev = get_pdev_slot_func(bus, 15, 0);
499 if (!pdev) {
500 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
501 "%2x.%02d.%d!!!\n",
502 bus, 15, 0);
503 return -ENODEV;
504 }
505
506 pci_read_config_dword(pdev, MCMTR, &mcmtr);
507 if (!IS_ECC_ENABLED(mcmtr)) {
508 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
509 return -ENODEV;
510 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200511 return 0;
512}
513
/*
 * Read the controller configuration registers and describe every present
 * DIMM to the EDAC core.
 *
 * Records the source/node ids, the mirror, lockstep and page-mode flags
 * in the private data, then walks each channel's MTR registers and fills
 * in the dimm_info (size, grain, device type, memory type, EDAC mode and
 * label) of every DIMM whose "present" bit is set.
 *
 * Always returns 0.
 */
static int get_dimm_config(struct mem_ctl_info *mci)
{
	struct sbridge_pvt *pvt = mci->pvt_info;
	struct dimm_info *dimm;
	unsigned i, j, banks, ranks, rows, cols, npages;
	u64 size;
	u32 reg;
	enum edac_type mode;
	enum mem_type mtype;

	/* offset of the register later read through pci_ddrio */
	pvt->info.rankcfgr = SB_RANK_CFG_A;

	pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
	pvt->sbridge_dev->source_id = SOURCE_ID(reg);

	pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
	pvt->sbridge_dev->node_id = NODE_ID(reg);
	edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
		 pvt->sbridge_dev->mc,
		 pvt->sbridge_dev->node_id,
		 pvt->sbridge_dev->source_id);

	pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
	if (IS_MIRROR_ENABLED(reg)) {
		edac_dbg(0, "Memory mirror is enabled\n");
		pvt->is_mirrored = true;
	} else {
		edac_dbg(0, "Memory mirror is disabled\n");
		pvt->is_mirrored = false;
	}

	/* the lockstep bit also selects the EDAC mode reported per DIMM */
	pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
	if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
		edac_dbg(0, "Lockstep is enabled\n");
		mode = EDAC_S8ECD8ED;
		pvt->is_lockstep = true;
	} else {
		edac_dbg(0, "Lockstep is disabled\n");
		mode = EDAC_S4ECD4ED;
		pvt->is_lockstep = false;
	}
	if (IS_CLOSE_PG(pvt->info.mcmtr)) {
		edac_dbg(0, "address map is on closed page mode\n");
		pvt->is_close_pg = true;
	} else {
		edac_dbg(0, "address map is on open page mode\n");
		pvt->is_close_pg = false;
	}

	/* pci_ddrio may be NULL; the memory type is then left unknown */
	if (pvt->pci_ddrio) {
		pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
				      &reg);
		if (IS_RDIMM_ENABLED(reg)) {
			/* FIXME: Can also be LRDIMM */
			edac_dbg(0, "Memory is registered\n");
			mtype = MEM_RDDR3;
		} else {
			edac_dbg(0, "Memory is unregistered\n");
			mtype = MEM_DDR3;
		}
	} else {
		edac_dbg(0, "Cannot determine memory type\n");
		mtype = MEM_UNKNOWN;
	}

	/* On all supported DDR3 DIMM types, there are 8 banks available */
	banks = 8;

	for (i = 0; i < NUM_CHANNELS; i++) {
		u32 mtr;

		for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
				       i, j, 0);
			pci_read_config_dword(pvt->pci_tad[i],
					      mtr_regs[j], &mtr);
			edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
			if (IS_DIMM_PRESENT(mtr)) {
				pvt->channel[i].dimms++;

				/*
				 * NOTE(review): numrank(), numrow() and
				 * numcol() return -EINVAL on a bad field, but
				 * the results land in unsigned variables and
				 * are used unchecked in the size computation.
				 */
				ranks = numrank(mtr);
				rows = numrow(mtr);
				cols = numcol(mtr);

				/* DDR3 has 8 I/O banks */
				/*
				 * ">> (20 - 3)" multiplies by 8 and divides by
				 * 2^20; presumably 8 bytes per location,
				 * converted to MiB - TODO confirm.
				 */
				size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
				npages = MiB_TO_PAGES(size);

				edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
					 pvt->sbridge_dev->mc, i, j,
					 size, npages,
					 banks, ranks, rows, cols);

				dimm->nr_pages = npages;
				dimm->grain = 32;
				/* banks is fixed at 8 above, so this always picks DEV_X8 */
				dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
				dimm->mtype = mtype;
				dimm->edac_mode = mode;
				snprintf(dimm->label, sizeof(dimm->label),
					 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
					 pvt->sbridge_dev->source_id, i, j);
			}
		}
	}

	return 0;
}
621
/*
 * Read the address decoding registers and dump them via edac_dbg().
 *
 * Stores the decoded TOLM/TOHM limits in the private data; everything
 * else (SAD rules and interleave lists, TAD rules, per-channel TAD
 * offsets, RIR ranges and their interleave targets) is only printed.
 *
 * NOTE(review): the printed "GB" figures are a MiB value split with
 * div_u64_rem(..., 1000, ...), so they are MiB/1000 plus a remainder
 * rather than binary gigabytes - confirm this is the intended unit.
 */
static void get_memory_layout(const struct mem_ctl_info *mci)
{
	struct sbridge_pvt *pvt = mci->pvt_info;
	int i, j, k, n_sads, n_tads, sad_interl;
	u32 reg;
	u64 limit, prv = 0;
	u64 tmp_mb;
	u32 mb, kb;
	u32 rir_way;

	/*
	 * Step 1) Get TOLM/TOHM ranges
	 */

	/* Address range is 32:28 */
	pci_read_config_dword(pvt->pci_sad1, TOLM,
			      &reg);
	pvt->tolm = GET_TOLM(reg);
	tmp_mb = (1 + pvt->tolm) >> 20;

	mb = div_u64_rem(tmp_mb, 1000, &kb);
	edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);

	/* Address range is already 45:25 */
	pci_read_config_dword(pvt->pci_sad1, TOHM,
			      &reg);
	pvt->tohm = GET_TOHM(reg);
	tmp_mb = (1 + pvt->tohm) >> 20;

	mb = div_u64_rem(tmp_mb, 1000, &kb);
	edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);

	/*
	 * Step 2) Get SAD range and SAD Interleave list
	 * TAD registers contain the interleave wayness. However, it
	 * seems simpler to just discover it indirectly, with the
	 * algorithm below.
	 */
	prv = 0;
	for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
		/* SAD_LIMIT Address range is 45:26 */
		pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
				      &reg);
		limit = SAD_LIMIT(reg);

		if (!DRAM_RULE_ENABLE(reg))
			continue;

		/* an enabled rule whose limit does not grow ends the scan */
		if (limit <= prv)
			break;

		tmp_mb = (limit + 1) >> 20;
		mb = div_u64_rem(tmp_mb, 1000, &kb);
		edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
			 n_sads,
			 get_dram_attr(reg),
			 mb, kb,
			 ((u64)tmp_mb) << 20L,
			 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
			 reg);
		prv = limit;

		pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
				      &reg);
		sad_interl = sad_pkg(reg, 0);
		for (j = 0; j < 8; j++) {
			/* stop at the first later slot that repeats slot 0 */
			if (j > 0 && sad_interl == sad_pkg(reg, j))
				break;

			edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
				 n_sads, j, sad_pkg(reg, j));
		}
	}

	/*
	 * Step 3) Get TAD range
	 */
	prv = 0;
	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
		pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
				      &reg);
		limit = TAD_LIMIT(reg);
		/* n_tads is left at the number of rules with a growing limit */
		if (limit <= prv)
			break;
		tmp_mb = (limit + 1) >> 20;

		mb = div_u64_rem(tmp_mb, 1000, &kb);
		edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
			 n_tads, mb, kb,
			 ((u64)tmp_mb) << 20L,
			 (u32)TAD_SOCK(reg),
			 (u32)TAD_CH(reg),
			 (u32)TAD_TGT0(reg),
			 (u32)TAD_TGT1(reg),
			 (u32)TAD_TGT2(reg),
			 (u32)TAD_TGT3(reg),
			 reg);
		prv = limit;
	}

	/*
	 * Step 4) Get TAD offsets, per each channel
	 */
	for (i = 0; i < NUM_CHANNELS; i++) {
		/* channels without DIMMs (see get_dimm_config()) are skipped */
		if (!pvt->channel[i].dimms)
			continue;
		for (j = 0; j < n_tads; j++) {
			pci_read_config_dword(pvt->pci_tad[i],
					      tad_ch_nilv_offset[j],
					      &reg);
			tmp_mb = TAD_OFFSET(reg) >> 20;
			mb = div_u64_rem(tmp_mb, 1000, &kb);
			edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
				 i, j,
				 mb, kb,
				 ((u64)tmp_mb) << 20L,
				 reg);
		}
	}

	/*
	 * Step 5) Get RIR Wayness/Limit, per each channel
	 */
	for (i = 0; i < NUM_CHANNELS; i++) {
		if (!pvt->channel[i].dimms)
			continue;
		for (j = 0; j < MAX_RIR_RANGES; j++) {
			pci_read_config_dword(pvt->pci_tad[i],
					      rir_way_limit[j],
					      &reg);

			if (!IS_RIR_VALID(reg))
				continue;

			tmp_mb = RIR_LIMIT(reg) >> 20;
			/* RIR_WAY() is a 2-bit field, so rir_way is at most 8 (MAX_RIR_WAY) */
			rir_way = 1 << RIR_WAY(reg);
			mb = div_u64_rem(tmp_mb, 1000, &kb);
			edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
				 i, j,
				 mb, kb,
				 ((u64)tmp_mb) << 20L,
				 rir_way,
				 reg);

			for (k = 0; k < rir_way; k++) {
				pci_read_config_dword(pvt->pci_tad[i],
						      rir_offset[j][k],
						      &reg);
				tmp_mb = RIR_OFFSET(reg) << 6;

				mb = div_u64_rem(tmp_mb, 1000, &kb);
				edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
					 i, j, k,
					 mb, kb,
					 ((u64)tmp_mb) << 20L,
					 (u32)RIR_RNK_TGT(reg),
					 reg);
			}
		}
	}
}
783
784struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
785{
786 struct sbridge_dev *sbridge_dev;
787
788 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
789 if (sbridge_dev->node_id == node_id)
790 return sbridge_dev->mci;
791 }
792 return NULL;
793}
794
795static int get_memory_error_data(struct mem_ctl_info *mci,
796 u64 addr,
797 u8 *socket,
798 long *channel_mask,
799 u8 *rank,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300800 char **area_type, char *msg)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200801{
802 struct mem_ctl_info *new_mci;
803 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200804 int n_rir, n_sads, n_tads, sad_way, sck_xch;
805 int sad_interl, idx, base_ch;
806 int interleave_mode;
807 unsigned sad_interleave[MAX_INTERLEAVE];
808 u32 reg;
809 u8 ch_way,sck_way;
810 u32 tad_offset;
811 u32 rir_way;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300812 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200813 u64 ch_addr, offset, limit, prv = 0;
814
815
816 /*
817 * Step 0) Check if the address is at special memory ranges
818 * The check bellow is probably enough to fill all cases where
819 * the error is not inside a memory, except for the legacy
820 * range (e. g. VGA addresses). It is unlikely, however, that the
821 * memory controller would generate an error on that range.
822 */
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300823 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200824 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200825 return -EINVAL;
826 }
827 if (addr >= (u64)pvt->tohm) {
828 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200829 return -EINVAL;
830 }
831
832 /*
833 * Step 1) Get socket
834 */
835 for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
836 pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
837 &reg);
838
839 if (!DRAM_RULE_ENABLE(reg))
840 continue;
841
842 limit = SAD_LIMIT(reg);
843 if (limit <= prv) {
844 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200845 return -EINVAL;
846 }
847 if (addr <= limit)
848 break;
849 prv = limit;
850 }
851 if (n_sads == MAX_SAD) {
852 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200853 return -EINVAL;
854 }
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300855 *area_type = get_dram_attr(reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200856 interleave_mode = INTERLEAVE_MODE(reg);
857
858 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
859 &reg);
860 sad_interl = sad_pkg(reg, 0);
861 for (sad_way = 0; sad_way < 8; sad_way++) {
862 if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
863 break;
864 sad_interleave[sad_way] = sad_pkg(reg, sad_way);
Joe Perches956b9ba2012-04-29 17:08:39 -0300865 edac_dbg(0, "SAD interleave #%d: %d\n",
866 sad_way, sad_interleave[sad_way]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200867 }
Joe Perches956b9ba2012-04-29 17:08:39 -0300868 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
869 pvt->sbridge_dev->mc,
870 n_sads,
871 addr,
872 limit,
873 sad_way + 7,
874 interleave_mode ? "" : "XOR[18:16]");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200875 if (interleave_mode)
876 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
877 else
878 idx = (addr >> 6) & 7;
879 switch (sad_way) {
880 case 1:
881 idx = 0;
882 break;
883 case 2:
884 idx = idx & 1;
885 break;
886 case 4:
887 idx = idx & 3;
888 break;
889 case 8:
890 break;
891 default:
892 sprintf(msg, "Can't discover socket interleave");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200893 return -EINVAL;
894 }
895 *socket = sad_interleave[idx];
Joe Perches956b9ba2012-04-29 17:08:39 -0300896 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
897 idx, sad_way, *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200898
899 /*
900 * Move to the proper node structure, in order to access the
901 * right PCI registers
902 */
903 new_mci = get_mci_for_node_id(*socket);
904 if (!new_mci) {
905 sprintf(msg, "Struct for socket #%u wasn't initialized",
906 *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200907 return -EINVAL;
908 }
909 mci = new_mci;
910 pvt = mci->pvt_info;
911
912 /*
913 * Step 2) Get memory channel
914 */
915 prv = 0;
916 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
917 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
918 &reg);
919 limit = TAD_LIMIT(reg);
920 if (limit <= prv) {
921 sprintf(msg, "Can't discover the memory channel");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200922 return -EINVAL;
923 }
924 if (addr <= limit)
925 break;
926 prv = limit;
927 }
928 ch_way = TAD_CH(reg) + 1;
929 sck_way = TAD_SOCK(reg) + 1;
930 /*
931 * FIXME: Is it right to always use channel 0 for offsets?
932 */
933 pci_read_config_dword(pvt->pci_tad[0],
934 tad_ch_nilv_offset[n_tads],
935 &tad_offset);
936
937 if (ch_way == 3)
938 idx = addr >> 6;
939 else
940 idx = addr >> (6 + sck_way);
941 idx = idx % ch_way;
942
943 /*
944 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
945 */
946 switch (idx) {
947 case 0:
948 base_ch = TAD_TGT0(reg);
949 break;
950 case 1:
951 base_ch = TAD_TGT1(reg);
952 break;
953 case 2:
954 base_ch = TAD_TGT2(reg);
955 break;
956 case 3:
957 base_ch = TAD_TGT3(reg);
958 break;
959 default:
960 sprintf(msg, "Can't discover the TAD target");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200961 return -EINVAL;
962 }
963 *channel_mask = 1 << base_ch;
964
965 if (pvt->is_mirrored) {
966 *channel_mask |= 1 << ((base_ch + 2) % 4);
967 switch(ch_way) {
968 case 2:
969 case 4:
970 sck_xch = 1 << sck_way * (ch_way >> 1);
971 break;
972 default:
973 sprintf(msg, "Invalid mirror set. Can't decode addr");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200974 return -EINVAL;
975 }
976 } else
977 sck_xch = (1 << sck_way) * ch_way;
978
979 if (pvt->is_lockstep)
980 *channel_mask |= 1 << ((base_ch + 1) % 4);
981
982 offset = TAD_OFFSET(tad_offset);
983
Joe Perches956b9ba2012-04-29 17:08:39 -0300984 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
985 n_tads,
986 addr,
987 limit,
988 (u32)TAD_SOCK(reg),
989 ch_way,
990 offset,
991 idx,
992 base_ch,
993 *channel_mask);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200994
995 /* Calculate channel address */
996 /* Remove the TAD offset */
997
998 if (offset > addr) {
999 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1000 offset, addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001001 return -EINVAL;
1002 }
1003 addr -= offset;
1004 /* Store the low bits [0:6] of the addr */
1005 ch_addr = addr & 0x7f;
1006 /* Remove socket wayness and remove 6 bits */
1007 addr >>= 6;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001008 addr = div_u64(addr, sck_xch);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001009#if 0
1010 /* Divide by channel way */
1011 addr = addr / ch_way;
1012#endif
1013 /* Recover the last 6 bits */
1014 ch_addr |= addr << 6;
1015
1016 /*
1017 * Step 3) Decode rank
1018 */
1019 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
1020 pci_read_config_dword(pvt->pci_tad[base_ch],
1021 rir_way_limit[n_rir],
1022 &reg);
1023
1024 if (!IS_RIR_VALID(reg))
1025 continue;
1026
1027 limit = RIR_LIMIT(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001028 mb = div_u64_rem(limit >> 20, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -03001029 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
1030 n_rir,
1031 mb, kb,
1032 limit,
1033 1 << RIR_WAY(reg));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001034 if (ch_addr <= limit)
1035 break;
1036 }
1037 if (n_rir == MAX_RIR_RANGES) {
1038 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1039 ch_addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001040 return -EINVAL;
1041 }
1042 rir_way = RIR_WAY(reg);
1043 if (pvt->is_close_pg)
1044 idx = (ch_addr >> 6);
1045 else
1046 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
1047 idx %= 1 << rir_way;
1048
1049 pci_read_config_dword(pvt->pci_tad[base_ch],
1050 rir_offset[n_rir][idx],
1051 &reg);
1052 *rank = RIR_RNK_TGT(reg);
1053
Joe Perches956b9ba2012-04-29 17:08:39 -03001054 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
1055 n_rir,
1056 ch_addr,
1057 limit,
1058 rir_way,
1059 idx);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001060
1061 return 0;
1062}
1063
1064/****************************************************************************
1065 Device initialization routines: put/get, init/exit
1066 ****************************************************************************/
1067
1068/*
1069 * sbridge_put_all_devices 'put' all the devices that we have
1070 * reserved via 'get'
1071 */
1072static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
1073{
1074 int i;
1075
Joe Perches956b9ba2012-04-29 17:08:39 -03001076 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001077 for (i = 0; i < sbridge_dev->n_devs; i++) {
1078 struct pci_dev *pdev = sbridge_dev->pdev[i];
1079 if (!pdev)
1080 continue;
Joe Perches956b9ba2012-04-29 17:08:39 -03001081 edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1082 pdev->bus->number,
1083 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001084 pci_dev_put(pdev);
1085 }
1086}
1087
1088static void sbridge_put_all_devices(void)
1089{
1090 struct sbridge_dev *sbridge_dev, *tmp;
1091
1092 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
1093 sbridge_put_devices(sbridge_dev);
1094 free_sbridge_dev(sbridge_dev);
1095 }
1096}
1097
1098/*
1099 * sbridge_get_all_devices Find and perform 'get' operation on the MCH's
1100 * device/functions we want to reference for this driver
1101 *
1102 * Need to 'get' device 16 func 1 and func 2
1103 */
1104static int sbridge_get_onedevice(struct pci_dev **prev,
1105 u8 *num_mc,
1106 const struct pci_id_table *table,
1107 const unsigned devno)
1108{
1109 struct sbridge_dev *sbridge_dev;
1110 const struct pci_id_descr *dev_descr = &table->descr[devno];
1111
1112 struct pci_dev *pdev = NULL;
1113 u8 bus = 0;
1114
1115 sbridge_printk(KERN_INFO,
1116 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
1117 dev_descr->dev, dev_descr->func,
1118 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1119
1120 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1121 dev_descr->dev_id, *prev);
1122
1123 if (!pdev) {
1124 if (*prev) {
1125 *prev = pdev;
1126 return 0;
1127 }
1128
1129 if (dev_descr->optional)
1130 return 0;
1131
1132 if (devno == 0)
1133 return -ENODEV;
1134
1135 sbridge_printk(KERN_INFO,
1136 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1137 dev_descr->dev, dev_descr->func,
1138 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1139
1140 /* End of list, leave */
1141 return -ENODEV;
1142 }
1143 bus = pdev->bus->number;
1144
1145 sbridge_dev = get_sbridge_dev(bus);
1146 if (!sbridge_dev) {
1147 sbridge_dev = alloc_sbridge_dev(bus, table);
1148 if (!sbridge_dev) {
1149 pci_dev_put(pdev);
1150 return -ENOMEM;
1151 }
1152 (*num_mc)++;
1153 }
1154
1155 if (sbridge_dev->pdev[devno]) {
1156 sbridge_printk(KERN_ERR,
1157 "Duplicated device for "
1158 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1159 bus, dev_descr->dev, dev_descr->func,
1160 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1161 pci_dev_put(pdev);
1162 return -ENODEV;
1163 }
1164
1165 sbridge_dev->pdev[devno] = pdev;
1166
1167 /* Sanity check */
1168 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1169 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1170 sbridge_printk(KERN_ERR,
1171 "Device PCI ID %04x:%04x "
1172 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
1173 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1174 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1175 bus, dev_descr->dev, dev_descr->func);
1176 return -ENODEV;
1177 }
1178
1179 /* Be sure that the device is enabled */
1180 if (unlikely(pci_enable_device(pdev) < 0)) {
1181 sbridge_printk(KERN_ERR,
1182 "Couldn't enable "
1183 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1184 bus, dev_descr->dev, dev_descr->func,
1185 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1186 return -ENODEV;
1187 }
1188
Joe Perches956b9ba2012-04-29 17:08:39 -03001189 edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
1190 bus, dev_descr->dev, dev_descr->func,
1191 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001192
1193 /*
1194 * As stated on drivers/pci/search.c, the reference count for
1195 * @from is always decremented if it is not %NULL. So, as we need
1196 * to get all devices up to null, we need to do a get for the device
1197 */
1198 pci_dev_get(pdev);
1199
1200 *prev = pdev;
1201
1202 return 0;
1203}
1204
1205static int sbridge_get_all_devices(u8 *num_mc)
1206{
1207 int i, rc;
1208 struct pci_dev *pdev = NULL;
1209 const struct pci_id_table *table = pci_dev_descr_sbridge_table;
1210
1211 while (table && table->descr) {
1212 for (i = 0; i < table->n_devs; i++) {
1213 pdev = NULL;
1214 do {
1215 rc = sbridge_get_onedevice(&pdev, num_mc,
1216 table, i);
1217 if (rc < 0) {
1218 if (i == 0) {
1219 i = table->n_devs;
1220 break;
1221 }
1222 sbridge_put_all_devices();
1223 return -ENODEV;
1224 }
1225 } while (pdev);
1226 }
1227 table++;
1228 }
1229
1230 return 0;
1231}
1232
1233static int mci_bind_devs(struct mem_ctl_info *mci,
1234 struct sbridge_dev *sbridge_dev)
1235{
1236 struct sbridge_pvt *pvt = mci->pvt_info;
1237 struct pci_dev *pdev;
1238 int i, func, slot;
1239
1240 for (i = 0; i < sbridge_dev->n_devs; i++) {
1241 pdev = sbridge_dev->pdev[i];
1242 if (!pdev)
1243 continue;
1244 slot = PCI_SLOT(pdev->devfn);
1245 func = PCI_FUNC(pdev->devfn);
1246 switch (slot) {
1247 case 12:
1248 switch (func) {
1249 case 6:
1250 pvt->pci_sad0 = pdev;
1251 break;
1252 case 7:
1253 pvt->pci_sad1 = pdev;
1254 break;
1255 default:
1256 goto error;
1257 }
1258 break;
1259 case 13:
1260 switch (func) {
1261 case 6:
1262 pvt->pci_br = pdev;
1263 break;
1264 default:
1265 goto error;
1266 }
1267 break;
1268 case 14:
1269 switch (func) {
1270 case 0:
1271 pvt->pci_ha0 = pdev;
1272 break;
1273 default:
1274 goto error;
1275 }
1276 break;
1277 case 15:
1278 switch (func) {
1279 case 0:
1280 pvt->pci_ta = pdev;
1281 break;
1282 case 1:
1283 pvt->pci_ras = pdev;
1284 break;
1285 case 2:
1286 case 3:
1287 case 4:
1288 case 5:
1289 pvt->pci_tad[func - 2] = pdev;
1290 break;
1291 default:
1292 goto error;
1293 }
1294 break;
1295 case 17:
1296 switch (func) {
1297 case 0:
1298 pvt->pci_ddrio = pdev;
1299 break;
1300 default:
1301 goto error;
1302 }
1303 break;
1304 default:
1305 goto error;
1306 }
1307
Joe Perches956b9ba2012-04-29 17:08:39 -03001308 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
1309 sbridge_dev->bus,
1310 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1311 pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001312 }
1313
1314 /* Check if everything were registered */
1315 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
Luck, Tonyde4772c2013-03-28 09:59:15 -07001316 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001317 goto enodev;
1318
1319 for (i = 0; i < NUM_CHANNELS; i++) {
1320 if (!pvt->pci_tad[i])
1321 goto enodev;
1322 }
1323 return 0;
1324
1325enodev:
1326 sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
1327 return -ENODEV;
1328
1329error:
1330 sbridge_printk(KERN_ERR, "Device %d, function %d "
1331 "is out of the expected range\n",
1332 slot, func);
1333 return -EINVAL;
1334}
1335
1336/****************************************************************************
1337 Error check routines
1338 ****************************************************************************/
1339
1340/*
1341 * While Sandy Bridge has error count registers, SMI BIOS read values from
1342 * and resets the counters. So, they are not reliable for the OS to read
1343 * from them. So, we have no option but to just trust on whatever MCE is
1344 * telling us about the errors.
1345 */
1346static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1347 const struct mce *m)
1348{
1349 struct mem_ctl_info *new_mci;
1350 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001351 enum hw_event_mc_err_type tp_event;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001352 char *type, *optype, msg[256];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001353 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1354 bool overflow = GET_BITFIELD(m->status, 62, 62);
1355 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
1356 bool recoverable = GET_BITFIELD(m->status, 56, 56);
1357 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
1358 u32 mscod = GET_BITFIELD(m->status, 16, 31);
1359 u32 errcode = GET_BITFIELD(m->status, 0, 15);
1360 u32 channel = GET_BITFIELD(m->status, 0, 3);
1361 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1362 long channel_mask, first_channel;
1363 u8 rank, socket;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001364 int rc, dimm;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001365 char *area_type = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001366
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001367 if (uncorrected_error) {
1368 if (ripv) {
1369 type = "FATAL";
1370 tp_event = HW_EVENT_ERR_FATAL;
1371 } else {
1372 type = "NON_FATAL";
1373 tp_event = HW_EVENT_ERR_UNCORRECTED;
1374 }
1375 } else {
1376 type = "CORRECTED";
1377 tp_event = HW_EVENT_ERR_CORRECTED;
1378 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001379
1380 /*
David Mackey15ed1032012-04-17 11:30:52 -07001381 * According with Table 15-9 of the Intel Architecture spec vol 3A,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001382 * memory errors should fit in this mask:
1383 * 000f 0000 1mmm cccc (binary)
1384 * where:
1385 * f = Correction Report Filtering Bit. If 1, subsequent errors
1386 * won't be shown
1387 * mmm = error type
1388 * cccc = channel
1389 * If the mask doesn't match, report an error to the parsing logic
1390 */
1391 if (! ((errcode & 0xef80) == 0x80)) {
1392 optype = "Can't parse: it is not a mem";
1393 } else {
1394 switch (optypenum) {
1395 case 0:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001396 optype = "generic undef request error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001397 break;
1398 case 1:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001399 optype = "memory read error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001400 break;
1401 case 2:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001402 optype = "memory write error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001403 break;
1404 case 3:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001405 optype = "addr/cmd error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001406 break;
1407 case 4:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001408 optype = "memory scrubbing error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001409 break;
1410 default:
1411 optype = "reserved";
1412 break;
1413 }
1414 }
1415
1416 rc = get_memory_error_data(mci, m->addr, &socket,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001417 &channel_mask, &rank, &area_type, msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001418 if (rc < 0)
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001419 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001420 new_mci = get_mci_for_node_id(socket);
1421 if (!new_mci) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001422 strcpy(msg, "Error: socket got corrupted!");
1423 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001424 }
1425 mci = new_mci;
1426 pvt = mci->pvt_info;
1427
1428 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
1429
1430 if (rank < 4)
1431 dimm = 0;
1432 else if (rank < 8)
1433 dimm = 1;
1434 else
1435 dimm = 2;
1436
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001437
1438 /*
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001439 * FIXME: On some memory configurations (mirror, lockstep), the
1440 * Memory Controller can't point the error to a single DIMM. The
1441 * EDAC core should be handling the channel mask, in order to point
1442 * to the group of dimm's where the error may be happening.
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001443 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001444 snprintf(msg, sizeof(msg),
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001445 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001446 overflow ? " OVERFLOW" : "",
1447 (uncorrected_error && recoverable) ? " recoverable" : "",
1448 area_type,
1449 mscod, errcode,
1450 socket,
1451 channel_mask,
1452 rank);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001453
Joe Perches956b9ba2012-04-29 17:08:39 -03001454 edac_dbg(0, "%s\n", msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001455
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001456 /* FIXME: need support for channel mask */
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001457
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001458 /* Call the helper to output message */
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001459 edac_mc_handle_error(tp_event, mci, core_err_cnt,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001460 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1461 channel, dimm, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001462 optype, msg);
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001463 return;
1464err_parsing:
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001465 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001466 -1, -1, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001467 msg, "");
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001468
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001469}
1470
1471/*
1472 * sbridge_check_error Retrieve and process errors reported by the
1473 * hardware. Called by the Core module.
1474 */
1475static void sbridge_check_error(struct mem_ctl_info *mci)
1476{
1477 struct sbridge_pvt *pvt = mci->pvt_info;
1478 int i;
1479 unsigned count = 0;
1480 struct mce *m;
1481
1482 /*
1483 * MCE first step: Copy all mce errors into a temporary buffer
1484 * We use a double buffering here, to reduce the risk of
1485 * loosing an error.
1486 */
1487 smp_rmb();
1488 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1489 % MCE_LOG_LEN;
1490 if (!count)
1491 return;
1492
1493 m = pvt->mce_outentry;
1494 if (pvt->mce_in + count > MCE_LOG_LEN) {
1495 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1496
1497 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1498 smp_wmb();
1499 pvt->mce_in = 0;
1500 count -= l;
1501 m += l;
1502 }
1503 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1504 smp_wmb();
1505 pvt->mce_in += count;
1506
1507 smp_rmb();
1508 if (pvt->mce_overrun) {
1509 sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
1510 pvt->mce_overrun);
1511 smp_wmb();
1512 pvt->mce_overrun = 0;
1513 }
1514
1515 /*
1516 * MCE second step: parse errors and display
1517 */
1518 for (i = 0; i < count; i++)
1519 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
1520}
1521
1522/*
1523 * sbridge_mce_check_error Replicates mcelog routine to get errors
1524 * This routine simply queues mcelog errors, and
1525 * return. The error itself should be handled later
1526 * by sbridge_check_error.
1527 * WARNING: As this routine should be called at NMI time, extra care should
1528 * be taken to avoid deadlocks, and to be as fast as possible.
1529 */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001530static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1531 void *data)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001532{
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001533 struct mce *mce = (struct mce *)data;
1534 struct mem_ctl_info *mci;
1535 struct sbridge_pvt *pvt;
1536
1537 mci = get_mci_for_node_id(mce->socketid);
1538 if (!mci)
1539 return NOTIFY_BAD;
1540 pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001541
1542 /*
1543 * Just let mcelog handle it if the error is
1544 * outside the memory controller. A memory error
1545 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
1546 * bit 12 has an special meaning.
1547 */
1548 if ((mce->status & 0xefff) >> 7 != 1)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001549 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001550
1551 printk("sbridge: HANDLING MCE MEMORY ERROR\n");
1552
1553 printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
1554 mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
1555 printk("TSC %llx ", mce->tsc);
1556 printk("ADDR %llx ", mce->addr);
1557 printk("MISC %llx ", mce->misc);
1558
1559 printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
1560 mce->cpuvendor, mce->cpuid, mce->time,
1561 mce->socketid, mce->apicid);
1562
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001563 /* Only handle if it is the right mc controller */
1564 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001565 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001566
1567 smp_rmb();
1568 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1569 smp_wmb();
1570 pvt->mce_overrun++;
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001571 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001572 }
1573
1574 /* Copy memory error at the ringbuffer */
1575 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1576 smp_wmb();
1577 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1578
1579 /* Handle fatal errors immediately */
1580 if (mce->mcgstatus & 1)
1581 sbridge_check_error(mci);
1582
1583 /* Advice mcelog that the error were handled */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001584 return NOTIFY_STOP;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001585}
1586
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001587static struct notifier_block sbridge_mce_dec = {
1588 .notifier_call = sbridge_mce_check_error,
1589};
1590
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001591/****************************************************************************
1592 EDAC register/unregister logic
1593 ****************************************************************************/
1594
1595static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1596{
1597 struct mem_ctl_info *mci = sbridge_dev->mci;
1598 struct sbridge_pvt *pvt;
1599
1600 if (unlikely(!mci || !mci->pvt_info)) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001601 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001602
1603 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
1604 return;
1605 }
1606
1607 pvt = mci->pvt_info;
1608
Joe Perches956b9ba2012-04-29 17:08:39 -03001609 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1610 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001611
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001612 /* Remove MC sysfs nodes */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001613 edac_mc_del_mc(mci->pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001614
Joe Perches956b9ba2012-04-29 17:08:39 -03001615 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001616 kfree(mci->ctl_name);
1617 edac_mc_free(mci);
1618 sbridge_dev->mci = NULL;
1619}
1620
1621static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1622{
1623 struct mem_ctl_info *mci;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001624 struct edac_mc_layer layers[2];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001625 struct sbridge_pvt *pvt;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001626 int rc;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001627
1628 /* Check the number of active and not disabled channels */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001629 rc = check_if_ecc_is_active(sbridge_dev->bus);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001630 if (unlikely(rc < 0))
1631 return rc;
1632
1633 /* allocate a new MC control structure */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001634 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1635 layers[0].size = NUM_CHANNELS;
1636 layers[0].is_virt_csrow = false;
1637 layers[1].type = EDAC_MC_LAYER_SLOT;
1638 layers[1].size = MAX_DIMMS;
1639 layers[1].is_virt_csrow = true;
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -03001640 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001641 sizeof(*pvt));
1642
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001643 if (unlikely(!mci))
1644 return -ENOMEM;
1645
Joe Perches956b9ba2012-04-29 17:08:39 -03001646 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1647 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001648
1649 pvt = mci->pvt_info;
1650 memset(pvt, 0, sizeof(*pvt));
1651
1652 /* Associate sbridge_dev and mci for future usage */
1653 pvt->sbridge_dev = sbridge_dev;
1654 sbridge_dev->mci = mci;
1655
1656 mci->mtype_cap = MEM_FLAG_DDR3;
1657 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1658 mci->edac_cap = EDAC_FLAG_NONE;
1659 mci->mod_name = "sbridge_edac.c";
1660 mci->mod_ver = SBRIDGE_REVISION;
1661 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
1662 mci->dev_name = pci_name(sbridge_dev->pdev[0]);
1663 mci->ctl_page_to_phys = NULL;
1664
1665 /* Set the function pointer to an actual operation function */
1666 mci->edac_check = sbridge_check_error;
1667
1668 /* Store pci devices at mci for faster access */
1669 rc = mci_bind_devs(mci, sbridge_dev);
1670 if (unlikely(rc < 0))
1671 goto fail0;
1672
1673 /* Get dimm basic config and the memory layout */
1674 get_dimm_config(mci);
1675 get_memory_layout(mci);
1676
1677 /* record ptr to the generic device */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001678 mci->pdev = &sbridge_dev->pdev[0]->dev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001679
1680 /* add this new MC control structure to EDAC's list of MCs */
1681 if (unlikely(edac_mc_add_mc(mci))) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001682 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001683 rc = -EINVAL;
1684 goto fail0;
1685 }
1686
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001687 return 0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001688
1689fail0:
1690 kfree(mci->ctl_name);
1691 edac_mc_free(mci);
1692 sbridge_dev->mci = NULL;
1693 return rc;
1694}
1695
1696/*
1697 * sbridge_probe Probe for ONE instance of device to see if it is
1698 * present.
1699 * return:
1700 * 0 for FOUND a device
1701 * < 0 for error code
1702 */
1703
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001704static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001705{
1706 int rc;
1707 u8 mc, num_mc = 0;
1708 struct sbridge_dev *sbridge_dev;
1709
1710 /* get the pci devices we want to reserve for our use */
1711 mutex_lock(&sbridge_edac_lock);
1712
1713 /*
1714 * All memory controllers are allocated at the first pass.
1715 */
1716 if (unlikely(probed >= 1)) {
1717 mutex_unlock(&sbridge_edac_lock);
1718 return -ENODEV;
1719 }
1720 probed++;
1721
1722 rc = sbridge_get_all_devices(&num_mc);
1723 if (unlikely(rc < 0))
1724 goto fail0;
1725 mc = 0;
1726
1727 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001728 edac_dbg(0, "Registering MC#%d (%d of %d)\n",
1729 mc, mc + 1, num_mc);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001730 sbridge_dev->mc = mc++;
1731 rc = sbridge_register_mci(sbridge_dev);
1732 if (unlikely(rc < 0))
1733 goto fail1;
1734 }
1735
1736 sbridge_printk(KERN_INFO, "Driver loaded.\n");
1737
1738 mutex_unlock(&sbridge_edac_lock);
1739 return 0;
1740
1741fail1:
1742 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1743 sbridge_unregister_mci(sbridge_dev);
1744
1745 sbridge_put_all_devices();
1746fail0:
1747 mutex_unlock(&sbridge_edac_lock);
1748 return rc;
1749}
1750
1751/*
1752 * sbridge_remove destructor for one instance of device
1753 *
1754 */
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001755static void sbridge_remove(struct pci_dev *pdev)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001756{
1757 struct sbridge_dev *sbridge_dev;
1758
Joe Perches956b9ba2012-04-29 17:08:39 -03001759 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001760
1761 /*
1762 * we have a trouble here: pdev value for removal will be wrong, since
1763 * it will point to the X58 register used to detect that the machine
1764 * is a Nehalem or upper design. However, due to the way several PCI
1765 * devices are grouped together to provide MC functionality, we need
1766 * to use a different method for releasing the devices
1767 */
1768
1769 mutex_lock(&sbridge_edac_lock);
1770
1771 if (unlikely(!probed)) {
1772 mutex_unlock(&sbridge_edac_lock);
1773 return;
1774 }
1775
1776 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1777 sbridge_unregister_mci(sbridge_dev);
1778
1779 /* Release PCI resources */
1780 sbridge_put_all_devices();
1781
1782 probed--;
1783
1784 mutex_unlock(&sbridge_edac_lock);
1785}
1786
1787MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
1788
1789/*
1790 * sbridge_driver pci_driver structure for this module
1791 *
1792 */
1793static struct pci_driver sbridge_driver = {
1794 .name = "sbridge_edac",
1795 .probe = sbridge_probe,
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001796 .remove = sbridge_remove,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001797 .id_table = sbridge_pci_tbl,
1798};
1799
1800/*
1801 * sbridge_init Module entry function
1802 * Try to initialize this module for its devices
1803 */
1804static int __init sbridge_init(void)
1805{
1806 int pci_rc;
1807
Joe Perches956b9ba2012-04-29 17:08:39 -03001808 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001809
1810 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1811 opstate_init();
1812
1813 pci_rc = pci_register_driver(&sbridge_driver);
1814
Chen Gonge35fca42012-05-08 20:40:12 -03001815 if (pci_rc >= 0) {
1816 mce_register_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001817 return 0;
Chen Gonge35fca42012-05-08 20:40:12 -03001818 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001819
1820 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
1821 pci_rc);
1822
1823 return pci_rc;
1824}
1825
1826/*
1827 * sbridge_exit() Module exit function
1828 * Unregister the driver
1829 */
1830static void __exit sbridge_exit(void)
1831{
Joe Perches956b9ba2012-04-29 17:08:39 -03001832 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001833 pci_unregister_driver(&sbridge_driver);
Chen Gonge35fca42012-05-08 20:40:12 -03001834 mce_unregister_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001835}
1836
1837module_init(sbridge_init);
1838module_exit(sbridge_exit);
1839
1840module_param(edac_op_state, int, 0444);
1841MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1842
1843MODULE_LICENSE("GPL");
1844MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1845MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1846MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
1847 SBRIDGE_REVISION);