Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor family Sandy Bridge.
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License version 2 only.
8 *
9 * Copyright (c) 2011 by:
10 * Mauro Carvalho Chehab <mchehab@redhat.com>
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/edac.h>
20#include <linux/mmzone.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020021#include <linux/smp.h>
22#include <linux/bitmap.h>
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -030023#include <linux/math64.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020024#include <asm/processor.h>
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -020025#include <asm/mce.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020026
27#include "edac_core.h"
28
29/* Static vars */
30static LIST_HEAD(sbridge_edac_list);
31static DEFINE_MUTEX(sbridge_edac_lock);
32static int probed;
33
34/*
 35 * Bump this module version whenever modifications are made
36 */
37#define SBRIDGE_REVISION " Ver: 1.0.0 "
38#define EDAC_MOD_STR "sbridge_edac"
39
40/*
41 * Debug macros
42 */
43#define sbridge_printk(level, fmt, arg...) \
44 edac_printk(level, "sbridge", fmt, ##arg)
45
46#define sbridge_mc_printk(mci, level, fmt, arg...) \
47 edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
48
49/*
 50 * Extract a bit field from register value <v>, spanning bits <lo> to <hi>
51 */
52#define GET_BITFIELD(v, lo, hi) \
53 (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo))
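/*
 * Illustrative example of the macro above (the value is hypothetical, not
 * taken from any datasheet): GET_BITFIELD(0x12345678, 4, 7) builds the mask
 * ((1ULL << 4) - 1) << 4 = 0xf0, ANDs it with the value (giving 0x70) and
 * shifts the result back down, yielding 0x7, i.e. bits 7:4 of the register.
 */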
54
55/*
56 * sbridge Memory Controller Registers
57 */
58
59/*
 60 * FIXME: For now, let's order by device function, as it makes the
David Mackey15ed1032012-04-17 11:30:52 -070061 * driver's development process easier. This table should be
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020062 * moved to pci_id.h when submitted upstream
63 */
64#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
65#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
66#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
67#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */
68#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */
69#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */
70#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */
71#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */
72#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */
73#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */
74#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */
75
76 /*
 77 * Currently unused, but will be needed by future
 78 * implementations, as they hold the error counters
79 */
80#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */
81#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */
82#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */
83#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */
84
85/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -030086static const u32 sbridge_dram_rule[] = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020087 0x80, 0x88, 0x90, 0x98, 0xa0,
88 0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
89};
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020090
91#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
92#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3)
93#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1)
94#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
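/*
 * Example decode of a SAD dram rule, using a hypothetical register value of
 * 0x00000041: DRAM_RULE_ENABLE = 1 (rule active), INTERLEAVE_MODE = 0
 * ([8:6] XOR [18:16]), DRAM_ATTR = 0 (DRAM) and SAD_LIMIT = (1 << 26) |
 * 0x3ffffff = 0x7ffffff, i.e. the rule covers addresses up to 128 MB - 1.
 */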
95
96static char *get_dram_attr(u32 reg)
97{
98 switch(DRAM_ATTR(reg)) {
99 case 0:
100 return "DRAM";
101 case 1:
102 return "MMCFG";
103 case 2:
104 return "NXM";
105 default:
106 return "unknown";
107 }
108}
109
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300110static const u32 sbridge_interleave_list[] = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200111 0x84, 0x8c, 0x94, 0x9c, 0xa4,
112 0xac, 0xb4, 0xbc, 0xc4, 0xcc,
113};
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200114
115#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2)
116#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5)
117#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10)
118#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13)
119#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18)
120#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21)
121#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26)
122#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29)
123
124static inline int sad_pkg(u32 reg, int interleave)
125{
126 switch (interleave) {
127 case 0:
128 return SAD_PKG0(reg);
129 case 1:
130 return SAD_PKG1(reg);
131 case 2:
132 return SAD_PKG2(reg);
133 case 3:
134 return SAD_PKG3(reg);
135 case 4:
136 return SAD_PKG4(reg);
137 case 5:
138 return SAD_PKG5(reg);
139 case 6:
140 return SAD_PKG6(reg);
141 case 7:
142 return SAD_PKG7(reg);
143 default:
144 return -EINVAL;
145 }
146}
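/*
 * Usage sketch with a hypothetical interleave list register value of
 * 0x0000000c: sad_pkg(0xc, 0) = 4 and sad_pkg(0xc, 1) = 1, i.e. the first
 * two interleave slots point to packages 4 and 1; slots 2..7 read as 0.
 */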
147
148/* Devices 12 Function 7 */
149
150#define TOLM 0x80
151#define TOHM 0x84
152
153#define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff)
154#define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
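/*
 * Illustrative arithmetic only (hypothetical register value): if bits 3:0
 * of the TOLM register read 0x8, GET_TOLM() returns (0x8 << 28) | 0x3ffffff
 * = 0x83ffffff, i.e. an inclusive low-memory limit just above 2 GB.
 */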
155
156/* Device 13 Function 6 */
157
158#define SAD_TARGET 0xf0
159
160#define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11)
161
162#define SAD_CONTROL 0xf4
163
164#define NODE_ID(reg) GET_BITFIELD(reg, 0, 2)
165
166/* Device 14 function 0 */
167
168static const u32 tad_dram_rule[] = {
169 0x40, 0x44, 0x48, 0x4c,
170 0x50, 0x54, 0x58, 0x5c,
171 0x60, 0x64, 0x68, 0x6c,
172};
173#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
174
175#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
176#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
177#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
178#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
179#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
180#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
181#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
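/*
 * Worked example with a hypothetical TAD dram rule value of 0x000017e4:
 * TAD_LIMIT = (1 << 26) | 0x3ffffff = 0x7ffffff (up to 128 MB - 1),
 * TAD_SOCK = 1 and TAD_CH = 3 (2-way socket / 4-way channel interleave,
 * as the driver adds 1 to each), and TAD_TGT0..3 = 0, 1, 2, 3.
 */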
182
183/* Device 15, function 0 */
184
185#define MCMTR 0x7c
186
187#define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2)
188#define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1)
189#define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0)
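/*
 * Example, assuming a hypothetical MCMTR value of 0x5 (binary 101):
 * IS_ECC_ENABLED = 1, IS_LOCKSTEP_ENABLED = 0 and IS_CLOSE_PG = 1, i.e.
 * ECC on, independent-channel mode, closed page address mapping.
 */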
190
191/* Device 15, function 1 */
192
193#define RASENABLES 0xac
194#define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0)
195
196/* Device 15, functions 2-5 */
197
198static const int mtr_regs[] = {
199 0x80, 0x84, 0x88,
200};
201
202#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19)
203#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14)
204#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13)
205#define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4)
206#define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1)
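/*
 * Example decode of an MTR register, using the hypothetical value 0x500c:
 * IS_DIMM_PRESENT = 1, RANK_CNT_BITS = 1 -> 2 ranks, RANK_WIDTH_BITS = 3
 * -> 2^(3 + 12) = 32768 rows, COL_WIDTH_BITS = 0 -> 2^(0 + 10) = 1024
 * columns (see numrank(), numrow() and numcol() below).
 */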
207
208static const u32 tad_ch_nilv_offset[] = {
209 0x90, 0x94, 0x98, 0x9c,
210 0xa0, 0xa4, 0xa8, 0xac,
211 0xb0, 0xb4, 0xb8, 0xbc,
212};
213#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29)
214#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26)
215
216static const u32 rir_way_limit[] = {
217 0x108, 0x10c, 0x110, 0x114, 0x118,
218};
219#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
220
221#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
222#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
223#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff)
224
225#define MAX_RIR_WAY 8
226
227static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
228 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
229 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
230 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
231 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
232 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
233};
234
235#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
236#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
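/*
 * Example decode of a RIR way/limit register, assuming the hypothetical
 * value 0x90000004: IS_RIR_VALID = 1, RIR_WAY = 1 (2-way rank interleave)
 * and RIR_LIMIT = (2 << 29) | 0x1fffffff = 0x5fffffff, i.e. up to 1.5 GB - 1.
 */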
237
238/* Device 16, functions 2-7 */
239
240/*
241 * FIXME: Implement the error count reads directly
242 */
243
244static const u32 correrrcnt[] = {
245 0x104, 0x108, 0x10c, 0x110,
246};
247
248#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
249#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
250#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
251#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
252
253static const u32 correrrthrsld[] = {
254 0x11c, 0x120, 0x124, 0x128,
255};
256
257#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
258#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
259
260
261/* Device 17, function 0 */
262
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300263#define SB_RANK_CFG_A 0x0328
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200264
265#define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11)
266
267/*
268 * sbridge structs
269 */
270
271#define NUM_CHANNELS 4
272#define MAX_DIMMS 3 /* Max DIMMS per channel */
273
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300274struct sbridge_pvt;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200275struct sbridge_info {
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300276 u32 mcmtr;
277 u32 rankcfgr;
278 u64 (*get_tolm)(struct sbridge_pvt *pvt);
279 u64 (*get_tohm)(struct sbridge_pvt *pvt);
280 const u32 *dram_rule;
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300281 const u32 *interleave_list;
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300282 u8 max_sad;
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300283 u8 max_interleave;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200284};
285
286struct sbridge_channel {
287 u32 ranks;
288 u32 dimms;
289};
290
291struct pci_id_descr {
292 int dev;
293 int func;
294 int dev_id;
295 int optional;
296};
297
298struct pci_id_table {
299 const struct pci_id_descr *descr;
300 int n_devs;
301};
302
303struct sbridge_dev {
304 struct list_head list;
305 u8 bus, mc;
306 u8 node_id, source_id;
307 struct pci_dev **pdev;
308 int n_devs;
309 struct mem_ctl_info *mci;
310};
311
312struct sbridge_pvt {
313 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
314 struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0;
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300315 struct pci_dev *pci_br0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200316 struct pci_dev *pci_tad[NUM_CHANNELS];
317
318 struct sbridge_dev *sbridge_dev;
319
320 struct sbridge_info info;
321 struct sbridge_channel channel[NUM_CHANNELS];
322
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200323 /* Memory type detection */
324 bool is_mirrored, is_lockstep, is_close_pg;
325
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200326 /* Fifo double buffers */
327 struct mce mce_entry[MCE_LOG_LEN];
328 struct mce mce_outentry[MCE_LOG_LEN];
329
330 /* Fifo in/out counters */
331 unsigned mce_in, mce_out;
332
 333 /* Count of errors that were dropped because the fifo overran */
334 unsigned mce_overrun;
335
336 /* Memory description */
337 u64 tolm, tohm;
338};
339
Luck, Tonyde4772c2013-03-28 09:59:15 -0700340#define PCI_DESCR(device, function, device_id, opt) \
341 .dev = (device), \
342 .func = (function), \
343 .dev_id = (device_id), \
344 .optional = opt
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200345
346static const struct pci_id_descr pci_dev_descr_sbridge[] = {
347 /* Processor Home Agent */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700348 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200349
350 /* Memory controller */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700351 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
352 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
353 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
354 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
355 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
356 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
357 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200358
359 /* System Address Decoder */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700360 { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
361 { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200362
363 /* Broadcast Registers */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700364 { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200365};
366
367#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
368static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
369 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
370 {0,} /* 0 terminated list. */
371};
372
373/*
374 * pci_device_id table for which devices we are looking for
375 */
Lionel Debroux36c46f32012-02-27 07:41:47 +0100376static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200377 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
378 {0,} /* 0 terminated list. */
379};
380
381
382/****************************************************************************
David Mackey15ed1032012-04-17 11:30:52 -0700383 Ancillary status routines
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200384 ****************************************************************************/
385
386static inline int numrank(u32 mtr)
387{
388 int ranks = (1 << RANK_CNT_BITS(mtr));
389
390 if (ranks > 4) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300391 edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
392 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200393 return -EINVAL;
394 }
395
396 return ranks;
397}
398
399static inline int numrow(u32 mtr)
400{
401 int rows = (RANK_WIDTH_BITS(mtr) + 12);
402
403 if (rows < 13 || rows > 18) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300404 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n",
405 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200406 return -EINVAL;
407 }
408
409 return 1 << rows;
410}
411
412static inline int numcol(u32 mtr)
413{
414 int cols = (COL_WIDTH_BITS(mtr) + 10);
415
416 if (cols > 12) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300417 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n",
418 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200419 return -EINVAL;
420 }
421
422 return 1 << cols;
423}
424
425static struct sbridge_dev *get_sbridge_dev(u8 bus)
426{
427 struct sbridge_dev *sbridge_dev;
428
429 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
430 if (sbridge_dev->bus == bus)
431 return sbridge_dev;
432 }
433
434 return NULL;
435}
436
437static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
438 const struct pci_id_table *table)
439{
440 struct sbridge_dev *sbridge_dev;
441
442 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
443 if (!sbridge_dev)
444 return NULL;
445
446 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
447 GFP_KERNEL);
448 if (!sbridge_dev->pdev) {
449 kfree(sbridge_dev);
450 return NULL;
451 }
452
453 sbridge_dev->bus = bus;
454 sbridge_dev->n_devs = table->n_devs;
455 list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
456
457 return sbridge_dev;
458}
459
460static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
461{
462 list_del(&sbridge_dev->list);
463 kfree(sbridge_dev->pdev);
464 kfree(sbridge_dev);
465}
466
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300467static u64 sbridge_get_tolm(struct sbridge_pvt *pvt)
468{
469 u32 reg;
470
471 /* Address range is 32:28 */
472 pci_read_config_dword(pvt->pci_sad1, TOLM, &reg);
473 return GET_TOLM(reg);
474}
475
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -0300476static u64 sbridge_get_tohm(struct sbridge_pvt *pvt)
477{
478 u32 reg;
479
480 pci_read_config_dword(pvt->pci_sad1, TOHM, &reg);
481 return GET_TOHM(reg);
482}
483
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200484/****************************************************************************
485 Memory check routines
486 ****************************************************************************/
487static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
488 unsigned func)
489{
490 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
491 int i;
492
493 if (!sbridge_dev)
494 return NULL;
495
496 for (i = 0; i < sbridge_dev->n_devs; i++) {
497 if (!sbridge_dev->pdev[i])
498 continue;
499
500 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
501 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300502 edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
503 bus, slot, func, sbridge_dev->pdev[i]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200504 return sbridge_dev->pdev[i];
505 }
506 }
507
508 return NULL;
509}
510
511/**
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300512 * check_if_ecc_is_active() - Checks if ECC is active
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200513 * @bus: Device bus
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200514 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300515static int check_if_ecc_is_active(const u8 bus)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200516{
517 struct pci_dev *pdev = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200518 u32 mcmtr;
519
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200520 pdev = get_pdev_slot_func(bus, 15, 0);
521 if (!pdev) {
522 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
523 "%2x.%02d.%d!!!\n",
524 bus, 15, 0);
525 return -ENODEV;
526 }
527
528 pci_read_config_dword(pdev, MCMTR, &mcmtr);
529 if (!IS_ECC_ENABLED(mcmtr)) {
530 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
531 return -ENODEV;
532 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200533 return 0;
534}
535
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300536static int get_dimm_config(struct mem_ctl_info *mci)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200537{
538 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300539 struct dimm_info *dimm;
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300540 unsigned i, j, banks, ranks, rows, cols, npages;
541 u64 size;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200542 u32 reg;
543 enum edac_type mode;
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200544 enum mem_type mtype;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200545
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300546 pvt->info.rankcfgr = SB_RANK_CFG_A;
547
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300548 pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200549 pvt->sbridge_dev->source_id = SOURCE_ID(reg);
550
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300551 pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, &reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200552 pvt->sbridge_dev->node_id = NODE_ID(reg);
Joe Perches956b9ba2012-04-29 17:08:39 -0300553 edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
554 pvt->sbridge_dev->mc,
555 pvt->sbridge_dev->node_id,
556 pvt->sbridge_dev->source_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200557
558 pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
559 if (IS_MIRROR_ENABLED(reg)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300560 edac_dbg(0, "Memory mirror is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200561 pvt->is_mirrored = true;
562 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300563 edac_dbg(0, "Memory mirror is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200564 pvt->is_mirrored = false;
565 }
566
567 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
568 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300569 edac_dbg(0, "Lockstep is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200570 mode = EDAC_S8ECD8ED;
571 pvt->is_lockstep = true;
572 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300573 edac_dbg(0, "Lockstep is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200574 mode = EDAC_S4ECD4ED;
575 pvt->is_lockstep = false;
576 }
577 if (IS_CLOSE_PG(pvt->info.mcmtr)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300578 edac_dbg(0, "address map is on closed page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200579 pvt->is_close_pg = true;
580 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300581 edac_dbg(0, "address map is on open page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200582 pvt->is_close_pg = false;
583 }
584
Luck, Tonyde4772c2013-03-28 09:59:15 -0700585 if (pvt->pci_ddrio) {
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300586 pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
587 &reg);
Luck, Tonyde4772c2013-03-28 09:59:15 -0700588 if (IS_RDIMM_ENABLED(reg)) {
589 /* FIXME: Can also be LRDIMM */
590 edac_dbg(0, "Memory is registered\n");
591 mtype = MEM_RDDR3;
592 } else {
593 edac_dbg(0, "Memory is unregistered\n");
594 mtype = MEM_DDR3;
595 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200596 } else {
Luck, Tonyde4772c2013-03-28 09:59:15 -0700597 edac_dbg(0, "Cannot determine memory type\n");
598 mtype = MEM_UNKNOWN;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200599 }
600
601 /* On all supported DDR3 DIMM types, there are 8 banks available */
602 banks = 8;
603
604 for (i = 0; i < NUM_CHANNELS; i++) {
605 u32 mtr;
606
607 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300608 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
609 i, j, 0);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200610 pci_read_config_dword(pvt->pci_tad[i],
611 mtr_regs[j], &mtr);
Joe Perches956b9ba2012-04-29 17:08:39 -0300612 edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200613 if (IS_DIMM_PRESENT(mtr)) {
614 pvt->channel[i].dimms++;
615
616 ranks = numrank(mtr);
617 rows = numrow(mtr);
618 cols = numcol(mtr);
619
620 /* DDR3 has 8 I/O banks */
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300621 size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200622 npages = MiB_TO_PAGES(size);
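				/*
				 * Worked example of the size arithmetic above, with
				 * hypothetical values: 32768 rows * 1024 cols * 8 banks
				 * * 2 ranks = 2^29 cells of 8 bytes each (64-bit wide
				 * DDR3); ">> (20 - 3)" folds the "* 8 bytes" and the
				 * "/ 1 MiB" into a single shift, giving 4096 MiB.
				 */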
623
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300624 edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
Joe Perches956b9ba2012-04-29 17:08:39 -0300625 pvt->sbridge_dev->mc, i, j,
626 size, npages,
627 banks, ranks, rows, cols);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200628
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300629 dimm->nr_pages = npages;
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300630 dimm->grain = 32;
631 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
632 dimm->mtype = mtype;
633 dimm->edac_mode = mode;
634 snprintf(dimm->label, sizeof(dimm->label),
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200635 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
636 pvt->sbridge_dev->source_id, i, j);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200637 }
638 }
639 }
640
641 return 0;
642}
643
644static void get_memory_layout(const struct mem_ctl_info *mci)
645{
646 struct sbridge_pvt *pvt = mci->pvt_info;
647 int i, j, k, n_sads, n_tads, sad_interl;
648 u32 reg;
649 u64 limit, prv = 0;
650 u64 tmp_mb;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300651 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200652 u32 rir_way;
653
654 /*
655 * Step 1) Get TOLM/TOHM ranges
656 */
657
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300658 pvt->tolm = pvt->info.get_tolm(pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200659 tmp_mb = (1 + pvt->tolm) >> 20;
660
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300661 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300662 edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200663
664 /* Address range is already 45:25 */
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -0300665 pvt->tohm = pvt->info.get_tohm(pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200666 tmp_mb = (1 + pvt->tohm) >> 20;
667
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300668 mb = div_u64_rem(tmp_mb, 1000, &kb);
Mauro Carvalho Chehabda14d932012-10-25 09:07:21 -0200669 edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200670
671 /*
672 * Step 2) Get SAD range and SAD Interleave list
673 * TAD registers contain the interleave wayness. However, it
674 * seems simpler to just discover it indirectly, with the
 675 * algorithm below.
676 */
677 prv = 0;
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300678 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200679 /* SAD_LIMIT Address range is 45:26 */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300680 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200681 &reg);
682 limit = SAD_LIMIT(reg);
683
684 if (!DRAM_RULE_ENABLE(reg))
685 continue;
686
687 if (limit <= prv)
688 break;
689
690 tmp_mb = (limit + 1) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300691 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300692 edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
693 n_sads,
694 get_dram_attr(reg),
695 mb, kb,
696 ((u64)tmp_mb) << 20L,
697 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
698 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200699 prv = limit;
700
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300701 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200702 &reg);
703 sad_interl = sad_pkg(reg, 0);
704 for (j = 0; j < 8; j++) {
705 if (j > 0 && sad_interl == sad_pkg(reg, j))
706 break;
707
Joe Perches956b9ba2012-04-29 17:08:39 -0300708 edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
709 n_sads, j, sad_pkg(reg, j));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200710 }
711 }
712
713 /*
714 * Step 3) Get TAD range
715 */
716 prv = 0;
717 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
718 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
719 &reg);
720 limit = TAD_LIMIT(reg);
721 if (limit <= prv)
722 break;
723 tmp_mb = (limit + 1) >> 20;
724
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300725 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300726 edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
727 n_tads, mb, kb,
728 ((u64)tmp_mb) << 20L,
729 (u32)TAD_SOCK(reg),
730 (u32)TAD_CH(reg),
731 (u32)TAD_TGT0(reg),
732 (u32)TAD_TGT1(reg),
733 (u32)TAD_TGT2(reg),
734 (u32)TAD_TGT3(reg),
735 reg);
Hui Wang7fae0db2012-02-06 04:11:01 -0300736 prv = limit;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200737 }
738
739 /*
740 * Step 4) Get TAD offsets, per each channel
741 */
742 for (i = 0; i < NUM_CHANNELS; i++) {
743 if (!pvt->channel[i].dimms)
744 continue;
745 for (j = 0; j < n_tads; j++) {
746 pci_read_config_dword(pvt->pci_tad[i],
747 tad_ch_nilv_offset[j],
748 &reg);
749 tmp_mb = TAD_OFFSET(reg) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300750 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300751 edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
752 i, j,
753 mb, kb,
754 ((u64)tmp_mb) << 20L,
755 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200756 }
757 }
758
759 /*
760 * Step 6) Get RIR Wayness/Limit, per each channel
761 */
762 for (i = 0; i < NUM_CHANNELS; i++) {
763 if (!pvt->channel[i].dimms)
764 continue;
765 for (j = 0; j < MAX_RIR_RANGES; j++) {
766 pci_read_config_dword(pvt->pci_tad[i],
767 rir_way_limit[j],
768 &reg);
769
770 if (!IS_RIR_VALID(reg))
771 continue;
772
773 tmp_mb = RIR_LIMIT(reg) >> 20;
774 rir_way = 1 << RIR_WAY(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300775 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300776 edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
777 i, j,
778 mb, kb,
779 ((u64)tmp_mb) << 20L,
780 rir_way,
781 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200782
783 for (k = 0; k < rir_way; k++) {
784 pci_read_config_dword(pvt->pci_tad[i],
785 rir_offset[j][k],
786 &reg);
787 tmp_mb = RIR_OFFSET(reg) << 6;
788
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300789 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300790 edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
791 i, j, k,
792 mb, kb,
793 ((u64)tmp_mb) << 20L,
794 (u32)RIR_RNK_TGT(reg),
795 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200796 }
797 }
798 }
799}
800
801struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
802{
803 struct sbridge_dev *sbridge_dev;
804
805 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
806 if (sbridge_dev->node_id == node_id)
807 return sbridge_dev->mci;
808 }
809 return NULL;
810}
811
812static int get_memory_error_data(struct mem_ctl_info *mci,
813 u64 addr,
814 u8 *socket,
815 long *channel_mask,
816 u8 *rank,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300817 char **area_type, char *msg)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200818{
819 struct mem_ctl_info *new_mci;
820 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200821 int n_rir, n_sads, n_tads, sad_way, sck_xch;
822 int sad_interl, idx, base_ch;
823 int interleave_mode;
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300824 unsigned sad_interleave[pvt->info.max_interleave];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200825 u32 reg;
826 u8 ch_way,sck_way;
827 u32 tad_offset;
828 u32 rir_way;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300829 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200830 u64 ch_addr, offset, limit, prv = 0;
831
832
833 /*
834 * Step 0) Check if the address is at special memory ranges
 835 * The check below is probably enough to cover all cases where
 836 * the error is not inside memory, except for the legacy
 837 * range (e.g. VGA addresses). It is unlikely, however, that the
838 * memory controller would generate an error on that range.
839 */
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300840 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200841 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200842 return -EINVAL;
843 }
844 if (addr >= (u64)pvt->tohm) {
845 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200846 return -EINVAL;
847 }
848
849 /*
850 * Step 1) Get socket
851 */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300852 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
853 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200854 &reg);
855
856 if (!DRAM_RULE_ENABLE(reg))
857 continue;
858
859 limit = SAD_LIMIT(reg);
860 if (limit <= prv) {
861 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200862 return -EINVAL;
863 }
864 if (addr <= limit)
865 break;
866 prv = limit;
867 }
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300868 if (n_sads == pvt->info.max_sad) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200869 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200870 return -EINVAL;
871 }
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300872 *area_type = get_dram_attr(reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200873 interleave_mode = INTERLEAVE_MODE(reg);
874
Aristeu Rozanskief1ce512013-10-30 13:27:01 -0300875 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200876 &reg);
877 sad_interl = sad_pkg(reg, 0);
878 for (sad_way = 0; sad_way < 8; sad_way++) {
879 if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
880 break;
881 sad_interleave[sad_way] = sad_pkg(reg, sad_way);
Joe Perches956b9ba2012-04-29 17:08:39 -0300882 edac_dbg(0, "SAD interleave #%d: %d\n",
883 sad_way, sad_interleave[sad_way]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200884 }
Joe Perches956b9ba2012-04-29 17:08:39 -0300885 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
886 pvt->sbridge_dev->mc,
887 n_sads,
888 addr,
889 limit,
890 sad_way + 7,
891 interleave_mode ? "" : "XOR[18:16]");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200892 if (interleave_mode)
893 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
894 else
895 idx = (addr >> 6) & 7;
896 switch (sad_way) {
897 case 1:
898 idx = 0;
899 break;
900 case 2:
901 idx = idx & 1;
902 break;
903 case 4:
904 idx = idx & 3;
905 break;
906 case 8:
907 break;
908 default:
909 sprintf(msg, "Can't discover socket interleave");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200910 return -EINVAL;
911 }
912 *socket = sad_interleave[idx];
Joe Perches956b9ba2012-04-29 17:08:39 -0300913 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
914 idx, sad_way, *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200915
916 /*
917 * Move to the proper node structure, in order to access the
918 * right PCI registers
919 */
920 new_mci = get_mci_for_node_id(*socket);
921 if (!new_mci) {
922 sprintf(msg, "Struct for socket #%u wasn't initialized",
923 *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200924 return -EINVAL;
925 }
926 mci = new_mci;
927 pvt = mci->pvt_info;
928
929 /*
930 * Step 2) Get memory channel
931 */
932 prv = 0;
933 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
934 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
935 &reg);
936 limit = TAD_LIMIT(reg);
937 if (limit <= prv) {
938 sprintf(msg, "Can't discover the memory channel");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200939 return -EINVAL;
940 }
941 if (addr <= limit)
942 break;
943 prv = limit;
944 }
945 ch_way = TAD_CH(reg) + 1;
946 sck_way = TAD_SOCK(reg) + 1;
947 /*
948 * FIXME: Is it right to always use channel 0 for offsets?
949 */
950 pci_read_config_dword(pvt->pci_tad[0],
951 tad_ch_nilv_offset[n_tads],
952 &tad_offset);
953
954 if (ch_way == 3)
955 idx = addr >> 6;
956 else
957 idx = addr >> (6 + sck_way);
958 idx = idx % ch_way;
959
960 /*
961 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
962 */
963 switch (idx) {
964 case 0:
965 base_ch = TAD_TGT0(reg);
966 break;
967 case 1:
968 base_ch = TAD_TGT1(reg);
969 break;
970 case 2:
971 base_ch = TAD_TGT2(reg);
972 break;
973 case 3:
974 base_ch = TAD_TGT3(reg);
975 break;
976 default:
977 sprintf(msg, "Can't discover the TAD target");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200978 return -EINVAL;
979 }
980 *channel_mask = 1 << base_ch;
981
982 if (pvt->is_mirrored) {
983 *channel_mask |= 1 << ((base_ch + 2) % 4);
984 switch(ch_way) {
985 case 2:
986 case 4:
987 sck_xch = 1 << sck_way * (ch_way >> 1);
988 break;
989 default:
990 sprintf(msg, "Invalid mirror set. Can't decode addr");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200991 return -EINVAL;
992 }
993 } else
994 sck_xch = (1 << sck_way) * ch_way;
995
996 if (pvt->is_lockstep)
997 *channel_mask |= 1 << ((base_ch + 1) % 4);
998
999 offset = TAD_OFFSET(tad_offset);
1000
Joe Perches956b9ba2012-04-29 17:08:39 -03001001 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
1002 n_tads,
1003 addr,
1004 limit,
1005 (u32)TAD_SOCK(reg),
1006 ch_way,
1007 offset,
1008 idx,
1009 base_ch,
1010 *channel_mask);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001011
1012 /* Calculate channel address */
1013 /* Remove the TAD offset */
1014
1015 if (offset > addr) {
1016 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1017 offset, addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001018 return -EINVAL;
1019 }
1020 addr -= offset;
1021 /* Store the low bits [0:6] of the addr */
1022 ch_addr = addr & 0x7f;
1023 /* Remove socket wayness and remove 6 bits */
1024 addr >>= 6;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001025 addr = div_u64(addr, sck_xch);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001026#if 0
1027 /* Divide by channel way */
1028 addr = addr / ch_way;
1029#endif
1030 /* Recover the last 6 bits */
1031 ch_addr |= addr << 6;
1032
1033 /*
1034 * Step 3) Decode rank
1035 */
1036 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
1037 pci_read_config_dword(pvt->pci_tad[base_ch],
1038 rir_way_limit[n_rir],
1039 &reg);
1040
1041 if (!IS_RIR_VALID(reg))
1042 continue;
1043
1044 limit = RIR_LIMIT(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001045 mb = div_u64_rem(limit >> 20, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -03001046 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
1047 n_rir,
1048 mb, kb,
1049 limit,
1050 1 << RIR_WAY(reg));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001051 if (ch_addr <= limit)
1052 break;
1053 }
1054 if (n_rir == MAX_RIR_RANGES) {
1055 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1056 ch_addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001057 return -EINVAL;
1058 }
1059 rir_way = RIR_WAY(reg);
1060 if (pvt->is_close_pg)
1061 idx = (ch_addr >> 6);
1062 else
1063 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
1064 idx %= 1 << rir_way;
1065
1066 pci_read_config_dword(pvt->pci_tad[base_ch],
1067 rir_offset[n_rir][idx],
1068 &reg);
1069 *rank = RIR_RNK_TGT(reg);
1070
Joe Perches956b9ba2012-04-29 17:08:39 -03001071 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
1072 n_rir,
1073 ch_addr,
1074 limit,
1075 rir_way,
1076 idx);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001077
1078 return 0;
1079}
1080
1081/****************************************************************************
1082 Device initialization routines: put/get, init/exit
1083 ****************************************************************************/
1084
1085/*
1086 * sbridge_put_all_devices 'put' all the devices that we have
1087 * reserved via 'get'
1088 */
1089static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
1090{
1091 int i;
1092
Joe Perches956b9ba2012-04-29 17:08:39 -03001093 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001094 for (i = 0; i < sbridge_dev->n_devs; i++) {
1095 struct pci_dev *pdev = sbridge_dev->pdev[i];
1096 if (!pdev)
1097 continue;
Joe Perches956b9ba2012-04-29 17:08:39 -03001098 edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1099 pdev->bus->number,
1100 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001101 pci_dev_put(pdev);
1102 }
1103}
1104
1105static void sbridge_put_all_devices(void)
1106{
1107 struct sbridge_dev *sbridge_dev, *tmp;
1108
1109 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
1110 sbridge_put_devices(sbridge_dev);
1111 free_sbridge_dev(sbridge_dev);
1112 }
1113}
1114
1115/*
1116 * sbridge_get_all_devices Find and perform 'get' operation on the MCH's
1117 * device/functions we want to reference for this driver
1118 *
 1119 * Need to 'get' all the devices listed in pci_dev_descr_sbridge
1120 */
1121static int sbridge_get_onedevice(struct pci_dev **prev,
1122 u8 *num_mc,
1123 const struct pci_id_table *table,
1124 const unsigned devno)
1125{
1126 struct sbridge_dev *sbridge_dev;
1127 const struct pci_id_descr *dev_descr = &table->descr[devno];
1128
1129 struct pci_dev *pdev = NULL;
1130 u8 bus = 0;
1131
1132 sbridge_printk(KERN_INFO,
1133 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
1134 dev_descr->dev, dev_descr->func,
1135 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1136
1137 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1138 dev_descr->dev_id, *prev);
1139
1140 if (!pdev) {
1141 if (*prev) {
1142 *prev = pdev;
1143 return 0;
1144 }
1145
1146 if (dev_descr->optional)
1147 return 0;
1148
1149 if (devno == 0)
1150 return -ENODEV;
1151
1152 sbridge_printk(KERN_INFO,
1153 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1154 dev_descr->dev, dev_descr->func,
1155 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1156
1157 /* End of list, leave */
1158 return -ENODEV;
1159 }
1160 bus = pdev->bus->number;
1161
1162 sbridge_dev = get_sbridge_dev(bus);
1163 if (!sbridge_dev) {
1164 sbridge_dev = alloc_sbridge_dev(bus, table);
1165 if (!sbridge_dev) {
1166 pci_dev_put(pdev);
1167 return -ENOMEM;
1168 }
1169 (*num_mc)++;
1170 }
1171
1172 if (sbridge_dev->pdev[devno]) {
1173 sbridge_printk(KERN_ERR,
1174 "Duplicated device for "
1175 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1176 bus, dev_descr->dev, dev_descr->func,
1177 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1178 pci_dev_put(pdev);
1179 return -ENODEV;
1180 }
1181
1182 sbridge_dev->pdev[devno] = pdev;
1183
1184 /* Sanity check */
1185 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1186 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1187 sbridge_printk(KERN_ERR,
1188 "Device PCI ID %04x:%04x "
1189 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
1190 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1191 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1192 bus, dev_descr->dev, dev_descr->func);
1193 return -ENODEV;
1194 }
1195
1196 /* Be sure that the device is enabled */
1197 if (unlikely(pci_enable_device(pdev) < 0)) {
1198 sbridge_printk(KERN_ERR,
1199 "Couldn't enable "
1200 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1201 bus, dev_descr->dev, dev_descr->func,
1202 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1203 return -ENODEV;
1204 }
1205
Joe Perches956b9ba2012-04-29 17:08:39 -03001206 edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
1207 bus, dev_descr->dev, dev_descr->func,
1208 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001209
1210 /*
1211 * As stated on drivers/pci/search.c, the reference count for
1212 * @from is always decremented if it is not %NULL. So, as we need
1213 * to get all devices up to null, we need to do a get for the device
1214 */
1215 pci_dev_get(pdev);
1216
1217 *prev = pdev;
1218
1219 return 0;
1220}
1221
1222static int sbridge_get_all_devices(u8 *num_mc)
1223{
1224 int i, rc;
1225 struct pci_dev *pdev = NULL;
1226 const struct pci_id_table *table = pci_dev_descr_sbridge_table;
1227
1228 while (table && table->descr) {
1229 for (i = 0; i < table->n_devs; i++) {
1230 pdev = NULL;
1231 do {
1232 rc = sbridge_get_onedevice(&pdev, num_mc,
1233 table, i);
1234 if (rc < 0) {
1235 if (i == 0) {
1236 i = table->n_devs;
1237 break;
1238 }
1239 sbridge_put_all_devices();
1240 return -ENODEV;
1241 }
1242 } while (pdev);
1243 }
1244 table++;
1245 }
1246
1247 return 0;
1248}
1249
1250static int mci_bind_devs(struct mem_ctl_info *mci,
1251 struct sbridge_dev *sbridge_dev)
1252{
1253 struct sbridge_pvt *pvt = mci->pvt_info;
1254 struct pci_dev *pdev;
1255 int i, func, slot;
1256
1257 for (i = 0; i < sbridge_dev->n_devs; i++) {
1258 pdev = sbridge_dev->pdev[i];
1259 if (!pdev)
1260 continue;
1261 slot = PCI_SLOT(pdev->devfn);
1262 func = PCI_FUNC(pdev->devfn);
1263 switch (slot) {
1264 case 12:
1265 switch (func) {
1266 case 6:
1267 pvt->pci_sad0 = pdev;
1268 break;
1269 case 7:
1270 pvt->pci_sad1 = pdev;
1271 break;
1272 default:
1273 goto error;
1274 }
1275 break;
1276 case 13:
1277 switch (func) {
1278 case 6:
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -03001279 pvt->pci_br0 = pdev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001280 break;
1281 default:
1282 goto error;
1283 }
1284 break;
1285 case 14:
1286 switch (func) {
1287 case 0:
1288 pvt->pci_ha0 = pdev;
1289 break;
1290 default:
1291 goto error;
1292 }
1293 break;
1294 case 15:
1295 switch (func) {
1296 case 0:
1297 pvt->pci_ta = pdev;
1298 break;
1299 case 1:
1300 pvt->pci_ras = pdev;
1301 break;
1302 case 2:
1303 case 3:
1304 case 4:
1305 case 5:
1306 pvt->pci_tad[func - 2] = pdev;
1307 break;
1308 default:
1309 goto error;
1310 }
1311 break;
1312 case 17:
1313 switch (func) {
1314 case 0:
1315 pvt->pci_ddrio = pdev;
1316 break;
1317 default:
1318 goto error;
1319 }
1320 break;
1321 default:
1322 goto error;
1323 }
1324
Joe Perches956b9ba2012-04-29 17:08:39 -03001325 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
1326 sbridge_dev->bus,
1327 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1328 pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001329 }
1330
 1331 /* Check if everything was registered */
1332 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
Luck, Tonyde4772c2013-03-28 09:59:15 -07001333 !pvt->pci_tad || !pvt->pci_ras || !pvt->pci_ta)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001334 goto enodev;
1335
1336 for (i = 0; i < NUM_CHANNELS; i++) {
1337 if (!pvt->pci_tad[i])
1338 goto enodev;
1339 }
1340 return 0;
1341
1342enodev:
1343 sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
1344 return -ENODEV;
1345
1346error:
1347 sbridge_printk(KERN_ERR, "Device %d, function %d "
1348 "is out of the expected range\n",
1349 slot, func);
1350 return -EINVAL;
1351}
1352
1353/****************************************************************************
1354 Error check routines
1355 ****************************************************************************/
1356
1357/*
 1358 * While Sandy Bridge has error count registers, the SMI BIOS reads and
 1359 * resets those counters, so they are not reliable for the OS to read.
 1360 * We have no option but to trust whatever MCE is telling us about
 1361 * the errors.
1362 */
1363static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1364 const struct mce *m)
1365{
1366 struct mem_ctl_info *new_mci;
1367 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001368 enum hw_event_mc_err_type tp_event;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001369 char *type, *optype, msg[256];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001370 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1371 bool overflow = GET_BITFIELD(m->status, 62, 62);
1372 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
1373 bool recoverable = GET_BITFIELD(m->status, 56, 56);
1374 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
1375 u32 mscod = GET_BITFIELD(m->status, 16, 31);
1376 u32 errcode = GET_BITFIELD(m->status, 0, 15);
1377 u32 channel = GET_BITFIELD(m->status, 0, 3);
1378 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1379 long channel_mask, first_channel;
1380 u8 rank, socket;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001381 int rc, dimm;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001382 char *area_type = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001383
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001384 if (uncorrected_error) {
1385 if (ripv) {
1386 type = "FATAL";
1387 tp_event = HW_EVENT_ERR_FATAL;
1388 } else {
1389 type = "NON_FATAL";
1390 tp_event = HW_EVENT_ERR_UNCORRECTED;
1391 }
1392 } else {
1393 type = "CORRECTED";
1394 tp_event = HW_EVENT_ERR_CORRECTED;
1395 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001396
1397 /*
David Mackey15ed1032012-04-17 11:30:52 -07001398 * According to Table 15-9 of the Intel Architecture spec vol 3A,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001399 * memory errors should fit in this mask:
1400 * 000f 0000 1mmm cccc (binary)
1401 * where:
1402 * f = Correction Report Filtering Bit. If 1, subsequent errors
1403 * won't be shown
1404 * mmm = error type
1405 * cccc = channel
1406 * If the mask doesn't match, report an error to the parsing logic
1407 */
1408 if (! ((errcode & 0xef80) == 0x80)) {
1409 optype = "Can't parse: it is not a mem";
1410 } else {
1411 switch (optypenum) {
1412 case 0:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001413 optype = "generic undef request error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001414 break;
1415 case 1:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001416 optype = "memory read error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001417 break;
1418 case 2:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001419 optype = "memory write error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001420 break;
1421 case 3:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001422 optype = "addr/cmd error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001423 break;
1424 case 4:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001425 optype = "memory scrubbing error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001426 break;
1427 default:
1428 optype = "reserved";
1429 break;
1430 }
1431 }
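	/*
	 * Worked example of the mask check above, using a hypothetical
	 * errcode of 0x0091 (binary 0000 0000 1001 0001): 0x0091 & 0xef80 ==
	 * 0x0080, so it is a memory error, with mmm = 001 (memory read error)
	 * and cccc = 0001 (channel 1).
	 */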
1432
1433 rc = get_memory_error_data(mci, m->addr, &socket,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001434 &channel_mask, &rank, &area_type, msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001435 if (rc < 0)
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001436 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001437 new_mci = get_mci_for_node_id(socket);
1438 if (!new_mci) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001439 strcpy(msg, "Error: socket got corrupted!");
1440 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001441 }
1442 mci = new_mci;
1443 pvt = mci->pvt_info;
1444
1445 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
1446
1447 if (rank < 4)
1448 dimm = 0;
1449 else if (rank < 8)
1450 dimm = 1;
1451 else
1452 dimm = 2;
1453
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001454
1455 /*
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001456 * FIXME: On some memory configurations (mirror, lockstep), the
1457 * Memory Controller can't point the error to a single DIMM. The
1458 * EDAC core should be handling the channel mask, in order to point
 1459 * to the group of DIMMs where the error may be happening.
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001460 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001461 snprintf(msg, sizeof(msg),
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001462 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001463 overflow ? " OVERFLOW" : "",
1464 (uncorrected_error && recoverable) ? " recoverable" : "",
1465 area_type,
1466 mscod, errcode,
1467 socket,
1468 channel_mask,
1469 rank);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001470
Joe Perches956b9ba2012-04-29 17:08:39 -03001471 edac_dbg(0, "%s\n", msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001472
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001473 /* FIXME: need support for channel mask */
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001474
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001475 /* Call the helper to output message */
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001476 edac_mc_handle_error(tp_event, mci, core_err_cnt,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001477 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1478 channel, dimm, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001479 optype, msg);
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001480 return;
1481err_parsing:
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001482 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001483 -1, -1, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001484 msg, "");
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001485
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001486}
1487
1488/*
1489 * sbridge_check_error Retrieve and process errors reported by the
1490 * hardware. Called by the Core module.
1491 */
1492static void sbridge_check_error(struct mem_ctl_info *mci)
1493{
1494 struct sbridge_pvt *pvt = mci->pvt_info;
1495 int i;
1496 unsigned count = 0;
1497 struct mce *m;
1498
1499 /*
1500 * MCE first step: Copy all mce errors into a temporary buffer
 1501 * We use double buffering here to reduce the risk of
 1502 * losing an error.
1503 */
1504 smp_rmb();
1505 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1506 % MCE_LOG_LEN;
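	/*
	 * Example of the ring buffer accounting above, with hypothetical
	 * indexes: if MCE_LOG_LEN = 32, mce_in = 30 and mce_out = 1, then
	 * count = (1 + 32 - 30) % 32 = 3 pending entries, correctly handling
	 * the wrap-around of the producer index.
	 */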
1507 if (!count)
1508 return;
1509
1510 m = pvt->mce_outentry;
1511 if (pvt->mce_in + count > MCE_LOG_LEN) {
1512 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1513
1514 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1515 smp_wmb();
1516 pvt->mce_in = 0;
1517 count -= l;
1518 m += l;
1519 }
1520 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1521 smp_wmb();
1522 pvt->mce_in += count;
1523
1524 smp_rmb();
1525 if (pvt->mce_overrun) {
1526 sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
1527 pvt->mce_overrun);
1528 smp_wmb();
1529 pvt->mce_overrun = 0;
1530 }
1531
1532 /*
1533 * MCE second step: parse errors and display
1534 */
1535 for (i = 0; i < count; i++)
1536 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
1537}
1538
1539/*
1540 * sbridge_mce_check_error Replicates mcelog routine to get errors
1541 * This routine simply queues mcelog errors, and
 1542 * returns. The error itself should be handled later
1543 * by sbridge_check_error.
1544 * WARNING: As this routine should be called at NMI time, extra care should
1545 * be taken to avoid deadlocks, and to be as fast as possible.
1546 */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001547static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1548 void *data)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001549{
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001550 struct mce *mce = (struct mce *)data;
1551 struct mem_ctl_info *mci;
1552 struct sbridge_pvt *pvt;
1553
1554 mci = get_mci_for_node_id(mce->socketid);
1555 if (!mci)
1556 return NOTIFY_BAD;
1557 pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001558
1559 /*
1560 * Just let mcelog handle it if the error is
1561 * outside the memory controller. A memory error
 1562 * is indicated by bit 7 = 1 and bits 8-11,13-15 = 0.
 1563 * Bit 12 has a special meaning.
1564 */
1565 if ((mce->status & 0xefff) >> 7 != 1)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001566 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001567
1568 printk("sbridge: HANDLING MCE MEMORY ERROR\n");
1569
1570 printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
1571 mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
1572 printk("TSC %llx ", mce->tsc);
1573 printk("ADDR %llx ", mce->addr);
1574 printk("MISC %llx ", mce->misc);
1575
1576 printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
1577 mce->cpuvendor, mce->cpuid, mce->time,
1578 mce->socketid, mce->apicid);
1579
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001580 /* Only handle if it is the right mc controller */
1581 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001582 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001583
1584 smp_rmb();
1585 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1586 smp_wmb();
1587 pvt->mce_overrun++;
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001588 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001589 }
1590
1591 /* Copy memory error at the ringbuffer */
1592 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1593 smp_wmb();
1594 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1595
1596 /* Handle fatal errors immediately */
1597 if (mce->mcgstatus & 1)
1598 sbridge_check_error(mci);
1599
 1600 /* Advise mcelog that the error was handled */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001601 return NOTIFY_STOP;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001602}
1603
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001604static struct notifier_block sbridge_mce_dec = {
1605 .notifier_call = sbridge_mce_check_error,
1606};
1607
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001608/****************************************************************************
1609 EDAC register/unregister logic
1610 ****************************************************************************/
1611
1612static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1613{
1614 struct mem_ctl_info *mci = sbridge_dev->mci;
1615 struct sbridge_pvt *pvt;
1616
1617 if (unlikely(!mci || !mci->pvt_info)) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001618 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001619
1620 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
1621 return;
1622 }
1623
1624 pvt = mci->pvt_info;
1625
Joe Perches956b9ba2012-04-29 17:08:39 -03001626 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1627 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001628
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001629 /* Remove MC sysfs nodes */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001630 edac_mc_del_mc(mci->pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001631
Joe Perches956b9ba2012-04-29 17:08:39 -03001632 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001633 kfree(mci->ctl_name);
1634 edac_mc_free(mci);
1635 sbridge_dev->mci = NULL;
1636}
1637
1638static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1639{
1640 struct mem_ctl_info *mci;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001641 struct edac_mc_layer layers[2];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001642 struct sbridge_pvt *pvt;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001643 int rc;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001644
1645	/* Check that ECC is active on the enabled channels */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001646 rc = check_if_ecc_is_active(sbridge_dev->bus);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001647 if (unlikely(rc < 0))
1648 return rc;
1649
1650 /* allocate a new MC control structure */
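	/*
	 * The controller is described to the EDAC core as a two-layer
	 * grid: layer 0 enumerates the memory channels and layer 1 the
	 * DIMM slots within each channel; the slot layer is also used as
	 * the virtual csrow for the legacy sysfs interface.
	 */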
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001651 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1652 layers[0].size = NUM_CHANNELS;
1653 layers[0].is_virt_csrow = false;
1654 layers[1].type = EDAC_MC_LAYER_SLOT;
1655 layers[1].size = MAX_DIMMS;
1656 layers[1].is_virt_csrow = true;
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -03001657 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001658 sizeof(*pvt));
1659
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001660 if (unlikely(!mci))
1661 return -ENOMEM;
1662
Joe Perches956b9ba2012-04-29 17:08:39 -03001663 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1664 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001665
1666 pvt = mci->pvt_info;
1667 memset(pvt, 0, sizeof(*pvt));
1668
1669 /* Associate sbridge_dev and mci for future usage */
1670 pvt->sbridge_dev = sbridge_dev;
1671 sbridge_dev->mci = mci;
1672
1673 mci->mtype_cap = MEM_FLAG_DDR3;
1674 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1675 mci->edac_cap = EDAC_FLAG_NONE;
1676 mci->mod_name = "sbridge_edac.c";
1677 mci->mod_ver = SBRIDGE_REVISION;
1678 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
1679 mci->dev_name = pci_name(sbridge_dev->pdev[0]);
1680 mci->ctl_page_to_phys = NULL;
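	/*
	 * Hook the Sandy Bridge specific address-decoding callbacks and
	 * SAD tables behind pvt->info, so the decode paths do not need to
	 * hard-code generation-specific registers.
	 */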
Aristeu Rozanskifb79a502013-10-30 13:26:57 -03001681 pvt->info.get_tolm = sbridge_get_tolm;
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -03001682 pvt->info.get_tohm = sbridge_get_tohm;
Aristeu Rozanski464f1d82013-10-30 13:27:00 -03001683 pvt->info.dram_rule = sbridge_dram_rule;
1684 pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule);
Aristeu Rozanskief1ce512013-10-30 13:27:01 -03001685 pvt->info.interleave_list = sbridge_interleave_list;
1686 pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001687
1688 /* Set the function pointer to an actual operation function */
1689 mci->edac_check = sbridge_check_error;
1690
1691	/* Store the PCI devices in mci for faster access */
1692 rc = mci_bind_devs(mci, sbridge_dev);
1693 if (unlikely(rc < 0))
1694 goto fail0;
1695
1696 /* Get dimm basic config and the memory layout */
1697 get_dimm_config(mci);
1698 get_memory_layout(mci);
1699
1700 /* record ptr to the generic device */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001701 mci->pdev = &sbridge_dev->pdev[0]->dev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001702
1703 /* add this new MC control structure to EDAC's list of MCs */
1704 if (unlikely(edac_mc_add_mc(mci))) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001705 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001706 rc = -EINVAL;
1707 goto fail0;
1708 }
1709
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001710 return 0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001711
1712fail0:
1713 kfree(mci->ctl_name);
1714 edac_mc_free(mci);
1715 sbridge_dev->mci = NULL;
1716 return rc;
1717}
1718
1719/*
1720 *	sbridge_probe	Probe for ONE instance of the device to see if it is
1721 * present.
1722 * return:
1723 *		0 when a device was found
1724 *		< 0 on error
1725 */
1726
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001727static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001728{
1729 int rc;
1730 u8 mc, num_mc = 0;
1731 struct sbridge_dev *sbridge_dev;
1732
1733 /* get the pci devices we want to reserve for our use */
1734 mutex_lock(&sbridge_edac_lock);
1735
1736 /*
1737 * All memory controllers are allocated at the first pass.
1738 */
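	/*
	 * The PCI core calls this probe once per matching device/function,
	 * but all sockets are discovered and registered on the first call
	 * (via sbridge_get_all_devices() below), so later calls are simply
	 * rejected.
	 */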
1739 if (unlikely(probed >= 1)) {
1740 mutex_unlock(&sbridge_edac_lock);
1741 return -ENODEV;
1742 }
1743 probed++;
1744
1745 rc = sbridge_get_all_devices(&num_mc);
1746 if (unlikely(rc < 0))
1747 goto fail0;
1748 mc = 0;
1749
1750 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001751 edac_dbg(0, "Registering MC#%d (%d of %d)\n",
1752 mc, mc + 1, num_mc);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001753 sbridge_dev->mc = mc++;
1754 rc = sbridge_register_mci(sbridge_dev);
1755 if (unlikely(rc < 0))
1756 goto fail1;
1757 }
1758
1759 sbridge_printk(KERN_INFO, "Driver loaded.\n");
1760
1761 mutex_unlock(&sbridge_edac_lock);
1762 return 0;
1763
1764fail1:
1765 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1766 sbridge_unregister_mci(sbridge_dev);
1767
1768 sbridge_put_all_devices();
1769fail0:
1770 mutex_unlock(&sbridge_edac_lock);
1771 return rc;
1772}
1773
1774/*
1775 *	sbridge_remove	Destructor for one instance of the device
1776 *
1777 */
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001778static void sbridge_remove(struct pci_dev *pdev)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001779{
1780 struct sbridge_dev *sbridge_dev;
1781
Joe Perches956b9ba2012-04-29 17:08:39 -03001782 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001783
1784 /*
1785	 * There is a problem here: the pdev value used for removal will be
1786	 * wrong, since it will point to the X58 register used to detect that
1787	 * the machine is a Nehalem or newer design. However, due to the way
1788	 * several PCI devices are grouped to provide MC functionality, we
1789	 * need to use a different method for releasing the devices.
1790 */
1791
1792 mutex_lock(&sbridge_edac_lock);
1793
1794 if (unlikely(!probed)) {
1795 mutex_unlock(&sbridge_edac_lock);
1796 return;
1797 }
1798
1799 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1800 sbridge_unregister_mci(sbridge_dev);
1801
1802 /* Release PCI resources */
1803 sbridge_put_all_devices();
1804
1805 probed--;
1806
1807 mutex_unlock(&sbridge_edac_lock);
1808}
1809
1810MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
1811
1812/*
1813 * sbridge_driver pci_driver structure for this module
1814 *
1815 */
1816static struct pci_driver sbridge_driver = {
1817 .name = "sbridge_edac",
1818 .probe = sbridge_probe,
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001819 .remove = sbridge_remove,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001820 .id_table = sbridge_pci_tbl,
1821};
1822
1823/*
1824 * sbridge_init Module entry function
1825 * Try to initialize this module for its devices
1826 */
1827static int __init sbridge_init(void)
1828{
1829 int pci_rc;
1830
Joe Perches956b9ba2012-04-29 17:08:39 -03001831 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001832
1833 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1834 opstate_init();
1835
1836 pci_rc = pci_register_driver(&sbridge_driver);
1837
Chen Gonge35fca42012-05-08 20:40:12 -03001838 if (pci_rc >= 0) {
1839 mce_register_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001840 return 0;
Chen Gonge35fca42012-05-08 20:40:12 -03001841 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001842
1843 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
1844 pci_rc);
1845
1846 return pci_rc;
1847}
1848
1849/*
1850 * sbridge_exit() Module exit function
1851 * Unregister the driver
1852 */
1853static void __exit sbridge_exit(void)
1854{
Joe Perches956b9ba2012-04-29 17:08:39 -03001855 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001856 pci_unregister_driver(&sbridge_driver);
Chen Gonge35fca42012-05-08 20:40:12 -03001857 mce_unregister_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001858}
1859
1860module_init(sbridge_init);
1861module_exit(sbridge_exit);
1862
1863module_param(edac_op_state, int, 0444);
1864MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
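/*
 * Typical usage (assuming the module is built as sbridge_edac.ko):
 *	modprobe sbridge_edac edac_op_state=0	(polled operation)
 *	modprobe sbridge_edac edac_op_state=1	(NMI/MCE driven operation)
 */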
1865
1866MODULE_LICENSE("GPL");
1867MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1868MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1869MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
1870 SBRIDGE_REVISION);