blob: b4acc4f2074d37ab10af514c01b21106ad0b7e82 [file] [log] [blame]
/*
 * GHES/EDAC Linux driver
 *
 * This file may be distributed under the terms of the GNU General Public
 * License version 2.
 *
 * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 */
11
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -030012#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -030014#include <acpi/ghes.h>
15#include <linux/edac.h>
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -030016#include <linux/dmi.h>
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -030017#include "edac_core.h"
18
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -030019#define GHES_EDAC_REVISION " Ver: 1.0.0"
20
21struct ghes_edac_pvt {
22 struct list_head list;
23 struct ghes *ghes;
24 struct mem_ctl_info *mci;
25};
26
/* Every registered struct ghes_edac_pvt, linked through its ->list member */
static LIST_HEAD(ghes_reglist);

/* Held by ghes_edac_register() across edac_mc_alloc()/edac_mc_add_mc() */
static DEFINE_MUTEX(ghes_edac_lock);

/* Index handed to edac_mc_alloc(); bumped after each successful register */
static int ghes_edac_mc_num;
30
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -030031
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -030032/* Memory Device - Type 17 of SMBIOS spec */
33struct memdev_dmi_entry {
34 u8 type;
35 u8 length;
36 u16 handle;
37 u16 phys_mem_array_handle;
38 u16 mem_err_info_handle;
39 u16 total_width;
40 u16 data_width;
41 u16 size;
42 u8 form_factor;
43 u8 device_set;
44 u8 device_locator;
45 u8 bank_locator;
46 u8 memory_type;
47 u16 type_detail;
48 u16 speed;
49 u8 manufacturer;
50 u8 serial_number;
51 u8 asset_tag;
52 u8 part_number;
53 u8 attributes;
54 u32 extended_size;
55 u16 conf_mem_clk_speed;
56} __attribute__((__packed__));
57
/* Cursor handed to ghes_edac_dmidecode() through dmi_walk() */
struct ghes_edac_dimm_fill {
	struct mem_ctl_info *mci;	/* controller whose DIMMs are filled */
	unsigned int count;		/* index of the next DIMM slot to fill */
};
62
63char *memory_type[] = {
64 [MEM_EMPTY] = "EMPTY",
65 [MEM_RESERVED] = "RESERVED",
66 [MEM_UNKNOWN] = "UNKNOWN",
67 [MEM_FPM] = "FPM",
68 [MEM_EDO] = "EDO",
69 [MEM_BEDO] = "BEDO",
70 [MEM_SDR] = "SDR",
71 [MEM_RDR] = "RDR",
72 [MEM_DDR] = "DDR",
73 [MEM_RDDR] = "RDDR",
74 [MEM_RMBS] = "RMBS",
75 [MEM_DDR2] = "DDR2",
76 [MEM_FB_DDR2] = "FB_DDR2",
77 [MEM_RDDR2] = "RDDR2",
78 [MEM_XDR] = "XDR",
79 [MEM_DDR3] = "DDR3",
80 [MEM_RDDR3] = "RDDR3",
81};
82
83static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
84{
85 int *num_dimm = arg;
86
87 if (dh->type == DMI_ENTRY_MEM_DEVICE)
88 (*num_dimm)++;
89}
90
91static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
92{
93 struct ghes_edac_dimm_fill *dimm_fill = arg;
94 struct mem_ctl_info *mci = dimm_fill->mci;
95
96 if (dh->type == DMI_ENTRY_MEM_DEVICE) {
97 struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
98 struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
99 mci->n_layers,
100 dimm_fill->count, 0, 0);
101
102 if (entry->size == 0xffff) {
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300103 pr_info("Can't get DIMM%i size\n",
104 dimm_fill->count);
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300105 dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
106 } else if (entry->size == 0x7fff) {
107 dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
108 } else {
109 if (entry->size & 1 << 15)
110 dimm->nr_pages = MiB_TO_PAGES((entry->size &
111 0x7fff) << 10);
112 else
113 dimm->nr_pages = MiB_TO_PAGES(entry->size);
114 }
115
116 switch (entry->memory_type) {
117 case 0x12:
118 if (entry->type_detail & 1 << 13)
119 dimm->mtype = MEM_RDDR;
120 else
121 dimm->mtype = MEM_DDR;
122 break;
123 case 0x13:
124 if (entry->type_detail & 1 << 13)
125 dimm->mtype = MEM_RDDR2;
126 else
127 dimm->mtype = MEM_DDR2;
128 break;
129 case 0x14:
130 dimm->mtype = MEM_FB_DDR2;
131 break;
132 case 0x18:
133 if (entry->type_detail & 1 << 13)
134 dimm->mtype = MEM_RDDR3;
135 else
136 dimm->mtype = MEM_DDR3;
137 break;
138 default:
139 if (entry->type_detail & 1 << 6)
140 dimm->mtype = MEM_RMBS;
141 else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
142 == ((1 << 7) | (1 << 13)))
143 dimm->mtype = MEM_RDR;
144 else if (entry->type_detail & 1 << 7)
145 dimm->mtype = MEM_SDR;
146 else if (entry->type_detail & 1 << 9)
147 dimm->mtype = MEM_EDO;
148 else
149 dimm->mtype = MEM_UNKNOWN;
150 }
151
152 /*
153 * Actually, we can only detect if the memory has bits for
154 * checksum or not
155 */
156 if (entry->total_width == entry->data_width)
157 dimm->edac_mode = EDAC_NONE;
158 else
159 dimm->edac_mode = EDAC_SECDED;
160
161 dimm->dtype = DEV_UNKNOWN;
162 dimm->grain = 128; /* Likely, worse case */
163
164 /*
165 * FIXME: It shouldn't be hard to also fill the DIMM labels
166 */
167
168 if (dimm->nr_pages) {
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300169 edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300170 dimm_fill->count, memory_type[dimm->mtype],
171 PAGES_TO_MiB(dimm->nr_pages),
172 (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300173 edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300174 entry->memory_type, entry->type_detail,
175 entry->total_width, entry->data_width);
176 }
177
178 dimm_fill->count++;
179 }
180}
181
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300182void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300183 struct cper_sec_mem_err *mem_err)
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300184{
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300185 enum hw_event_mc_err_type type;
186 struct edac_raw_error_desc *e;
187 struct mem_ctl_info *mci;
188 struct ghes_edac_pvt *pvt = NULL;
189
190 list_for_each_entry(pvt, &ghes_reglist, list) {
191 if (ghes == pvt->ghes)
192 break;
193 }
194 if (!pvt) {
195 pr_err("Internal error: Can't find EDAC structure\n");
196 return;
197 }
198 mci = pvt->mci;
199 e = &mci->error_desc;
200
201 /* Cleans the error report buffer */
202 memset(e, 0, sizeof (*e));
203 e->error_count = 1;
204 e->msg = "APEI";
205 strcpy(e->label, "unknown");
206 e->other_detail = "";
207
208 if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
209 e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
210 e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
211 e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
212 }
213
214 switch (sev) {
215 case GHES_SEV_CORRECTED:
216 type = HW_EVENT_ERR_CORRECTED;
217 break;
218 case GHES_SEV_RECOVERABLE:
219 type = HW_EVENT_ERR_UNCORRECTED;
220 break;
221 case GHES_SEV_PANIC:
222 type = HW_EVENT_ERR_FATAL;
223 break;
224 default:
225 case GHES_SEV_NO:
226 type = HW_EVENT_ERR_INFO;
227 }
228
229 sprintf(e->location,
230 "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d",
231 mem_err->node, mem_err->card, mem_err->module,
232 mem_err->bank, mem_err->device, mem_err->row, mem_err->column,
233 mem_err->bit_pos);
234 edac_dbg(3, "error at location %s\n", e->location);
235
236 edac_raw_mc_handle_error(type, mci, e);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300237}
238EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
239
240int ghes_edac_register(struct ghes *ghes, struct device *dev)
241{
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300242 bool fake = false;
243 int rc, num_dimm = 0;
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300244 struct mem_ctl_info *mci;
245 struct edac_mc_layer layers[1];
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300246 struct ghes_edac_pvt *pvt;
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300247 struct ghes_edac_dimm_fill dimm_fill;
248
249 /* Get the number of DIMMs */
250 dmi_walk(ghes_edac_count_dimms, &num_dimm);
251
252 /* Check if we've got a bogus BIOS */
253 if (num_dimm == 0) {
254 fake = true;
255 num_dimm = 1;
256 }
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300257
258 layers[0].type = EDAC_MC_LAYER_ALL_MEM;
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300259 layers[0].size = num_dimm;
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300260 layers[0].is_virt_csrow = true;
261
262 /*
263 * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
264 * to avoid duplicated memory controller numbers
265 */
266 mutex_lock(&ghes_edac_lock);
267 mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
268 sizeof(*pvt));
269 if (!mci) {
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300270 pr_info("Can't allocate memory for EDAC data\n");
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300271 mutex_unlock(&ghes_edac_lock);
272 return -ENOMEM;
273 }
274
275 pvt = mci->pvt_info;
276 memset(pvt, 0, sizeof(*pvt));
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300277 list_add_tail(&pvt->list, &ghes_reglist);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300278 pvt->ghes = ghes;
279 pvt->mci = mci;
280 mci->pdev = dev;
281
282 mci->mtype_cap = MEM_FLAG_EMPTY;
283 mci->edac_ctl_cap = EDAC_FLAG_NONE;
284 mci->edac_cap = EDAC_FLAG_NONE;
285 mci->mod_name = "ghes_edac.c";
286 mci->mod_ver = GHES_EDAC_REVISION;
287 mci->ctl_name = "ghes_edac";
288 mci->dev_name = "ghes";
289
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300290 if (!ghes_edac_mc_num) {
291 if (!fake) {
292 pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
293 pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
294 pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
295 pr_info("If you find incorrect reports, please contact your hardware vendor\n");
296 pr_info("to correct its BIOS.\n");
297 pr_info("This system has %d DIMM sockets.\n",
298 num_dimm);
299 } else {
300 pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
301 pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
302 pr_info("work on such system. Use this driver with caution\n");
303 }
304 }
305
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300306 if (!fake) {
Mauro Carvalho Chehab5ee726d2013-02-15 08:45:00 -0300307 /*
308 * Fill DIMM info from DMI for the memory controller #0
309 *
310 * Keep it in blank for the other memory controllers, as
311 * there's no reliable way to properly credit each DIMM to
312 * the memory controller, as different BIOSes fill the
313 * DMI bank location fields on different ways
314 */
315 if (!ghes_edac_mc_num) {
316 dimm_fill.count = 0;
317 dimm_fill.mci = mci;
318 dmi_walk(ghes_edac_dmidecode, &dimm_fill);
319 }
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300320 } else {
321 struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
322 mci->n_layers, 0, 0, 0);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300323
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300324 dimm->nr_pages = 1;
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300325 dimm->grain = 128;
326 dimm->mtype = MEM_UNKNOWN;
327 dimm->dtype = DEV_UNKNOWN;
328 dimm->edac_mode = EDAC_SECDED;
329 }
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300330
331 rc = edac_mc_add_mc(mci);
332 if (rc < 0) {
Mauro Carvalho Chehabd2a68562013-02-15 09:06:38 -0300333 pr_info("Can't register at EDAC core\n");
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300334 edac_mc_free(mci);
335 mutex_unlock(&ghes_edac_lock);
336 return -ENODEV;
337 }
338
339 ghes_edac_mc_num++;
340 mutex_unlock(&ghes_edac_lock);
341 return 0;
342}
343EXPORT_SYMBOL_GPL(ghes_edac_register);
344
345void ghes_edac_unregister(struct ghes *ghes)
346{
347 struct mem_ctl_info *mci;
348 struct ghes_edac_pvt *pvt;
349
350 list_for_each_entry(pvt, &ghes_reglist, list) {
351 if (ghes == pvt->ghes) {
352 mci = pvt->mci;
353 edac_mc_del_mc(mci->pdev);
354 edac_mc_free(mci);
355 list_del(&pvt->list);
356 }
357 }
358}
359EXPORT_SYMBOL_GPL(ghes_edac_unregister);