blob: 22ac29e4733fe003fad463e5531e63d041dd36d6 [file] [log] [blame]
/*
 * GHES/EDAC Linux driver
 *
 * This file may be distributed under the terms of the GNU General Public
 * License version 2.
 *
 * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 */
11
12#include <acpi/ghes.h>
13#include <linux/edac.h>
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -030014#include <linux/dmi.h>
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -030015#include "edac_core.h"
16
/* Prefix put in front of the log messages emitted by this driver. */
#define GHES_PFX		"ghes_edac: "

/* Version string stored in mci->mod_ver when a controller is registered. */
#define GHES_EDAC_REVISION	" Ver: 1.0.0"
19
20struct ghes_edac_pvt {
21 struct list_head list;
22 struct ghes *ghes;
23 struct mem_ctl_info *mci;
24};
25
/* Every registered ghes_edac_pvt, linked through its ->list member. */
static LIST_HEAD(ghes_reglist);

/* Serializes controller allocation/registration in ghes_edac_register(). */
static DEFINE_MUTEX(ghes_edac_lock);

/* Index handed to edac_mc_alloc() for the next controller to register. */
static int ghes_edac_mc_num;
29
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -030030/* Memory Device - Type 17 of SMBIOS spec */
31struct memdev_dmi_entry {
32 u8 type;
33 u8 length;
34 u16 handle;
35 u16 phys_mem_array_handle;
36 u16 mem_err_info_handle;
37 u16 total_width;
38 u16 data_width;
39 u16 size;
40 u8 form_factor;
41 u8 device_set;
42 u8 device_locator;
43 u8 bank_locator;
44 u8 memory_type;
45 u16 type_detail;
46 u16 speed;
47 u8 manufacturer;
48 u8 serial_number;
49 u8 asset_tag;
50 u8 part_number;
51 u8 attributes;
52 u32 extended_size;
53 u16 conf_mem_clk_speed;
54} __attribute__((__packed__));
55
/*
 * Cursor handed to ghes_edac_dmidecode() through dmi_walk(): the controller
 * being populated and the index of the next DIMM slot to fill.
 */
struct ghes_edac_dimm_fill {
	struct mem_ctl_info *mci;	/* controller whose DIMMs are filled */
	unsigned int count;		/* DIMM entries filled so far */
};
60
61char *memory_type[] = {
62 [MEM_EMPTY] = "EMPTY",
63 [MEM_RESERVED] = "RESERVED",
64 [MEM_UNKNOWN] = "UNKNOWN",
65 [MEM_FPM] = "FPM",
66 [MEM_EDO] = "EDO",
67 [MEM_BEDO] = "BEDO",
68 [MEM_SDR] = "SDR",
69 [MEM_RDR] = "RDR",
70 [MEM_DDR] = "DDR",
71 [MEM_RDDR] = "RDDR",
72 [MEM_RMBS] = "RMBS",
73 [MEM_DDR2] = "DDR2",
74 [MEM_FB_DDR2] = "FB_DDR2",
75 [MEM_RDDR2] = "RDDR2",
76 [MEM_XDR] = "XDR",
77 [MEM_DDR3] = "DDR3",
78 [MEM_RDDR3] = "RDDR3",
79};
80
81static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
82{
83 int *num_dimm = arg;
84
85 if (dh->type == DMI_ENTRY_MEM_DEVICE)
86 (*num_dimm)++;
87}
88
89static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
90{
91 struct ghes_edac_dimm_fill *dimm_fill = arg;
92 struct mem_ctl_info *mci = dimm_fill->mci;
93
94 if (dh->type == DMI_ENTRY_MEM_DEVICE) {
95 struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
96 struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
97 mci->n_layers,
98 dimm_fill->count, 0, 0);
99
100 if (entry->size == 0xffff) {
101 pr_info(GHES_PFX "Can't get dimm size\n");
102 dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
103 } else if (entry->size == 0x7fff) {
104 dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
105 } else {
106 if (entry->size & 1 << 15)
107 dimm->nr_pages = MiB_TO_PAGES((entry->size &
108 0x7fff) << 10);
109 else
110 dimm->nr_pages = MiB_TO_PAGES(entry->size);
111 }
112
113 switch (entry->memory_type) {
114 case 0x12:
115 if (entry->type_detail & 1 << 13)
116 dimm->mtype = MEM_RDDR;
117 else
118 dimm->mtype = MEM_DDR;
119 break;
120 case 0x13:
121 if (entry->type_detail & 1 << 13)
122 dimm->mtype = MEM_RDDR2;
123 else
124 dimm->mtype = MEM_DDR2;
125 break;
126 case 0x14:
127 dimm->mtype = MEM_FB_DDR2;
128 break;
129 case 0x18:
130 if (entry->type_detail & 1 << 13)
131 dimm->mtype = MEM_RDDR3;
132 else
133 dimm->mtype = MEM_DDR3;
134 break;
135 default:
136 if (entry->type_detail & 1 << 6)
137 dimm->mtype = MEM_RMBS;
138 else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
139 == ((1 << 7) | (1 << 13)))
140 dimm->mtype = MEM_RDR;
141 else if (entry->type_detail & 1 << 7)
142 dimm->mtype = MEM_SDR;
143 else if (entry->type_detail & 1 << 9)
144 dimm->mtype = MEM_EDO;
145 else
146 dimm->mtype = MEM_UNKNOWN;
147 }
148
149 /*
150 * Actually, we can only detect if the memory has bits for
151 * checksum or not
152 */
153 if (entry->total_width == entry->data_width)
154 dimm->edac_mode = EDAC_NONE;
155 else
156 dimm->edac_mode = EDAC_SECDED;
157
158 dimm->dtype = DEV_UNKNOWN;
159 dimm->grain = 128; /* Likely, worse case */
160
161 /*
162 * FIXME: It shouldn't be hard to also fill the DIMM labels
163 */
164
165 if (dimm->nr_pages) {
166 pr_info(GHES_PFX "DIMM%i: %s size = %d MB%s\n",
167 dimm_fill->count, memory_type[dimm->mtype],
168 PAGES_TO_MiB(dimm->nr_pages),
169 (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
170 pr_info(GHES_PFX "\ttype %d, detail 0x%02x, width %d(total %d)\n",
171 entry->memory_type, entry->type_detail,
172 entry->total_width, entry->data_width);
173 }
174
175 dimm_fill->count++;
176 }
177}
178
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300179void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300180 struct cper_sec_mem_err *mem_err)
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300181{
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300182 enum hw_event_mc_err_type type;
183 struct edac_raw_error_desc *e;
184 struct mem_ctl_info *mci;
185 struct ghes_edac_pvt *pvt = NULL;
186
187 list_for_each_entry(pvt, &ghes_reglist, list) {
188 if (ghes == pvt->ghes)
189 break;
190 }
191 if (!pvt) {
192 pr_err("Internal error: Can't find EDAC structure\n");
193 return;
194 }
195 mci = pvt->mci;
196 e = &mci->error_desc;
197
198 /* Cleans the error report buffer */
199 memset(e, 0, sizeof (*e));
200 e->error_count = 1;
201 e->msg = "APEI";
202 strcpy(e->label, "unknown");
203 e->other_detail = "";
204
205 if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
206 e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
207 e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
208 e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
209 }
210
211 switch (sev) {
212 case GHES_SEV_CORRECTED:
213 type = HW_EVENT_ERR_CORRECTED;
214 break;
215 case GHES_SEV_RECOVERABLE:
216 type = HW_EVENT_ERR_UNCORRECTED;
217 break;
218 case GHES_SEV_PANIC:
219 type = HW_EVENT_ERR_FATAL;
220 break;
221 default:
222 case GHES_SEV_NO:
223 type = HW_EVENT_ERR_INFO;
224 }
225
226 sprintf(e->location,
227 "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d",
228 mem_err->node, mem_err->card, mem_err->module,
229 mem_err->bank, mem_err->device, mem_err->row, mem_err->column,
230 mem_err->bit_pos);
231 edac_dbg(3, "error at location %s\n", e->location);
232
233 edac_raw_mc_handle_error(type, mci, e);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300234}
235EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
236
237int ghes_edac_register(struct ghes *ghes, struct device *dev)
238{
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300239 bool fake = false;
240 int rc, num_dimm = 0;
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300241 struct mem_ctl_info *mci;
242 struct edac_mc_layer layers[1];
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300243 struct ghes_edac_pvt *pvt;
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300244 struct ghes_edac_dimm_fill dimm_fill;
245
246 /* Get the number of DIMMs */
247 dmi_walk(ghes_edac_count_dimms, &num_dimm);
248
249 /* Check if we've got a bogus BIOS */
250 if (num_dimm == 0) {
251 fake = true;
252 num_dimm = 1;
253 }
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300254
255 layers[0].type = EDAC_MC_LAYER_ALL_MEM;
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300256 layers[0].size = num_dimm;
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300257 layers[0].is_virt_csrow = true;
258
259 /*
260 * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
261 * to avoid duplicated memory controller numbers
262 */
263 mutex_lock(&ghes_edac_lock);
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300264 pr_info("ghes_edac#%d: allocating space for %d dimms\n",
265 ghes_edac_mc_num, num_dimm);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300266 mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
267 sizeof(*pvt));
268 if (!mci) {
269 pr_info(GHES_PFX "Can't allocate memory for EDAC data\n");
270 mutex_unlock(&ghes_edac_lock);
271 return -ENOMEM;
272 }
273
274 pvt = mci->pvt_info;
275 memset(pvt, 0, sizeof(*pvt));
Mauro Carvalho Chehabf04c62a2013-02-15 06:36:27 -0300276 list_add_tail(&pvt->list, &ghes_reglist);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300277 pvt->ghes = ghes;
278 pvt->mci = mci;
279 mci->pdev = dev;
280
281 mci->mtype_cap = MEM_FLAG_EMPTY;
282 mci->edac_ctl_cap = EDAC_FLAG_NONE;
283 mci->edac_cap = EDAC_FLAG_NONE;
284 mci->mod_name = "ghes_edac.c";
285 mci->mod_ver = GHES_EDAC_REVISION;
286 mci->ctl_name = "ghes_edac";
287 mci->dev_name = "ghes";
288
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300289 if (!fake) {
290 /* Fill DIMM info from DMI */
291 dimm_fill.count = 0;
292 dimm_fill.mci = mci;
293 dmi_walk(ghes_edac_dmidecode, &dimm_fill);
294 } else {
295 struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
296 mci->n_layers, 0, 0, 0);
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300297
Mauro Carvalho Chehab32fa1f52013-02-14 09:11:08 -0300298 pr_info(GHES_PFX "Crappy BIOS detected. Faking DIMM EDAC data\n");
299 dimm->nr_pages = 1000;
300 dimm->grain = 128;
301 dimm->mtype = MEM_UNKNOWN;
302 dimm->dtype = DEV_UNKNOWN;
303 dimm->edac_mode = EDAC_SECDED;
304 }
Mauro Carvalho Chehab77c5f5d2013-02-15 06:11:57 -0300305
306 rc = edac_mc_add_mc(mci);
307 if (rc < 0) {
308 pr_info(GHES_PFX "Can't register at EDAC core\n");
309 edac_mc_free(mci);
310 mutex_unlock(&ghes_edac_lock);
311 return -ENODEV;
312 }
313
314 ghes_edac_mc_num++;
315 mutex_unlock(&ghes_edac_lock);
316 return 0;
317}
318EXPORT_SYMBOL_GPL(ghes_edac_register);
319
320void ghes_edac_unregister(struct ghes *ghes)
321{
322 struct mem_ctl_info *mci;
323 struct ghes_edac_pvt *pvt;
324
325 list_for_each_entry(pvt, &ghes_reglist, list) {
326 if (ghes == pvt->ghes) {
327 mci = pvt->mci;
328 edac_mc_del_mc(mci->pdev);
329 edac_mc_free(mci);
330 list_del(&pvt->list);
331 }
332 }
333}
334EXPORT_SYMBOL_GPL(ghes_edac_unregister);