blob: db2ba31ba2b10cd2202d769beee8201b3fc4fe4a [file] [log] [blame]
Alan Coxda9bb1d2006-01-18 17:44:13 -08001/*
2 * edac_mc kernel module
Doug Thompson49c0dab72006-07-10 04:45:19 -07003 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
Alan Coxda9bb1d2006-01-18 17:44:13 -08004 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
Alan Coxda9bb1d2006-01-18 17:44:13 -080015#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
Alan Coxda9bb1d2006-01-18 17:44:13 -080028#include <linux/ctype.h>
Dave Jiangc0d12172007-07-19 01:49:46 -070029#include <linux/edac.h>
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -030030#include <linux/bitops.h>
Alan Coxda9bb1d2006-01-18 17:44:13 -080031#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
Douglas Thompson20bcb7a2007-07-19 01:49:47 -070034#include "edac_core.h"
Douglas Thompson7c9281d2007-07-19 01:49:33 -070035#include "edac_module.h"
Alan Coxda9bb1d2006-01-18 17:44:13 -080036
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -030037#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
/*
 * Global list of memory controllers known to this file, and the mutex
 * that the list-manipulating helpers in this file expect to be held while
 * the list is modified.
 */
static LIST_HEAD(mc_devices);
static DEFINE_MUTEX(mem_ctls_mutex);
Alan Coxda9bb1d2006-01-18 17:44:13 -080044
Alan Coxda9bb1d2006-01-18 17:44:13 -080045#ifdef CONFIG_EDAC_DEBUG
46
Mauro Carvalho Chehaba4b4be32012-01-27 10:26:13 -030047static void edac_mc_dump_channel(struct rank_info *chan)
Alan Coxda9bb1d2006-01-18 17:44:13 -080048{
49 debugf4("\tchannel = %p\n", chan);
50 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
Alan Coxda9bb1d2006-01-18 17:44:13 -080051 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -030052 debugf4("\tchannel->dimm = %p\n", chan->dimm);
53}
54
55static void edac_mc_dump_dimm(struct dimm_info *dimm)
56{
57 int i;
58
59 debugf4("\tdimm = %p\n", dimm);
60 debugf4("\tdimm->label = '%s'\n", dimm->label);
61 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
62 debugf4("\tdimm location ");
63 for (i = 0; i < dimm->mci->n_layers; i++) {
64 printk(KERN_CONT "%d", dimm->location[i]);
65 if (i < dimm->mci->n_layers - 1)
66 printk(KERN_CONT ".");
67 }
68 printk(KERN_CONT "\n");
69 debugf4("\tdimm->grain = %d\n", dimm->grain);
70 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
Alan Coxda9bb1d2006-01-18 17:44:13 -080071}
72
Adrian Bunk2da1c112007-07-19 01:49:32 -070073static void edac_mc_dump_csrow(struct csrow_info *csrow)
Alan Coxda9bb1d2006-01-18 17:44:13 -080074{
75 debugf4("\tcsrow = %p\n", csrow);
76 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
Douglas Thompson079708b2007-07-19 01:49:58 -070077 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
Alan Coxda9bb1d2006-01-18 17:44:13 -080078 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
79 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
Douglas Thompson079708b2007-07-19 01:49:58 -070080 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
Alan Coxda9bb1d2006-01-18 17:44:13 -080081 debugf4("\tcsrow->channels = %p\n", csrow->channels);
82 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
83}
84
Adrian Bunk2da1c112007-07-19 01:49:32 -070085static void edac_mc_dump_mci(struct mem_ctl_info *mci)
Alan Coxda9bb1d2006-01-18 17:44:13 -080086{
87 debugf3("\tmci = %p\n", mci);
88 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
89 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
90 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
91 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
92 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
93 mci->nr_csrows, mci->csrows);
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -030094 debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
95 mci->tot_dimms, mci->dimms);
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -030096 debugf3("\tdev = %p\n", mci->pdev);
Douglas Thompson079708b2007-07-19 01:49:58 -070097 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
Alan Coxda9bb1d2006-01-18 17:44:13 -080098 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
99}
100
Borislav Petkov24f9a7f2010-10-07 18:29:15 +0200101#endif /* CONFIG_EDAC_DEBUG */
102
/*
 * Human-readable names for memory types.
 *
 * The entries are positional: keep their order in sync with the
 * enum mem_type.
 */
const char *edac_mem_types[] = {
	/* placeholders */
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",

	/* pre-SDRAM technologies */
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",

	/* SDR / DDR */
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",

	/* DDR2 */
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",

	/* DDR3 */
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
125EXPORT_SYMBOL_GPL(edac_mem_types);
126
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * Reserves room for @n_elems items of @size bytes starting at the offset
 * stored in '*p', after rounding that offset up so that the first item is
 * aligned at least as strictly as the compiler would align an object of
 * @size bytes.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets when
 * laying out a struct together with its embedded structs in one
 * allocation, as edac_mc_alloc() does.
 *
 * Returns the (aligned) offset of the first reserved item. At return,
 * '*p' points just past the last reserved item, ready to be used on the
 * next call to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned long addr = (unsigned long)*p;
	unsigned align, r;

	/*
	 * Pick an alignment at least as stringent as what the compiler
	 * would provide for an item X such that sizeof(X) is 'size'.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by
	 * default.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		align = 1;	/* byte-sized (or empty) items need no padding */

	/*
	 * The padding depends on where the item would currently land, not
	 * on the item size: size % align says nothing about whether the
	 * running offset is itself aligned.
	 */
	r = addr % align;
	if (r != 0)
		addr += align - r;

	/* Next free offset: just past the n_elems items reserved here. */
	*p = (void *)(addr + size * n_elems);

	return (void *)addr;
}
181
Alan Coxda9bb1d2006-01-18 17:44:13 -0800182/**
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300183 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
184 * @mc_num: Memory controller number
185 * @n_layers: Number of MC hierarchy layers
186 * layers: Describes each layer as seen by the Memory Controller
187 * @size_pvt: size of private storage needed
188 *
Alan Coxda9bb1d2006-01-18 17:44:13 -0800189 *
190 * Everything is kmalloc'ed as one big chunk - more efficient.
191 * Only can be used if all structures have the same lifetime - otherwise
192 * you have to allocate and initialize your own structures.
193 *
194 * Use edac_mc_free() to free mc structures allocated by this function.
195 *
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300196 * NOTE: drivers handle multi-rank memories in different ways: in some
197 * drivers, one multi-rank memory stick is mapped as one entry, while, in
198 * others, a single multi-rank memory stick would be mapped into several
199 * entries. Currently, this function will allocate multiple struct dimm_info
200 * on such scenarios, as grouping the multiple ranks require drivers change.
201 *
Alan Coxda9bb1d2006-01-18 17:44:13 -0800202 * Returns:
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -0300203 * On failure: NULL
204 * On success: struct mem_ctl_info pointer
Alan Coxda9bb1d2006-01-18 17:44:13 -0800205 */
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -0300206struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
207 unsigned n_layers,
208 struct edac_mc_layer *layers,
209 unsigned sz_pvt)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800210{
211 struct mem_ctl_info *mci;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300212 struct edac_mc_layer *layer;
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300213 struct csrow_info *csr;
214 struct rank_info *chan;
Mauro Carvalho Chehaba7d7d2e2012-01-27 14:12:32 -0300215 struct dimm_info *dimm;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300216 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
217 unsigned pos[EDAC_MAX_LAYERS];
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300218 unsigned size, tot_dimms = 1, count = 1;
219 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
Mauro Carvalho Chehab5926ff52012-02-09 11:05:20 -0300220 void *pvt, *p, *ptr = NULL;
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300221 int i, j, row, chn, n, len, off;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300222 bool per_rank = false;
223
224 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
225 /*
226 * Calculate the total amount of dimms and csrows/cschannels while
227 * in the old API emulation mode
228 */
229 for (i = 0; i < n_layers; i++) {
230 tot_dimms *= layers[i].size;
231 if (layers[i].is_virt_csrow)
232 tot_csrows *= layers[i].size;
233 else
234 tot_channels *= layers[i].size;
235
236 if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
237 per_rank = true;
238 }
Alan Coxda9bb1d2006-01-18 17:44:13 -0800239
240 /* Figure out the offsets of the various items from the start of an mc
241 * structure. We want the alignment of each item to be at least as
242 * stringent as what the compiler would provide if we could simply
243 * hardcode everything into a single struct.
244 */
Mauro Carvalho Chehab93e4fe62012-04-16 10:18:12 -0300245 mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300246 layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300247 for (i = 0; i < n_layers; i++) {
248 count *= layers[i].size;
249 debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
250 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
251 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
252 tot_errcount += 2 * count;
253 }
254
255 debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
Mauro Carvalho Chehab93e4fe62012-04-16 10:18:12 -0300256 pvt = edac_align_ptr(&ptr, sz_pvt, 1);
Douglas Thompson079708b2007-07-19 01:49:58 -0700257 size = ((unsigned long)pvt) + sz_pvt;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800258
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300259 debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
260 __func__, size,
261 tot_dimms,
262 per_rank ? "ranks" : "dimms",
263 tot_csrows * tot_channels);
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300264
Doug Thompson8096cfa2007-07-19 01:50:27 -0700265 mci = kzalloc(size, GFP_KERNEL);
266 if (mci == NULL)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800267 return NULL;
268
269 /* Adjust pointers so they point within the memory we just allocated
270 * rather than an imaginary chunk of memory located at address 0.
271 */
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300272 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300273 for (i = 0; i < n_layers; i++) {
274 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
275 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
276 }
Douglas Thompson079708b2007-07-19 01:49:58 -0700277 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800278
Doug Thompsonb8f6f972007-07-19 01:50:26 -0700279 /* setup index and various internal pointers */
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300280 mci->mc_idx = mc_num;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300281 mci->tot_dimms = tot_dimms;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800282 mci->pvt_info = pvt;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300283 mci->n_layers = n_layers;
284 mci->layers = layer;
285 memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
286 mci->nr_csrows = tot_csrows;
287 mci->num_cschannel = tot_channels;
288 mci->mem_is_per_rank = per_rank;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800289
Mauro Carvalho Chehaba7d7d2e2012-01-27 14:12:32 -0300290 /*
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300291 * Alocate and fill the csrow/channels structs
Mauro Carvalho Chehaba7d7d2e2012-01-27 14:12:32 -0300292 */
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300293 mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
294 if (!mci->csrows)
295 goto error;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300296 for (row = 0; row < tot_csrows; row++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300297 csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
298 if (!csr)
299 goto error;
300 mci->csrows[row] = csr;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300301 csr->csrow_idx = row;
302 csr->mci = mci;
303 csr->nr_channels = tot_channels;
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300304 csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
305 GFP_KERNEL);
306 if (!csr->channels)
307 goto error;
Mauro Carvalho Chehaba7d7d2e2012-01-27 14:12:32 -0300308
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300309 for (chn = 0; chn < tot_channels; chn++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300310 chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
311 if (!chan)
312 goto error;
313 csr->channels[chn] = chan;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800314 chan->chan_idx = chn;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300315 chan->csrow = csr;
316 }
317 }
Mauro Carvalho Chehaba7d7d2e2012-01-27 14:12:32 -0300318
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300319 /*
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300320 * Allocate and fill the dimm structs
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300321 */
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300322 mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
323 if (!mci->dimms)
324 goto error;
325
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300326 memset(&pos, 0, sizeof(pos));
327 row = 0;
328 chn = 0;
329 debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
330 per_rank ? "ranks" : "dimms");
331 for (i = 0; i < tot_dimms; i++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300332 chan = mci->csrows[row]->channels[chn];
333 off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
334 if (off < 0 || off >= tot_dimms) {
335 edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
336 goto error;
337 }
338
339 dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
340 mci->dimms[off] = dimm;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300341 dimm->mci = mci;
342
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300343 debugf2("%s: %d: %s%i (%d:%d:%d): row %d, chan %d\n", __func__,
344 i, per_rank ? "rank" : "dimm", off,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300345 pos[0], pos[1], pos[2], row, chn);
346
Mauro Carvalho Chehab5926ff52012-02-09 11:05:20 -0300347 /*
348 * Copy DIMM location and initialize it.
349 */
350 len = sizeof(dimm->label);
351 p = dimm->label;
352 n = snprintf(p, len, "mc#%u", mc_num);
353 p += n;
354 len -= n;
355 for (j = 0; j < n_layers; j++) {
356 n = snprintf(p, len, "%s#%u",
357 edac_layer_name[layers[j].type],
358 pos[j]);
359 p += n;
360 len -= n;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300361 dimm->location[j] = pos[j];
362
Mauro Carvalho Chehab5926ff52012-02-09 11:05:20 -0300363 if (len <= 0)
364 break;
365 }
366
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300367 /* Link it to the csrows old API data */
368 chan->dimm = dimm;
369 dimm->csrow = row;
370 dimm->cschannel = chn;
371
372 /* Increment csrow location */
373 row++;
374 if (row == tot_csrows) {
375 row = 0;
376 chn++;
377 }
378
379 /* Increment dimm location */
380 for (j = n_layers - 1; j >= 0; j--) {
381 pos[j]++;
382 if (pos[j] < layers[j].size)
383 break;
384 pos[j] = 0;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800385 }
386 }
387
Dave Jiang81d87cb2007-07-19 01:49:52 -0700388 mci->op_state = OP_ALLOC;
389
Doug Thompson8096cfa2007-07-19 01:50:27 -0700390 /* at this point, the root kobj is valid, and in order to
391 * 'free' the object, then the function:
392 * edac_mc_unregister_sysfs_main_kobj() must be called
393 * which will perform kobj unregistration and the actual free
394 * will occur during the kobject callback operation
395 */
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300396
Alan Coxda9bb1d2006-01-18 17:44:13 -0800397 return mci;
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300398
399error:
400 if (mci->dimms) {
401 for (i = 0; i < tot_dimms; i++)
402 kfree(mci->dimms[i]);
403 kfree(mci->dimms);
404 }
405 if (mci->csrows) {
406 for (chn = 0; chn < tot_channels; chn++) {
407 csr = mci->csrows[chn];
408 if (csr) {
409 for (chn = 0; chn < tot_channels; chn++)
410 kfree(csr->channels[chn]);
411 kfree(csr);
412 }
413 kfree(mci->csrows[i]);
414 }
415 kfree(mci->csrows);
416 }
417 kfree(mci);
418
419 return NULL;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800420}
Dave Peterson91105402006-03-26 01:38:55 -0800421EXPORT_SYMBOL_GPL(edac_mc_alloc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800422
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * The release is delegated to edac_unregister_sysfs(); the mci instance
 * goes away when the sysfs object is dropped.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* dropping the sysfs object is what actually frees the mci */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800436
Doug Thompsonbce19682007-07-26 10:41:14 -0700437
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300438/**
Doug Thompsonbce19682007-07-26 10:41:14 -0700439 * find_mci_by_dev
440 *
441 * scan list of controllers looking for the one that manages
442 * the 'dev' device
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300443 * @dev: pointer to a struct device related with the MCI
Doug Thompsonbce19682007-07-26 10:41:14 -0700444 */
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300445struct mem_ctl_info *find_mci_by_dev(struct device *dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800446{
447 struct mem_ctl_info *mci;
448 struct list_head *item;
449
Dave Peterson537fba22006-03-26 01:38:40 -0800450 debugf3("%s()\n", __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800451
452 list_for_each(item, &mc_devices) {
453 mci = list_entry(item, struct mem_ctl_info, link);
454
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -0300455 if (mci->pdev == dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800456 return mci;
457 }
458
459 return NULL;
460}
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300461EXPORT_SYMBOL_GPL(find_mci_by_dev);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800462
Dave Jiang81d87cb2007-07-19 01:49:52 -0700463/*
464 * handler for EDAC to check if NMI type handler has asserted interrupt
465 */
466static int edac_mc_assert_error_check_and_clear(void)
467{
Dave Jiang66ee2f92007-07-19 01:49:54 -0700468 int old_state;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700469
Douglas Thompson079708b2007-07-19 01:49:58 -0700470 if (edac_op_state == EDAC_OPSTATE_POLL)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700471 return 1;
472
Dave Jiang66ee2f92007-07-19 01:49:54 -0700473 old_state = edac_err_assert;
474 edac_err_assert = 0;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700475
Dave Jiang66ee2f92007-07-19 01:49:54 -0700476 return old_state;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700477}
478
479/*
480 * edac_mc_workq_function
481 * performs the operation scheduled by a workq request
482 */
Dave Jiang81d87cb2007-07-19 01:49:52 -0700483static void edac_mc_workq_function(struct work_struct *work_req)
484{
Jean Delvarefbeb4382009-04-13 14:40:21 -0700485 struct delayed_work *d_work = to_delayed_work(work_req);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700486 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700487
488 mutex_lock(&mem_ctls_mutex);
489
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700490 /* if this control struct has movd to offline state, we are done */
491 if (mci->op_state == OP_OFFLINE) {
492 mutex_unlock(&mem_ctls_mutex);
493 return;
494 }
495
Dave Jiang81d87cb2007-07-19 01:49:52 -0700496 /* Only poll controllers that are running polled and have a check */
497 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
498 mci->edac_check(mci);
499
Dave Jiang81d87cb2007-07-19 01:49:52 -0700500 mutex_unlock(&mem_ctls_mutex);
501
502 /* Reschedule */
Dave Jiang4de78c62007-07-19 01:49:54 -0700503 queue_delayed_work(edac_workqueue, &mci->work,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700504 msecs_to_jiffies(edac_mc_get_poll_msec()));
Dave Jiang81d87cb2007-07-19 01:49:52 -0700505}
506
507/*
508 * edac_mc_workq_setup
509 * initialize a workq item for this mci
510 * passing in the new delay period in msec
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700511 *
512 * locking model:
513 *
514 * called with the mem_ctls_mutex held
Dave Jiang81d87cb2007-07-19 01:49:52 -0700515 */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700516static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700517{
518 debugf0("%s()\n", __func__);
519
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700520 /* if this instance is not in the POLL state, then simply return */
521 if (mci->op_state != OP_RUNNING_POLL)
522 return;
523
Dave Jiang81d87cb2007-07-19 01:49:52 -0700524 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700525 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
526}
527
528/*
529 * edac_mc_workq_teardown
530 * stop the workq processing on this mci
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700531 *
532 * locking model:
533 *
534 * called WITHOUT lock held
Dave Jiang81d87cb2007-07-19 01:49:52 -0700535 */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700536static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700537{
538 int status;
539
Borislav Petkov00740c52010-09-26 12:42:23 +0200540 if (mci->op_state != OP_RUNNING_POLL)
541 return;
542
Doug Thompsonbce19682007-07-26 10:41:14 -0700543 status = cancel_delayed_work(&mci->work);
544 if (status == 0) {
545 debugf0("%s() not canceled, flush the queue\n",
546 __func__);
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700547
Doug Thompsonbce19682007-07-26 10:41:14 -0700548 /* workq instance might be running, wait for it */
549 flush_workqueue(edac_workqueue);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700550 }
551}
552
553/*
Doug Thompsonbce19682007-07-26 10:41:14 -0700554 * edac_mc_reset_delay_period(unsigned long value)
555 *
556 * user space has updated our poll period value, need to
557 * reset our workq delays
Dave Jiang81d87cb2007-07-19 01:49:52 -0700558 */
Doug Thompsonbce19682007-07-26 10:41:14 -0700559void edac_mc_reset_delay_period(int value)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700560{
Doug Thompsonbce19682007-07-26 10:41:14 -0700561 struct mem_ctl_info *mci;
562 struct list_head *item;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700563
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700564 mutex_lock(&mem_ctls_mutex);
565
Doug Thompsonbce19682007-07-26 10:41:14 -0700566 /* scan the list and turn off all workq timers, doing so under lock
567 */
568 list_for_each(item, &mc_devices) {
569 mci = list_entry(item, struct mem_ctl_info, link);
570
571 if (mci->op_state == OP_RUNNING_POLL)
572 cancel_delayed_work(&mci->work);
573 }
574
575 mutex_unlock(&mem_ctls_mutex);
576
577
578 /* re-walk the list, and reset the poll delay */
579 mutex_lock(&mem_ctls_mutex);
580
581 list_for_each(item, &mc_devices) {
582 mci = list_entry(item, struct mem_ctl_info, link);
583
584 edac_mc_workq_setup(mci, (unsigned long) value);
585 }
Dave Jiang81d87cb2007-07-19 01:49:52 -0700586
587 mutex_unlock(&mem_ctls_mutex);
588}
589
Doug Thompsonbce19682007-07-26 10:41:14 -0700590
591
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700592/* Return 0 on success, 1 on failure.
593 * Before calling this function, caller must
594 * assign a unique value to mci->mc_idx.
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700595 *
596 * locking model:
597 *
598 * called with the mem_ctls_mutex lock held
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700599 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700600static int add_mc_to_global_list(struct mem_ctl_info *mci)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800601{
602 struct list_head *item, *insert_before;
603 struct mem_ctl_info *p;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800604
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700605 insert_before = &mc_devices;
606
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -0300607 p = find_mci_by_dev(mci->pdev);
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700608 if (unlikely(p != NULL))
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700609 goto fail0;
610
611 list_for_each(item, &mc_devices) {
612 p = list_entry(item, struct mem_ctl_info, link);
613
614 if (p->mc_idx >= mci->mc_idx) {
615 if (unlikely(p->mc_idx == mci->mc_idx))
616 goto fail1;
617
618 insert_before = item;
619 break;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800620 }
Alan Coxda9bb1d2006-01-18 17:44:13 -0800621 }
622
623 list_add_tail_rcu(&mci->link, insert_before);
Dave Jiangc0d12172007-07-19 01:49:46 -0700624 atomic_inc(&edac_handlers);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800625 return 0;
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700626
Douglas Thompson052dfb42007-07-19 01:50:13 -0700627fail0:
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700628 edac_printk(KERN_WARNING, EDAC_MC,
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -0300629 "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
Stephen Rothwell17aa7e02008-05-05 13:54:19 +1000630 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700631 return 1;
632
Douglas Thompson052dfb42007-07-19 01:50:13 -0700633fail1:
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700634 edac_printk(KERN_WARNING, EDAC_MC,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700635 "bug in low-level driver: attempt to assign\n"
636 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700637 return 1;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800638}
639
Dave Petersone7ecd892006-03-26 01:38:52 -0800640static void del_mc_from_global_list(struct mem_ctl_info *mci)
Dave Petersona1d03fc2006-03-26 01:38:46 -0800641{
Dave Jiangc0d12172007-07-19 01:49:46 -0700642 atomic_dec(&edac_handlers);
Dave Petersona1d03fc2006-03-26 01:38:46 -0800643 list_del_rcu(&mci->link);
Lai Jiangshane2e77092011-05-26 16:25:58 -0700644
645 /* these are for safe removal of devices from global list while
646 * NMI handlers may be traversing list
647 */
648 synchronize_rcu();
649 INIT_LIST_HEAD(&mci->link);
Dave Petersona1d03fc2006-03-26 01:38:46 -0800650}
651
Alan Coxda9bb1d2006-01-18 17:44:13 -0800652/**
Douglas Thompson5da08312007-07-19 01:49:31 -0700653 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
654 *
655 * If found, return a pointer to the structure.
656 * Else return NULL.
657 *
658 * Caller must hold mem_ctls_mutex.
659 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700660struct mem_ctl_info *edac_mc_find(int idx)
Douglas Thompson5da08312007-07-19 01:49:31 -0700661{
662 struct list_head *item;
663 struct mem_ctl_info *mci;
664
665 list_for_each(item, &mc_devices) {
666 mci = list_entry(item, struct mem_ctl_info, link);
667
668 if (mci->mc_idx >= idx) {
669 if (mci->mc_idx == idx)
670 return mci;
671
672 break;
673 }
674 }
675
676 return NULL;
677}
678EXPORT_SYMBOL(edac_mc_find);
679
680/**
Dave Peterson472678e2006-03-26 01:38:49 -0800681 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
682 * create sysfs entries associated with mci structure
Alan Coxda9bb1d2006-01-18 17:44:13 -0800683 * @mci: pointer to the mci structure to be added to the list
684 *
685 * Return:
686 * 0 Success
687 * !0 Failure
688 */
689
690/* FIXME - should a warning be printed if no error detection? correction? */
Doug Thompsonb8f6f972007-07-19 01:50:26 -0700691int edac_mc_add_mc(struct mem_ctl_info *mci)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800692{
Dave Peterson537fba22006-03-26 01:38:40 -0800693 debugf0("%s()\n", __func__);
Doug Thompsonb8f6f972007-07-19 01:50:26 -0700694
Alan Coxda9bb1d2006-01-18 17:44:13 -0800695#ifdef CONFIG_EDAC_DEBUG
696 if (edac_debug_level >= 3)
697 edac_mc_dump_mci(mci);
Dave Petersone7ecd892006-03-26 01:38:52 -0800698
Alan Coxda9bb1d2006-01-18 17:44:13 -0800699 if (edac_debug_level >= 4) {
700 int i;
701
702 for (i = 0; i < mci->nr_csrows; i++) {
703 int j;
Dave Petersone7ecd892006-03-26 01:38:52 -0800704
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300705 edac_mc_dump_csrow(mci->csrows[i]);
706 for (j = 0; j < mci->csrows[i]->nr_channels; j++)
707 edac_mc_dump_channel(mci->csrows[i]->channels[j]);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800708 }
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300709 for (i = 0; i < mci->tot_dimms; i++)
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300710 edac_mc_dump_dimm(mci->dimms[i]);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800711 }
712#endif
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700713 mutex_lock(&mem_ctls_mutex);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800714
715 if (add_mc_to_global_list(mci))
Dave Peterson028a7b62006-03-26 01:38:47 -0800716 goto fail0;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800717
718 /* set load time so that error rate can be tracked */
719 mci->start_time = jiffies;
720
eric wollesen9794f332007-02-12 00:53:08 -0800721 if (edac_create_sysfs_mci_device(mci)) {
722 edac_mc_printk(mci, KERN_WARNING,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700723 "failed to create sysfs device\n");
eric wollesen9794f332007-02-12 00:53:08 -0800724 goto fail1;
725 }
Alan Coxda9bb1d2006-01-18 17:44:13 -0800726
Dave Jiang81d87cb2007-07-19 01:49:52 -0700727 /* If there IS a check routine, then we are running POLLED */
728 if (mci->edac_check != NULL) {
729 /* This instance is NOW RUNNING */
730 mci->op_state = OP_RUNNING_POLL;
731
732 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
733 } else {
734 mci->op_state = OP_RUNNING_INTERRUPT;
735 }
736
Alan Coxda9bb1d2006-01-18 17:44:13 -0800737 /* Report action taken */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700738 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
Stephen Rothwell17aa7e02008-05-05 13:54:19 +1000739 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
Alan Coxda9bb1d2006-01-18 17:44:13 -0800740
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700741 mutex_unlock(&mem_ctls_mutex);
Dave Peterson028a7b62006-03-26 01:38:47 -0800742 return 0;
743
Douglas Thompson052dfb42007-07-19 01:50:13 -0700744fail1:
Dave Peterson028a7b62006-03-26 01:38:47 -0800745 del_mc_from_global_list(mci);
746
Douglas Thompson052dfb42007-07-19 01:50:13 -0700747fail0:
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700748 mutex_unlock(&mem_ctls_mutex);
Dave Peterson028a7b62006-03-26 01:38:47 -0800749 return 1;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800750}
Dave Peterson91105402006-03-26 01:38:55 -0800751EXPORT_SYMBOL_GPL(edac_mc_add_mc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800752
Alan Coxda9bb1d2006-01-18 17:44:13 -0800753/**
Dave Peterson472678e2006-03-26 01:38:49 -0800754 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
755 * remove mci structure from global list
Doug Thompson37f04582006-06-30 01:56:07 -0700756 * @pdev: Pointer to 'struct device' representing mci structure to remove.
Alan Coxda9bb1d2006-01-18 17:44:13 -0800757 *
Dave Peterson18dbc332006-03-26 01:38:50 -0800758 * Return pointer to removed mci structure, or NULL if device not found.
Alan Coxda9bb1d2006-01-18 17:44:13 -0800759 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700760struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800761{
Dave Peterson18dbc332006-03-26 01:38:50 -0800762 struct mem_ctl_info *mci;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800763
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700764 debugf0("%s()\n", __func__);
765
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700766 mutex_lock(&mem_ctls_mutex);
Dave Peterson18dbc332006-03-26 01:38:50 -0800767
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700768 /* find the requested mci struct in the global list */
769 mci = find_mci_by_dev(dev);
770 if (mci == NULL) {
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700771 mutex_unlock(&mem_ctls_mutex);
Dave Peterson18dbc332006-03-26 01:38:50 -0800772 return NULL;
773 }
774
Alan Coxda9bb1d2006-01-18 17:44:13 -0800775 del_mc_from_global_list(mci);
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700776 mutex_unlock(&mem_ctls_mutex);
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700777
Borislav Petkovbb31b3122010-12-02 17:48:35 +0100778 /* flush workq processes */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700779 edac_mc_workq_teardown(mci);
Borislav Petkovbb31b3122010-12-02 17:48:35 +0100780
781 /* marking MCI offline */
782 mci->op_state = OP_OFFLINE;
783
784 /* remove from sysfs */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700785 edac_remove_sysfs_mci_device(mci);
786
Dave Peterson537fba22006-03-26 01:38:40 -0800787 edac_printk(KERN_INFO, EDAC_MC,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700788 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
Stephen Rothwell17aa7e02008-05-05 13:54:19 +1000789 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700790
Dave Peterson18dbc332006-03-26 01:38:50 -0800791 return mci;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800792}
Dave Peterson91105402006-03-26 01:38:55 -0800793EXPORT_SYMBOL_GPL(edac_mc_del_mc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800794
Adrian Bunk2da1c112007-07-19 01:49:32 -0700795static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
796 u32 size)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800797{
798 struct page *pg;
799 void *virt_addr;
800 unsigned long flags = 0;
801
Dave Peterson537fba22006-03-26 01:38:40 -0800802 debugf3("%s()\n", __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800803
804 /* ECC error page was not in our memory. Ignore it. */
Douglas Thompson079708b2007-07-19 01:49:58 -0700805 if (!pfn_valid(page))
Alan Coxda9bb1d2006-01-18 17:44:13 -0800806 return;
807
808 /* Find the actual page structure then map it and fix */
809 pg = pfn_to_page(page);
810
811 if (PageHighMem(pg))
812 local_irq_save(flags);
813
Cong Wang4e5df7c2011-11-25 23:14:19 +0800814 virt_addr = kmap_atomic(pg);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800815
816 /* Perform architecture specific atomic scrub operation */
817 atomic_scrub(virt_addr + offset, size);
818
819 /* Unmap and complete */
Cong Wang4e5df7c2011-11-25 23:14:19 +0800820 kunmap_atomic(virt_addr);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800821
822 if (PageHighMem(pg))
823 local_irq_restore(flags);
824}
825
Alan Coxda9bb1d2006-01-18 17:44:13 -0800826/* FIXME - should return -1 */
Dave Petersone7ecd892006-03-26 01:38:52 -0800827int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800828{
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300829 struct csrow_info **csrows = mci->csrows;
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300830 int row, i, j, n;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800831
Dave Peterson537fba22006-03-26 01:38:40 -0800832 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800833 row = -1;
834
835 for (i = 0; i < mci->nr_csrows; i++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300836 struct csrow_info *csrow = csrows[i];
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300837 n = 0;
838 for (j = 0; j < csrow->nr_channels; j++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -0300839 struct dimm_info *dimm = csrow->channels[j]->dimm;
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300840 n += dimm->nr_pages;
841 }
842 if (n == 0)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800843 continue;
844
Dave Peterson537fba22006-03-26 01:38:40 -0800845 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
846 "mask(0x%lx)\n", mci->mc_idx, __func__,
847 csrow->first_page, page, csrow->last_page,
848 csrow->page_mask);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800849
850 if ((page >= csrow->first_page) &&
851 (page <= csrow->last_page) &&
852 ((page & csrow->page_mask) ==
853 (csrow->first_page & csrow->page_mask))) {
854 row = i;
855 break;
856 }
857 }
858
859 if (row == -1)
Dave Peterson537fba22006-03-26 01:38:40 -0800860 edac_mc_printk(mci, KERN_ERR,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700861 "could not look up page error address %lx\n",
862 (unsigned long)page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800863
864 return row;
865}
Dave Peterson91105402006-03-26 01:38:55 -0800866EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800867
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300868const char *edac_layer_name[] = {
869 [EDAC_MC_LAYER_BRANCH] = "branch",
870 [EDAC_MC_LAYER_CHANNEL] = "channel",
871 [EDAC_MC_LAYER_SLOT] = "slot",
872 [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
873};
874EXPORT_SYMBOL_GPL(edac_layer_name);
875
876static void edac_inc_ce_error(struct mem_ctl_info *mci,
877 bool enable_per_layer_report,
878 const int pos[EDAC_MAX_LAYERS])
Alan Coxda9bb1d2006-01-18 17:44:13 -0800879{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300880 int i, index = 0;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800881
Mauro Carvalho Chehab5926ff52012-02-09 11:05:20 -0300882 mci->ce_mc++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300883
884 if (!enable_per_layer_report) {
885 mci->ce_noinfo_count++;
886 return;
887 }
888
889 for (i = 0; i < mci->n_layers; i++) {
890 if (pos[i] < 0)
891 break;
892 index += pos[i];
893 mci->ce_per_layer[i][index]++;
894
895 if (i < mci->n_layers - 1)
896 index *= mci->layers[i + 1].size;
897 }
898}
899
900static void edac_inc_ue_error(struct mem_ctl_info *mci,
901 bool enable_per_layer_report,
902 const int pos[EDAC_MAX_LAYERS])
903{
904 int i, index = 0;
905
Mauro Carvalho Chehab5926ff52012-02-09 11:05:20 -0300906 mci->ue_mc++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300907
908 if (!enable_per_layer_report) {
909 mci->ce_noinfo_count++;
910 return;
911 }
912
913 for (i = 0; i < mci->n_layers; i++) {
914 if (pos[i] < 0)
915 break;
916 index += pos[i];
917 mci->ue_per_layer[i][index]++;
918
919 if (i < mci->n_layers - 1)
920 index *= mci->layers[i + 1].size;
921 }
922}
923
924static void edac_ce_error(struct mem_ctl_info *mci,
925 const int pos[EDAC_MAX_LAYERS],
926 const char *msg,
927 const char *location,
928 const char *label,
929 const char *detail,
930 const char *other_detail,
931 const bool enable_per_layer_report,
932 const unsigned long page_frame_number,
933 const unsigned long offset_in_page,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300934 long grain)
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300935{
936 unsigned long remapped_page;
937
938 if (edac_mc_get_log_ce()) {
939 if (other_detail && *other_detail)
940 edac_mc_printk(mci, KERN_WARNING,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300941 "CE %s on %s (%s %s - %s)\n",
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300942 msg, label, location,
943 detail, other_detail);
944 else
945 edac_mc_printk(mci, KERN_WARNING,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300946 "CE %s on %s (%s %s)\n",
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300947 msg, label, location,
948 detail);
949 }
950 edac_inc_ce_error(mci, enable_per_layer_report, pos);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800951
952 if (mci->scrub_mode & SCRUB_SW_SRC) {
953 /*
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300954 * Some memory controllers (called MCs below) can remap
955 * memory so that it is still available at a different
956 * address when PCI devices map into memory.
957 * MC's that can't do this, lose the memory where PCI
958 * devices are mapped. This mapping is MC-dependent
959 * and so we call back into the MC driver for it to
960 * map the MC page to a physical (CPU) page which can
961 * then be mapped to a virtual page - which can then
962 * be scrubbed.
963 */
Alan Coxda9bb1d2006-01-18 17:44:13 -0800964 remapped_page = mci->ctl_page_to_phys ?
Douglas Thompson052dfb42007-07-19 01:50:13 -0700965 mci->ctl_page_to_phys(mci, page_frame_number) :
966 page_frame_number;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800967
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300968 edac_mc_scrub_block(remapped_page,
969 offset_in_page, grain);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800970 }
971}
972
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300973static void edac_ue_error(struct mem_ctl_info *mci,
974 const int pos[EDAC_MAX_LAYERS],
975 const char *msg,
976 const char *location,
977 const char *label,
978 const char *detail,
979 const char *other_detail,
980 const bool enable_per_layer_report)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800981{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300982 if (edac_mc_get_log_ue()) {
983 if (other_detail && *other_detail)
984 edac_mc_printk(mci, KERN_WARNING,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300985 "UE %s on %s (%s %s - %s)\n",
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300986 msg, label, location, detail,
987 other_detail);
988 else
989 edac_mc_printk(mci, KERN_WARNING,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300990 "UE %s on %s (%s %s)\n",
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300991 msg, label, location, detail);
992 }
Dave Petersone7ecd892006-03-26 01:38:52 -0800993
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300994 if (edac_mc_get_panic_on_ue()) {
995 if (other_detail && *other_detail)
996 panic("UE %s on %s (%s%s - %s)\n",
997 msg, label, location, detail, other_detail);
998 else
999 panic("UE %s on %s (%s%s)\n",
1000 msg, label, location, detail);
1001 }
1002
1003 edac_inc_ue_error(mci, enable_per_layer_report, pos);
Alan Coxda9bb1d2006-01-18 17:44:13 -08001004}
1005
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001006#define OTHER_LABEL " or "
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001007
1008/**
1009 * edac_mc_handle_error - reports a memory event to userspace
1010 *
1011 * @type: severity of the error (CE/UE/Fatal)
1012 * @mci: a struct mem_ctl_info pointer
1013 * @page_frame_number: mem page where the error occurred
1014 * @offset_in_page: offset of the error inside the page
1015 * @syndrome: ECC syndrome
1016 * @top_layer: Memory layer[0] position
1017 * @mid_layer: Memory layer[1] position
1018 * @low_layer: Memory layer[2] position
1019 * @msg: Message meaningful to the end users that
1020 * explains the event
1021 * @other_detail: Technical details about the event that
1022 * may help hardware manufacturers and
1023 * EDAC developers to analyse the event
1024 * @arch_log: Architecture-specific struct that can
1025 * be used to add extended information to the
1026 * tracepoint, like dumping MCE registers.
1027 */
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001028void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1029 struct mem_ctl_info *mci,
1030 const unsigned long page_frame_number,
1031 const unsigned long offset_in_page,
1032 const unsigned long syndrome,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001033 const int top_layer,
1034 const int mid_layer,
1035 const int low_layer,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001036 const char *msg,
1037 const char *other_detail,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001038 const void *arch_log)
Alan Coxda9bb1d2006-01-18 17:44:13 -08001039{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001040 /* FIXME: too much for stack: move it to some pre-alocated area */
1041 char detail[80], location[80];
1042 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1043 char *p;
1044 int row = -1, chan = -1;
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001045 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001046 int i;
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001047 long grain;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001048 bool enable_per_layer_report = false;
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001049 u16 error_count; /* FIXME: make it a parameter */
1050 u8 grain_bits;
Alan Coxda9bb1d2006-01-18 17:44:13 -08001051
Dave Peterson537fba22006-03-26 01:38:40 -08001052 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -08001053
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001054 /*
1055 * Check if the event report is consistent and if the memory
1056 * location is known. If it is known, enable_per_layer_report will be
1057 * true, the DIMM(s) label info will be filled and the per-layer
1058 * error counters will be incremented.
1059 */
1060 for (i = 0; i < mci->n_layers; i++) {
1061 if (pos[i] >= (int)mci->layers[i].size) {
1062 if (type == HW_EVENT_ERR_CORRECTED)
1063 p = "CE";
1064 else
1065 p = "UE";
1066
1067 edac_mc_printk(mci, KERN_ERR,
1068 "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1069 edac_layer_name[mci->layers[i].type],
1070 pos[i], mci->layers[i].size);
1071 /*
1072 * Instead of just returning it, let's use what's
1073 * known about the error. The increment routines and
1074 * the DIMM filter logic will do the right thing by
1075 * pointing the likely damaged DIMMs.
1076 */
1077 pos[i] = -1;
1078 }
1079 if (pos[i] >= 0)
1080 enable_per_layer_report = true;
Alan Coxda9bb1d2006-01-18 17:44:13 -08001081 }
1082
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001083 /*
1084 * Get the dimm label/grain that applies to the match criteria.
1085 * As the error algorithm may not be able to point to just one memory
1086 * stick, the logic here will get all possible labels that could
1087 * pottentially be affected by the error.
1088 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1089 * to have only the MC channel and the MC dimm (also called "branch")
1090 * but the channel is not known, as the memory is arranged in pairs,
1091 * where each memory belongs to a separate channel within the same
1092 * branch.
1093 */
1094 grain = 0;
1095 p = label;
1096 *p = '\0';
1097 for (i = 0; i < mci->tot_dimms; i++) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -03001098 struct dimm_info *dimm = mci->dimms[i];
Dave Petersone7ecd892006-03-26 01:38:52 -08001099
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001100 if (top_layer >= 0 && top_layer != dimm->location[0])
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001101 continue;
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001102 if (mid_layer >= 0 && mid_layer != dimm->location[1])
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001103 continue;
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001104 if (low_layer >= 0 && low_layer != dimm->location[2])
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001105 continue;
1106
1107 /* get the max grain, over the error match range */
1108 if (dimm->grain > grain)
1109 grain = dimm->grain;
1110
1111 /*
1112 * If the error is memory-controller wide, there's no need to
1113 * seek for the affected DIMMs because the whole
1114 * channel/memory controller/... may be affected.
1115 * Also, don't show errors for empty DIMM slots.
1116 */
1117 if (enable_per_layer_report && dimm->nr_pages) {
1118 if (p != label) {
1119 strcpy(p, OTHER_LABEL);
1120 p += strlen(OTHER_LABEL);
1121 }
1122 strcpy(p, dimm->label);
1123 p += strlen(p);
1124 *p = '\0';
1125
1126 /*
1127 * get csrow/channel of the DIMM, in order to allow
1128 * incrementing the compat API counters
1129 */
1130 debugf4("%s: %s csrows map: (%d,%d)\n",
1131 __func__,
1132 mci->mem_is_per_rank ? "rank" : "dimm",
1133 dimm->csrow, dimm->cschannel);
1134
1135 if (row == -1)
1136 row = dimm->csrow;
1137 else if (row >= 0 && row != dimm->csrow)
1138 row = -2;
1139
1140 if (chan == -1)
1141 chan = dimm->cschannel;
1142 else if (chan >= 0 && chan != dimm->cschannel)
1143 chan = -2;
1144 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001145 }
1146
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001147 if (!enable_per_layer_report) {
1148 strcpy(label, "any memory");
1149 } else {
1150 debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1151 __func__, row, chan);
1152 if (p == label)
1153 strcpy(label, "unknown memory");
1154 if (type == HW_EVENT_ERR_CORRECTED) {
1155 if (row >= 0) {
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -03001156 mci->csrows[row]->ce_count++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001157 if (chan >= 0)
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -03001158 mci->csrows[row]->channels[chan]->ce_count++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001159 }
1160 } else
1161 if (row >= 0)
Mauro Carvalho Chehabde3910eb2012-04-24 15:05:43 -03001162 mci->csrows[row]->ue_count++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001163 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001164
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001165 /* Fill the RAM location data */
1166 p = location;
1167 for (i = 0; i < mci->n_layers; i++) {
1168 if (pos[i] < 0)
1169 continue;
1170
1171 p += sprintf(p, "%s:%d ",
1172 edac_layer_name[mci->layers[i].type],
1173 pos[i]);
1174 }
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001175 if (p > location)
1176 *(p - 1) = '\0';
1177
1178 /* Report the error via the trace interface */
1179
1180 error_count = 1; /* FIXME: allow change it */
1181 grain_bits = fls_long(grain) + 1;
1182 trace_mc_event(type, msg, label, error_count,
1183 mci->mc_idx, top_layer, mid_layer, low_layer,
1184 PAGES_TO_MiB(page_frame_number) | offset_in_page,
1185 grain_bits, syndrome, other_detail);
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001186
1187 /* Memory type dependent details about the error */
1188 if (type == HW_EVENT_ERR_CORRECTED) {
1189 snprintf(detail, sizeof(detail),
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001190 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
Douglas Thompson052dfb42007-07-19 01:50:13 -07001191 page_frame_number, offset_in_page,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001192 grain, syndrome);
1193 edac_ce_error(mci, pos, msg, location, label, detail,
1194 other_detail, enable_per_layer_report,
1195 page_frame_number, offset_in_page, grain);
1196 } else {
1197 snprintf(detail, sizeof(detail),
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -03001198 "page:0x%lx offset:0x%lx grain:%ld",
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001199 page_frame_number, offset_in_page, grain);
Alan Coxda9bb1d2006-01-18 17:44:13 -08001200
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001201 edac_ue_error(mci, pos, msg, location, label, detail,
1202 other_detail, enable_per_layer_report);
1203 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001204}
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001205EXPORT_SYMBOL_GPL(edac_mc_handle_error);