/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->pdev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does it
 * above, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}
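
/*
 * Illustrative sketch (not part of this file's build): callers typically run
 * edac_align_ptr() twice - first over a NULL base to compute aligned offsets
 * and the total size for one kzalloc(), then they rebase those offsets on the
 * allocated chunk, as edac_mc_alloc() below does.  "struct foo" is a made-up
 * example type.
 *
 *	void *ptr = NULL;
 *	struct foo *foo;
 *	u32 *counters;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), 16);
 *	size = (unsigned long)ptr;	(total bytes for the single-shot alloc)
 */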

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks require drivers change.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
			       pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
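
/*
 * Illustrative sketch (not part of this file's build): a hypothetical driver
 * with 4 chip-select rows and 2 channels per row could allocate its mci like
 * this; "struct example_pvt" is an assumed driver-private type.
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct example_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */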

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_unregister_sysfs(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

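/*
 * Polling lifecycle, as implemented above (descriptive summary, no new code):
 * edac_mc_add_mc() puts an mci whose ->edac_check is non-NULL into
 * OP_RUNNING_POLL and calls edac_mc_workq_setup(); edac_mc_workq_function()
 * then invokes ->edac_check(mci) and requeues itself every
 * edac_mc_get_poll_msec() milliseconds, until edac_mc_del_mc() runs
 * edac_mc_workq_teardown() and marks the instance OP_OFFLINE.
 */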

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
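
/*
 * Illustrative sketch (not part of this file's build): the usual probe-time
 * sequence in a hypothetical driver around edac_mc_add_mc().  "example_check"
 * and "struct example_pvt" are assumed driver-side names; leaving edac_check
 * NULL instead would select OP_RUNNING_INTERRUPT rather than polled mode.
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct example_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mod_name = "example_edac";
 *	mci->ctl_name = "example_mc";
 *	mci->edac_check = example_check;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */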

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
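
/*
 * Illustrative sketch (not part of this file's build): the matching remove
 * path in a hypothetical driver - look the mci up by its device, then free
 * what edac_mc_alloc() handed out.
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 */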

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
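
/*
 * Worked example of the match above (illustrative numbers only): a csrow
 * with first_page = 0x1000, last_page = 0x1fff and page_mask = 0 matches any
 * page in [0x1000, 0x1fff].  A non-zero page_mask additionally requires
 * (page & page_mask) == (first_page & page_mask), which lets a driver
 * describe interleaved layouts where only some page-address bits belong to
 * this chip-select row.
 */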

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
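
/*
 * Worked example of the index arithmetic above (illustrative sizes only):
 * with two layers of size {2, 4} and pos = {1, 3}, the loop first bumps the
 * layer-0 counter at index 1, then computes 1 * 4 = 4 and 4 + 3 = 7 and
 * bumps the layer-1 counter at index 7 - i.e. the row-major offset
 * pos[0] * size[1] + pos[1] into the flattened per-layer counter array.
 */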

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:			severity of the error (CE/UE/Fatal)
 * @mci:			a struct mem_ctl_info pointer
 * @page_frame_number:		mem page where the error occurred
 * @offset_in_page:		offset of the error inside the page
 * @syndrome:			ECC syndrome
 * @top_layer:			Memory layer[0] position
 * @mid_layer:			Memory layer[1] position
 * @low_layer:			Memory layer[2] position
 * @msg:			Message meaningful to the end users that
 *				explains the event
 * @other_detail:		Technical details about the event that
 *				may help hardware manufacturers and
 *				EDAC developers to analyse the event
 * @arch_log:			Architecture-specific struct that can
 *				be used to add extended information to the
 *				tracepoint, like dumping MCE registers.
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail,
			  const void *arch_log)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i;
	long grain;
	bool enable_per_layer_report = false;
	u16 error_count;	/* FIXME: make it a parameter */
	u8 grain_bits;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */

	error_count = 1;	/* FIXME: allow change it */
	grain_bits = fls_long(grain) + 1;
	trace_mc_event(type, msg, label, error_count,
		       mci->mc_idx, top_layer, mid_layer, low_layer,
		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
		       grain_bits, syndrome, other_detail);

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			page_frame_number, offset_in_page,
			grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
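
/*
 * Illustrative sketch (not part of this file's build): how a hypothetical
 * driver's decoder could report a corrected error it attributed to layer
 * positions {2, 1} (e.g. csrow 2, channel 1), with -1 for the unused third
 * layer.  The page/offset/syndrome variables are assumed to come from the
 * driver's own hardware decoding.
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     page_frame_number, offset_in_page, syndrome,
 *			     2, 1, -1,
 *			     "single-bit error", "", NULL);
 */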