/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

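/*
 * Illustration only (a sketch, not compiled here): the table above is
 * indexed by the enum mem_type values from <linux/edac.h>, so a driver
 * that stored, say, MEM_DDR3 in dimm->mtype could print a human readable
 * name with something like:
 *
 *	pr_info("DIMM type: %s\n", edac_mem_types[MEM_DDR3]);
 *
 * which, assuming the enum layout matches the ordering above, would yield
 * "Unbuffered DDR3 RAM".
 */
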
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p: pointer to a pointer with the memory offset to be used. At
 *     return, this will be incremented to point to the next offset
 * @size: Size of the data structure to be reserved
 * @n_elems: Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that successive calls keep advancing through
 * memory to the proper offsets when a struct is allocated together with its
 * embedded structs, as new_edac_mc_alloc() does below, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

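/*
 * Usage sketch (illustration only; "struct foo", "struct bar" and "n_bars"
 * are hypothetical): carving one structure plus an array out of a single
 * allocation with the two-pass pattern used by new_edac_mc_alloc() below.
 * The first pass computes offsets relative to NULL, then the offsets are
 * rebased onto the single kzalloc()ed chunk:
 *
 *	void *ptr = NULL;
 *	struct foo *foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	struct bar *bar = edac_align_ptr(&ptr, sizeof(*bar), n_bars);
 *	unsigned size = (unsigned long)ptr;
 *
 *	foo = kzalloc(size, GFP_KERNEL);
 *	bar = (struct bar *)(((char *)foo) + (unsigned long)bar);
 */
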
/**
 * new_edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks would require changes
 * in the drivers.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *new_edac_mc_alloc(unsigned mc_num,
				       unsigned n_layers,
				       struct edac_mc_layer *layers,
				       unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	void *pvt, *ptr = NULL;
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	int i, j, err, row, chn;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/* Copy DIMM location */
		for (j = 0; j < n_layers; j++)
			dimm->location[j] = pos[j];

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(new_edac_mc_alloc);

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	Number of CSROWS needed for this MC
 * @nr_chans:	Number of channels for the MC hierarchy
 * @mc_num:	Memory controller number
 *
 * FIXME: drivers handle multi-rank memories in different ways: some
 * drivers map multi-ranked DIMMs as one DIMM while others
 * as several DIMMs.
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */

struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				   unsigned nr_chans, int mc_num)
{
	unsigned n_layers = 2;
	struct edac_mc_layer layers[n_layers];

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = nr_csrows;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = nr_chans;
	layers[1].is_virt_csrow = false;

	return new_edac_mc_alloc(mc_num, ARRAY_SIZE(layers), layers, sz_pvt);
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

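/*
 * Usage sketch (illustration only, hypothetical driver): a controller with
 * more than two levels of hierarchy can describe itself to
 * new_edac_mc_alloc() with an explicit layer array instead of the legacy
 * wrapper above, e.g. branch/channel/slot with made-up sizes:
 *
 *	struct edac_mc_layer layers[3];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_BRANCH;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *	layers[2].type = EDAC_MC_LAYER_SLOT;
 *	layers[2].size = 4;
 *	layers[2].is_virt_csrow = true;
 *	mci = new_edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *				sizeof(struct my_pvt));
 *
 * "struct my_pvt", the sizes and the is_virt_csrow split are illustrative;
 * the split only controls how the layers are folded into the legacy
 * csrow/channel view, as computed at the top of new_edac_mc_alloc().
 */
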
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}


/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						     channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

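/*
 * Typical driver flow (sketch only; "my_pvt", "my_check_routine" and the
 * naming strings are hypothetical, and error handling is trimmed): a probe
 * routine allocates the mci, fills the mandatory fields and the dimm/csrow
 * data, then hands it over to the EDAC core:
 *
 *	mci = edac_mc_alloc(sizeof(struct my_pvt), nr_csrows, nr_chans, 0);
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->dev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->edac_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_memory_controller";
 *	mci->edac_check = my_check_routine;	// optional: enables polling
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 *
 * On removal, the mirror image is edac_mc_del_mc(&pdev->dev) followed by
 * edac_mc_free() on the returned pointer.
 */
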
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_count++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_count++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (layer0 >= 0 && layer0 != dimm->location[0])
			continue;
		if (layer1 >= 0 && layer1 != dimm->location[1])
			continue;
		if (layer2 >= 0 && layer2 != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
			page_frame_number, offset_in_page,
			grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d",
			page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
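
/*
 * Reporting sketch (illustration only, hypothetical driver): a driver that
 * decoded a corrected error to csrow 2, channel 1 on a csrow/channel based
 * controller could feed it to the core like this:
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     page_frame_number, offset_in_page, syndrome,
 *			     2, 1, -1,
 *			     "read error", "", NULL);
 *
 * Layer positions that the hardware cannot resolve are passed as -1, which
 * keeps the per-layer counters and the DIMM label lookup above consistent.
 */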