blob: 06028de5fe1be4235e2152e08f0f9ef3b50f5d22 [file] [log] [blame]
Alan Coxda9bb1d2006-01-18 17:44:13 -08001/*
2 * edac_mc kernel module
Doug Thompson49c0dab72006-07-10 04:45:19 -07003 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
Alan Coxda9bb1d2006-01-18 17:44:13 -08004 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
Alan Coxda9bb1d2006-01-18 17:44:13 -080015#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
Alan Coxda9bb1d2006-01-18 17:44:13 -080028#include <linux/ctype.h>
Dave Jiangc0d12172007-07-19 01:49:46 -070029#include <linux/edac.h>
Alan Coxda9bb1d2006-01-18 17:44:13 -080030#include <asm/uaccess.h>
31#include <asm/page.h>
32#include <asm/edac.h>
Douglas Thompson20bcb7a2007-07-19 01:49:47 -070033#include "edac_core.h"
Douglas Thompson7c9281d2007-07-19 01:49:33 -070034#include "edac_module.h"
Alan Coxda9bb1d2006-01-18 17:44:13 -080035
Alan Coxda9bb1d2006-01-18 17:44:13 -080036/* lock to memory controller's control array */
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -070037static DEFINE_MUTEX(mem_ctls_mutex);
Robert P. J. Dayff6ac2a2008-04-29 01:03:17 -070038static LIST_HEAD(mc_devices);
Alan Coxda9bb1d2006-01-18 17:44:13 -080039
Alan Coxda9bb1d2006-01-18 17:44:13 -080040#ifdef CONFIG_EDAC_DEBUG
41
/* Debug helper: dump one rank/channel entry and its back-pointers */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}
49
50static void edac_mc_dump_dimm(struct dimm_info *dimm)
51{
52 int i;
53
54 debugf4("\tdimm = %p\n", dimm);
55 debugf4("\tdimm->label = '%s'\n", dimm->label);
56 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
57 debugf4("\tdimm location ");
58 for (i = 0; i < dimm->mci->n_layers; i++) {
59 printk(KERN_CONT "%d", dimm->location[i]);
60 if (i < dimm->mci->n_layers - 1)
61 printk(KERN_CONT ".");
62 }
63 printk(KERN_CONT "\n");
64 debugf4("\tdimm->grain = %d\n", dimm->grain);
65 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
Alan Coxda9bb1d2006-01-18 17:44:13 -080066}
67
/* Debug helper: dump one chip-select row's page range and pointers */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}
79
/* Debug helper: dump the top-level mem_ctl_info capabilities and layout */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
95
Borislav Petkov24f9a7f2010-10-07 18:29:15 +020096#endif /* CONFIG_EDAC_DEBUG */
97
/*
 * Human-readable labels for memory types.
 * Indexed by enum mem_type — keep the order and number of entries in
 * sync with that enum.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
121
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does it
 * above, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 *
 * Returns the (suitably aligned) offset at which the reserved region
 * starts; the caller later rebases these offsets onto the real allocation.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	/* reserve room for n_elems items of 'size' bytes each */
	*p = (char *)*p + size * n_elems;

	/*
	 * Pick an alignment at least as stringent as what the compiler
	 * would provide for an object of this 'size'.  We assume that the
	 * alignment of a "long long" is the most stringent alignment the
	 * compiler will ever provide by default.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/*
	 * FIX: the padding needed depends on where 'ptr' currently points,
	 * not on 'size'.  The previous "size % align" computation returned
	 * a misaligned pointer whenever the running offset itself was not
	 * a multiple of 'align'.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return ptr;

	/* account for the padding in the running offset as well */
	*p = (char *)*p + (align - r);

	return (void *)(((unsigned long)ptr) + align - r);
}
176
/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks require drivers change.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	void *pvt, *ptr = NULL;
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	int i, j, err, row, chn;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 * NOTE: 'ptr' starts at NULL, so every value below is an offset,
	 * rebased onto the real allocation after the kzalloc() succeeds.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		/* one CE and one UE counter per element of each layer prefix */
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	/* total size = offset of the private area + its size */
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	/* a zero-sized private area yields a NULL pvt_info, not a dangling tail */
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct: each csrow gets a contiguous slice of
	 * 'tot_channels' rank_info entries out of the 'chi' array.
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct: walk every position in the layer hierarchy
	 * (odometer-style in 'pos') and cross-link each dimm with the
	 * old-API csrow/channel coordinates.
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
			       pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/* Copy DIMM location */
		for (j = 0; j < n_layers; j++)
			dimm->location[j] = pos[j];

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location: least-significant layer first,
		 * carrying into the next layer on wrap-around
		 */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 * edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800374
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Counterpart of edac_mc_alloc(): drops the root sysfs kobject that
 * edac_mc_alloc() registered, then releases the single-chunk allocation.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* unregister the root kobj created by edac_mc_alloc() */
	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800390
Doug Thompsonbce19682007-07-26 10:41:14 -0700391
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300392/**
Doug Thompsonbce19682007-07-26 10:41:14 -0700393 * find_mci_by_dev
394 *
395 * scan list of controllers looking for the one that manages
396 * the 'dev' device
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300397 * @dev: pointer to a struct device related with the MCI
Doug Thompsonbce19682007-07-26 10:41:14 -0700398 */
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300399struct mem_ctl_info *find_mci_by_dev(struct device *dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800400{
401 struct mem_ctl_info *mci;
402 struct list_head *item;
403
Dave Peterson537fba22006-03-26 01:38:40 -0800404 debugf3("%s()\n", __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800405
406 list_for_each(item, &mc_devices) {
407 mci = list_entry(item, struct mem_ctl_info, link);
408
Doug Thompson37f04582006-06-30 01:56:07 -0700409 if (mci->dev == dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800410 return mci;
411 }
412
413 return NULL;
414}
Mauro Carvalho Chehab939747bd2010-08-10 11:22:01 -0300415EXPORT_SYMBOL_GPL(find_mci_by_dev);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800416
Dave Jiang81d87cb2007-07-19 01:49:52 -0700417/*
418 * handler for EDAC to check if NMI type handler has asserted interrupt
419 */
420static int edac_mc_assert_error_check_and_clear(void)
421{
Dave Jiang66ee2f92007-07-19 01:49:54 -0700422 int old_state;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700423
Douglas Thompson079708b2007-07-19 01:49:58 -0700424 if (edac_op_state == EDAC_OPSTATE_POLL)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700425 return 1;
426
Dave Jiang66ee2f92007-07-19 01:49:54 -0700427 old_state = edac_err_assert;
428 edac_err_assert = 0;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700429
Dave Jiang66ee2f92007-07-19 01:49:54 -0700430 return old_state;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700431}
432
433/*
434 * edac_mc_workq_function
435 * performs the operation scheduled by a workq request
436 */
Dave Jiang81d87cb2007-07-19 01:49:52 -0700437static void edac_mc_workq_function(struct work_struct *work_req)
438{
Jean Delvarefbeb4382009-04-13 14:40:21 -0700439 struct delayed_work *d_work = to_delayed_work(work_req);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700440 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700441
442 mutex_lock(&mem_ctls_mutex);
443
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700444 /* if this control struct has movd to offline state, we are done */
445 if (mci->op_state == OP_OFFLINE) {
446 mutex_unlock(&mem_ctls_mutex);
447 return;
448 }
449
Dave Jiang81d87cb2007-07-19 01:49:52 -0700450 /* Only poll controllers that are running polled and have a check */
451 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
452 mci->edac_check(mci);
453
Dave Jiang81d87cb2007-07-19 01:49:52 -0700454 mutex_unlock(&mem_ctls_mutex);
455
456 /* Reschedule */
Dave Jiang4de78c62007-07-19 01:49:54 -0700457 queue_delayed_work(edac_workqueue, &mci->work,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700458 msecs_to_jiffies(edac_mc_get_poll_msec()));
Dave Jiang81d87cb2007-07-19 01:49:52 -0700459}
460
/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 * locking model:
 *
 *	called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	/* (re)initialize the work item and arm the first poll */
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}
481
482/*
483 * edac_mc_workq_teardown
484 * stop the workq processing on this mci
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700485 *
486 * locking model:
487 *
488 * called WITHOUT lock held
Dave Jiang81d87cb2007-07-19 01:49:52 -0700489 */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700490static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700491{
492 int status;
493
Borislav Petkov00740c52010-09-26 12:42:23 +0200494 if (mci->op_state != OP_RUNNING_POLL)
495 return;
496
Doug Thompsonbce19682007-07-26 10:41:14 -0700497 status = cancel_delayed_work(&mci->work);
498 if (status == 0) {
499 debugf0("%s() not canceled, flush the queue\n",
500 __func__);
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700501
Doug Thompsonbce19682007-07-26 10:41:14 -0700502 /* workq instance might be running, wait for it */
503 flush_workqueue(edac_workqueue);
Dave Jiang81d87cb2007-07-19 01:49:52 -0700504 }
505}
506
507/*
Doug Thompsonbce19682007-07-26 10:41:14 -0700508 * edac_mc_reset_delay_period(unsigned long value)
509 *
510 * user space has updated our poll period value, need to
511 * reset our workq delays
Dave Jiang81d87cb2007-07-19 01:49:52 -0700512 */
Doug Thompsonbce19682007-07-26 10:41:14 -0700513void edac_mc_reset_delay_period(int value)
Dave Jiang81d87cb2007-07-19 01:49:52 -0700514{
Doug Thompsonbce19682007-07-26 10:41:14 -0700515 struct mem_ctl_info *mci;
516 struct list_head *item;
Dave Jiang81d87cb2007-07-19 01:49:52 -0700517
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700518 mutex_lock(&mem_ctls_mutex);
519
Doug Thompsonbce19682007-07-26 10:41:14 -0700520 /* scan the list and turn off all workq timers, doing so under lock
521 */
522 list_for_each(item, &mc_devices) {
523 mci = list_entry(item, struct mem_ctl_info, link);
524
525 if (mci->op_state == OP_RUNNING_POLL)
526 cancel_delayed_work(&mci->work);
527 }
528
529 mutex_unlock(&mem_ctls_mutex);
530
531
532 /* re-walk the list, and reset the poll delay */
533 mutex_lock(&mem_ctls_mutex);
534
535 list_for_each(item, &mc_devices) {
536 mci = list_entry(item, struct mem_ctl_info, link);
537
538 edac_mc_workq_setup(mci, (unsigned long) value);
539 }
Dave Jiang81d87cb2007-07-19 01:49:52 -0700540
541 mutex_unlock(&mem_ctls_mutex);
542}
543
Doug Thompsonbce19682007-07-26 10:41:14 -0700544
545
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700546/* Return 0 on success, 1 on failure.
547 * Before calling this function, caller must
548 * assign a unique value to mci->mc_idx.
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700549 *
550 * locking model:
551 *
552 * called with the mem_ctls_mutex lock held
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700553 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700554static int add_mc_to_global_list(struct mem_ctl_info *mci)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800555{
556 struct list_head *item, *insert_before;
557 struct mem_ctl_info *p;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800558
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700559 insert_before = &mc_devices;
560
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700561 p = find_mci_by_dev(mci->dev);
562 if (unlikely(p != NULL))
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700563 goto fail0;
564
565 list_for_each(item, &mc_devices) {
566 p = list_entry(item, struct mem_ctl_info, link);
567
568 if (p->mc_idx >= mci->mc_idx) {
569 if (unlikely(p->mc_idx == mci->mc_idx))
570 goto fail1;
571
572 insert_before = item;
573 break;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800574 }
Alan Coxda9bb1d2006-01-18 17:44:13 -0800575 }
576
577 list_add_tail_rcu(&mci->link, insert_before);
Dave Jiangc0d12172007-07-19 01:49:46 -0700578 atomic_inc(&edac_handlers);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800579 return 0;
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700580
Douglas Thompson052dfb42007-07-19 01:50:13 -0700581fail0:
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700582 edac_printk(KERN_WARNING, EDAC_MC,
Kay Sievers281efb12009-01-06 14:42:57 -0800583 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
Stephen Rothwell17aa7e02008-05-05 13:54:19 +1000584 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700585 return 1;
586
Douglas Thompson052dfb42007-07-19 01:50:13 -0700587fail1:
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700588 edac_printk(KERN_WARNING, EDAC_MC,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700589 "bug in low-level driver: attempt to assign\n"
590 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
Doug Thompson2d7bbb92006-06-30 01:56:08 -0700591 return 1;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800592}
593
/*
 * Unlink 'mci' from the global controller list.  Callers in this file
 * hold mem_ctls_mutex across the call.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	/* one fewer registered EDAC error handler */
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}
605
Alan Coxda9bb1d2006-01-18 17:44:13 -0800606/**
Douglas Thompson5da08312007-07-19 01:49:31 -0700607 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
608 *
609 * If found, return a pointer to the structure.
610 * Else return NULL.
611 *
612 * Caller must hold mem_ctls_mutex.
613 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700614struct mem_ctl_info *edac_mc_find(int idx)
Douglas Thompson5da08312007-07-19 01:49:31 -0700615{
616 struct list_head *item;
617 struct mem_ctl_info *mci;
618
619 list_for_each(item, &mc_devices) {
620 mci = list_entry(item, struct mem_ctl_info, link);
621
622 if (mci->mc_idx >= idx) {
623 if (mci->mc_idx == idx)
624 return mci;
625
626 break;
627 }
628 }
629
630 return NULL;
631}
632EXPORT_SYMBOL(edac_mc_find);
633
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	/* at increasing debug levels, dump progressively more of the
	 * controller hierarchy (mci, then csrows/channels/dimms)
	 */
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

	/* failure paths: undo the global-list insertion (fail1) and drop
	 * the mutex (both) before reporting failure to the caller
	 */
fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800707
Alan Coxda9bb1d2006-01-18 17:44:13 -0800708/**
Dave Peterson472678e2006-03-26 01:38:49 -0800709 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
710 * remove mci structure from global list
Doug Thompson37f04582006-06-30 01:56:07 -0700711 * @pdev: Pointer to 'struct device' representing mci structure to remove.
Alan Coxda9bb1d2006-01-18 17:44:13 -0800712 *
Dave Peterson18dbc332006-03-26 01:38:50 -0800713 * Return pointer to removed mci structure, or NULL if device not found.
Alan Coxda9bb1d2006-01-18 17:44:13 -0800714 */
Douglas Thompson079708b2007-07-19 01:49:58 -0700715struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800716{
Dave Peterson18dbc332006-03-26 01:38:50 -0800717 struct mem_ctl_info *mci;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800718
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700719 debugf0("%s()\n", __func__);
720
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700721 mutex_lock(&mem_ctls_mutex);
Dave Peterson18dbc332006-03-26 01:38:50 -0800722
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700723 /* find the requested mci struct in the global list */
724 mci = find_mci_by_dev(dev);
725 if (mci == NULL) {
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700726 mutex_unlock(&mem_ctls_mutex);
Dave Peterson18dbc332006-03-26 01:38:50 -0800727 return NULL;
728 }
729
Alan Coxda9bb1d2006-01-18 17:44:13 -0800730 del_mc_from_global_list(mci);
Matthias Kaehlcke63b7df92007-07-19 01:49:38 -0700731 mutex_unlock(&mem_ctls_mutex);
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700732
Borislav Petkovbb31b3122010-12-02 17:48:35 +0100733 /* flush workq processes */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700734 edac_mc_workq_teardown(mci);
Borislav Petkovbb31b3122010-12-02 17:48:35 +0100735
736 /* marking MCI offline */
737 mci->op_state = OP_OFFLINE;
738
739 /* remove from sysfs */
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700740 edac_remove_sysfs_mci_device(mci);
741
Dave Peterson537fba22006-03-26 01:38:40 -0800742 edac_printk(KERN_INFO, EDAC_MC,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700743 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
Stephen Rothwell17aa7e02008-05-05 13:54:19 +1000744 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
Doug Thompsonbf52fa42007-07-19 01:50:30 -0700745
Dave Peterson18dbc332006-03-26 01:38:50 -0800746 return mci;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800747}
Dave Peterson91105402006-03-26 01:38:55 -0800748EXPORT_SYMBOL_GPL(edac_mc_del_mc);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800749
Adrian Bunk2da1c112007-07-19 01:49:32 -0700750static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
751 u32 size)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800752{
753 struct page *pg;
754 void *virt_addr;
755 unsigned long flags = 0;
756
Dave Peterson537fba22006-03-26 01:38:40 -0800757 debugf3("%s()\n", __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800758
759 /* ECC error page was not in our memory. Ignore it. */
Douglas Thompson079708b2007-07-19 01:49:58 -0700760 if (!pfn_valid(page))
Alan Coxda9bb1d2006-01-18 17:44:13 -0800761 return;
762
763 /* Find the actual page structure then map it and fix */
764 pg = pfn_to_page(page);
765
766 if (PageHighMem(pg))
767 local_irq_save(flags);
768
Cong Wang4e5df7c2011-11-25 23:14:19 +0800769 virt_addr = kmap_atomic(pg);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800770
771 /* Perform architecture specific atomic scrub operation */
772 atomic_scrub(virt_addr + offset, size);
773
774 /* Unmap and complete */
Cong Wang4e5df7c2011-11-25 23:14:19 +0800775 kunmap_atomic(virt_addr);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800776
777 if (PageHighMem(pg))
778 local_irq_restore(flags);
779}
780
Alan Coxda9bb1d2006-01-18 17:44:13 -0800781/* FIXME - should return -1 */
Dave Petersone7ecd892006-03-26 01:38:52 -0800782int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800783{
784 struct csrow_info *csrows = mci->csrows;
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300785 int row, i, j, n;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800786
Dave Peterson537fba22006-03-26 01:38:40 -0800787 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800788 row = -1;
789
790 for (i = 0; i < mci->nr_csrows; i++) {
791 struct csrow_info *csrow = &csrows[i];
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300792 n = 0;
793 for (j = 0; j < csrow->nr_channels; j++) {
794 struct dimm_info *dimm = csrow->channels[j].dimm;
795 n += dimm->nr_pages;
796 }
797 if (n == 0)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800798 continue;
799
Dave Peterson537fba22006-03-26 01:38:40 -0800800 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
801 "mask(0x%lx)\n", mci->mc_idx, __func__,
802 csrow->first_page, page, csrow->last_page,
803 csrow->page_mask);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800804
805 if ((page >= csrow->first_page) &&
806 (page <= csrow->last_page) &&
807 ((page & csrow->page_mask) ==
808 (csrow->first_page & csrow->page_mask))) {
809 row = i;
810 break;
811 }
812 }
813
814 if (row == -1)
Dave Peterson537fba22006-03-26 01:38:40 -0800815 edac_mc_printk(mci, KERN_ERR,
Douglas Thompson052dfb42007-07-19 01:50:13 -0700816 "could not look up page error address %lx\n",
817 (unsigned long)page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800818
819 return row;
820}
Dave Peterson91105402006-03-26 01:38:55 -0800821EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800822
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300823const char *edac_layer_name[] = {
824 [EDAC_MC_LAYER_BRANCH] = "branch",
825 [EDAC_MC_LAYER_CHANNEL] = "channel",
826 [EDAC_MC_LAYER_SLOT] = "slot",
827 [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
828};
829EXPORT_SYMBOL_GPL(edac_layer_name);
830
831static void edac_inc_ce_error(struct mem_ctl_info *mci,
832 bool enable_per_layer_report,
833 const int pos[EDAC_MAX_LAYERS])
Alan Coxda9bb1d2006-01-18 17:44:13 -0800834{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300835 int i, index = 0;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800836
837 mci->ce_count++;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300838
839 if (!enable_per_layer_report) {
840 mci->ce_noinfo_count++;
841 return;
842 }
843
844 for (i = 0; i < mci->n_layers; i++) {
845 if (pos[i] < 0)
846 break;
847 index += pos[i];
848 mci->ce_per_layer[i][index]++;
849
850 if (i < mci->n_layers - 1)
851 index *= mci->layers[i + 1].size;
852 }
853}
854
855static void edac_inc_ue_error(struct mem_ctl_info *mci,
856 bool enable_per_layer_report,
857 const int pos[EDAC_MAX_LAYERS])
858{
859 int i, index = 0;
860
861 mci->ue_count++;
862
863 if (!enable_per_layer_report) {
864 mci->ce_noinfo_count++;
865 return;
866 }
867
868 for (i = 0; i < mci->n_layers; i++) {
869 if (pos[i] < 0)
870 break;
871 index += pos[i];
872 mci->ue_per_layer[i][index]++;
873
874 if (i < mci->n_layers - 1)
875 index *= mci->layers[i + 1].size;
876 }
877}
878
879static void edac_ce_error(struct mem_ctl_info *mci,
880 const int pos[EDAC_MAX_LAYERS],
881 const char *msg,
882 const char *location,
883 const char *label,
884 const char *detail,
885 const char *other_detail,
886 const bool enable_per_layer_report,
887 const unsigned long page_frame_number,
888 const unsigned long offset_in_page,
889 u32 grain)
890{
891 unsigned long remapped_page;
892
893 if (edac_mc_get_log_ce()) {
894 if (other_detail && *other_detail)
895 edac_mc_printk(mci, KERN_WARNING,
896 "CE %s on %s (%s%s - %s)\n",
897 msg, label, location,
898 detail, other_detail);
899 else
900 edac_mc_printk(mci, KERN_WARNING,
901 "CE %s on %s (%s%s)\n",
902 msg, label, location,
903 detail);
904 }
905 edac_inc_ce_error(mci, enable_per_layer_report, pos);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800906
907 if (mci->scrub_mode & SCRUB_SW_SRC) {
908 /*
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300909 * Some memory controllers (called MCs below) can remap
910 * memory so that it is still available at a different
911 * address when PCI devices map into memory.
912 * MC's that can't do this, lose the memory where PCI
913 * devices are mapped. This mapping is MC-dependent
914 * and so we call back into the MC driver for it to
915 * map the MC page to a physical (CPU) page which can
916 * then be mapped to a virtual page - which can then
917 * be scrubbed.
918 */
Alan Coxda9bb1d2006-01-18 17:44:13 -0800919 remapped_page = mci->ctl_page_to_phys ?
Douglas Thompson052dfb42007-07-19 01:50:13 -0700920 mci->ctl_page_to_phys(mci, page_frame_number) :
921 page_frame_number;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800922
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300923 edac_mc_scrub_block(remapped_page,
924 offset_in_page, grain);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800925 }
926}
927
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300928static void edac_ue_error(struct mem_ctl_info *mci,
929 const int pos[EDAC_MAX_LAYERS],
930 const char *msg,
931 const char *location,
932 const char *label,
933 const char *detail,
934 const char *other_detail,
935 const bool enable_per_layer_report)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800936{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300937 if (edac_mc_get_log_ue()) {
938 if (other_detail && *other_detail)
939 edac_mc_printk(mci, KERN_WARNING,
940 "UE %s on %s (%s%s - %s)\n",
941 msg, label, location, detail,
942 other_detail);
943 else
944 edac_mc_printk(mci, KERN_WARNING,
945 "UE %s on %s (%s%s)\n",
946 msg, label, location, detail);
947 }
Dave Petersone7ecd892006-03-26 01:38:52 -0800948
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300949 if (edac_mc_get_panic_on_ue()) {
950 if (other_detail && *other_detail)
951 panic("UE %s on %s (%s%s - %s)\n",
952 msg, label, location, detail, other_detail);
953 else
954 panic("UE %s on %s (%s%s)\n",
955 msg, label, location, detail);
956 }
957
958 edac_inc_ue_error(mci, enable_per_layer_report, pos);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800959}
960
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300961#define OTHER_LABEL " or "
962void edac_mc_handle_error(const enum hw_event_mc_err_type type,
963 struct mem_ctl_info *mci,
964 const unsigned long page_frame_number,
965 const unsigned long offset_in_page,
966 const unsigned long syndrome,
967 const int layer0,
968 const int layer1,
969 const int layer2,
970 const char *msg,
971 const char *other_detail,
972 const void *mcelog)
Alan Coxda9bb1d2006-01-18 17:44:13 -0800973{
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300974 /* FIXME: too much for stack: move it to some pre-alocated area */
975 char detail[80], location[80];
976 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
977 char *p;
978 int row = -1, chan = -1;
979 int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
980 int i;
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300981 u32 grain;
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300982 bool enable_per_layer_report = false;
Alan Coxda9bb1d2006-01-18 17:44:13 -0800983
Dave Peterson537fba22006-03-26 01:38:40 -0800984 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
Alan Coxda9bb1d2006-01-18 17:44:13 -0800985
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300986 /*
987 * Check if the event report is consistent and if the memory
988 * location is known. If it is known, enable_per_layer_report will be
989 * true, the DIMM(s) label info will be filled and the per-layer
990 * error counters will be incremented.
991 */
992 for (i = 0; i < mci->n_layers; i++) {
993 if (pos[i] >= (int)mci->layers[i].size) {
994 if (type == HW_EVENT_ERR_CORRECTED)
995 p = "CE";
996 else
997 p = "UE";
998
999 edac_mc_printk(mci, KERN_ERR,
1000 "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1001 edac_layer_name[mci->layers[i].type],
1002 pos[i], mci->layers[i].size);
1003 /*
1004 * Instead of just returning it, let's use what's
1005 * known about the error. The increment routines and
1006 * the DIMM filter logic will do the right thing by
1007 * pointing the likely damaged DIMMs.
1008 */
1009 pos[i] = -1;
1010 }
1011 if (pos[i] >= 0)
1012 enable_per_layer_report = true;
Alan Coxda9bb1d2006-01-18 17:44:13 -08001013 }
1014
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001015 /*
1016 * Get the dimm label/grain that applies to the match criteria.
1017 * As the error algorithm may not be able to point to just one memory
1018 * stick, the logic here will get all possible labels that could
1019 * pottentially be affected by the error.
1020 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1021 * to have only the MC channel and the MC dimm (also called "branch")
1022 * but the channel is not known, as the memory is arranged in pairs,
1023 * where each memory belongs to a separate channel within the same
1024 * branch.
1025 */
1026 grain = 0;
1027 p = label;
1028 *p = '\0';
1029 for (i = 0; i < mci->tot_dimms; i++) {
1030 struct dimm_info *dimm = &mci->dimms[i];
Dave Petersone7ecd892006-03-26 01:38:52 -08001031
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001032 if (layer0 >= 0 && layer0 != dimm->location[0])
1033 continue;
1034 if (layer1 >= 0 && layer1 != dimm->location[1])
1035 continue;
1036 if (layer2 >= 0 && layer2 != dimm->location[2])
1037 continue;
1038
1039 /* get the max grain, over the error match range */
1040 if (dimm->grain > grain)
1041 grain = dimm->grain;
1042
1043 /*
1044 * If the error is memory-controller wide, there's no need to
1045 * seek for the affected DIMMs because the whole
1046 * channel/memory controller/... may be affected.
1047 * Also, don't show errors for empty DIMM slots.
1048 */
1049 if (enable_per_layer_report && dimm->nr_pages) {
1050 if (p != label) {
1051 strcpy(p, OTHER_LABEL);
1052 p += strlen(OTHER_LABEL);
1053 }
1054 strcpy(p, dimm->label);
1055 p += strlen(p);
1056 *p = '\0';
1057
1058 /*
1059 * get csrow/channel of the DIMM, in order to allow
1060 * incrementing the compat API counters
1061 */
1062 debugf4("%s: %s csrows map: (%d,%d)\n",
1063 __func__,
1064 mci->mem_is_per_rank ? "rank" : "dimm",
1065 dimm->csrow, dimm->cschannel);
1066
1067 if (row == -1)
1068 row = dimm->csrow;
1069 else if (row >= 0 && row != dimm->csrow)
1070 row = -2;
1071
1072 if (chan == -1)
1073 chan = dimm->cschannel;
1074 else if (chan >= 0 && chan != dimm->cschannel)
1075 chan = -2;
1076 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001077 }
1078
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001079 if (!enable_per_layer_report) {
1080 strcpy(label, "any memory");
1081 } else {
1082 debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1083 __func__, row, chan);
1084 if (p == label)
1085 strcpy(label, "unknown memory");
1086 if (type == HW_EVENT_ERR_CORRECTED) {
1087 if (row >= 0) {
1088 mci->csrows[row].ce_count++;
1089 if (chan >= 0)
1090 mci->csrows[row].channels[chan].ce_count++;
1091 }
1092 } else
1093 if (row >= 0)
1094 mci->csrows[row].ue_count++;
1095 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001096
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001097 /* Fill the RAM location data */
1098 p = location;
1099 for (i = 0; i < mci->n_layers; i++) {
1100 if (pos[i] < 0)
1101 continue;
1102
1103 p += sprintf(p, "%s:%d ",
1104 edac_layer_name[mci->layers[i].type],
1105 pos[i]);
1106 }
1107
1108 /* Memory type dependent details about the error */
1109 if (type == HW_EVENT_ERR_CORRECTED) {
1110 snprintf(detail, sizeof(detail),
1111 "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
Douglas Thompson052dfb42007-07-19 01:50:13 -07001112 page_frame_number, offset_in_page,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001113 grain, syndrome);
1114 edac_ce_error(mci, pos, msg, location, label, detail,
1115 other_detail, enable_per_layer_report,
1116 page_frame_number, offset_in_page, grain);
1117 } else {
1118 snprintf(detail, sizeof(detail),
1119 "page:0x%lx offset:0x%lx grain:%d",
1120 page_frame_number, offset_in_page, grain);
Alan Coxda9bb1d2006-01-18 17:44:13 -08001121
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001122 edac_ue_error(mci, pos, msg, location, label, detail,
1123 other_detail, enable_per_layer_report);
1124 }
Alan Coxda9bb1d2006-01-18 17:44:13 -08001125}
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -03001126EXPORT_SYMBOL_GPL(edac_mc_handle_error);