blob: f827f02bfbe8c5ce371d3fe4f970197d62e4ec60 [file] [log] [blame]
Huang Ying06d65de2010-05-18 14:35:19 +08001/*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various APEI tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.3.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/time.h>
30#include <linux/cper.h>
31#include <linux/acpi.h>
Lance Ortiz1d521002013-01-03 15:34:08 -070032#include <linux/pci.h>
Huang Yingc413d762011-02-21 13:54:43 +080033#include <linux/aer.h>
Huang Ying06d65de2010-05-18 14:35:19 +080034
35/*
36 * CPER record ID need to be unique even after reboot, because record
37 * ID is used as index for ERST storage, while CPER records from
38 * multiple boot may co-exist in ERST.
39 */
40u64 cper_next_record_id(void)
41{
42 static atomic64_t seq;
43
44 if (!atomic64_read(&seq))
45 atomic64_set(&seq, ((u64)get_seconds()) << 32);
46
47 return atomic64_inc_return(&seq);
48}
49EXPORT_SYMBOL_GPL(cper_next_record_id);
50
/*
 * Names of the CPER error severities, indexed by the numeric severity
 * value.  The table is only ever indexed, so both the strings and the
 * pointers to them are read-only.
 */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};
57
58static const char *cper_severity_str(unsigned int severity)
59{
60 return severity < ARRAY_SIZE(cper_severity_strs) ?
61 cper_severity_strs[severity] : "unknown";
62}
63
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs;
 * NULL entries in @strs are skipped.  The strings are joined with ", ".
 * When appending the next string would make the line longer than 80
 * characters, the current line is printed and a new one is started,
 * with @pfx printed at the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * @bits only has 8 * sizeof(bits) positions.  Entries of @strs
	 * beyond that can never be selected, and shifting 1U by the full
	 * width or more would be undefined, so clamp the walk.
	 */
	if (strs_size > 8 * sizeof(bits))
		strs_size = 8 * sizeof(bits);

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if ", " plus @str would not fit. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	/* Print whatever is still pending in the buffer. */
	if (len)
		printk("%s\n", buf);
}
100
/* Names for the processor type field, indexed by its value. */
static const char * const cper_proc_type_strs[] = {
	"IA32/X64",
	"IA64",
};

/* Names for the processor ISA field, indexed by its value. */
static const char * const cper_proc_isa_strs[] = {
	"IA32",
	"IA64",
	"X64",
};

/*
 * Names for the processor error type bits, indexed by bit position.
 * Kept as "const char *[]" (no const on the pointers) because the
 * table is passed to cper_print_bits(), whose parameter has that type.
 */
static const char *cper_proc_error_type_strs[] = {
	"cache error",
	"TLB error",
	"bus error",
	"micro-architectural error",
};

/* Names for the processor operation field, indexed by its value. */
static const char * const cper_proc_op_strs[] = {
	"unknown or generic",
	"data read",
	"data write",
	"instruction execution",
};

/*
 * Names for the processor flag bits, indexed by bit position.  Kept as
 * "const char *[]" for the same reason as cper_proc_error_type_strs[]:
 * it is passed to cper_print_bits().
 */
static const char *cper_proc_flag_strs[] = {
	"restartable",
	"precise IP",
	"overflow",
	"corrected",
};
132
133static void cper_print_proc_generic(const char *pfx,
134 const struct cper_sec_proc_generic *proc)
135{
136 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
137 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
138 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
139 cper_proc_type_strs[proc->proc_type] : "unknown");
140 if (proc->validation_bits & CPER_PROC_VALID_ISA)
141 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
142 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
143 cper_proc_isa_strs[proc->proc_isa] : "unknown");
144 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
145 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
146 cper_print_bits(pfx, proc->proc_error_type,
147 cper_proc_error_type_strs,
148 ARRAY_SIZE(cper_proc_error_type_strs));
149 }
150 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
151 printk("%s""operation: %d, %s\n", pfx, proc->operation,
152 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
153 cper_proc_op_strs[proc->operation] : "unknown");
154 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
155 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
156 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
157 ARRAY_SIZE(cper_proc_flag_strs));
158 }
159 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
160 printk("%s""level: %d\n", pfx, proc->level);
161 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
162 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
163 if (proc->validation_bits & CPER_PROC_VALID_ID)
164 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
165 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
166 printk("%s""target_address: 0x%016llx\n",
167 pfx, proc->target_addr);
168 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
169 printk("%s""requestor_id: 0x%016llx\n",
170 pfx, proc->requestor_id);
171 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
172 printk("%s""responder_id: 0x%016llx\n",
173 pfx, proc->responder_id);
174 if (proc->validation_bits & CPER_PROC_VALID_IP)
175 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
176}
177
/*
 * Names for the memory error type field, indexed by its value.  The
 * table is only ever indexed, so both the strings and the pointers to
 * them are read-only.
 */
static const char * const cper_mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
};
195
196static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
197{
198 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
199 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
200 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
201 printk("%s""physical_address: 0x%016llx\n",
202 pfx, mem->physical_addr);
203 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
204 printk("%s""physical_address_mask: 0x%016llx\n",
205 pfx, mem->physical_addr_mask);
206 if (mem->validation_bits & CPER_MEM_VALID_NODE)
207 printk("%s""node: %d\n", pfx, mem->node);
208 if (mem->validation_bits & CPER_MEM_VALID_CARD)
209 printk("%s""card: %d\n", pfx, mem->card);
210 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
211 printk("%s""module: %d\n", pfx, mem->module);
212 if (mem->validation_bits & CPER_MEM_VALID_BANK)
213 printk("%s""bank: %d\n", pfx, mem->bank);
214 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
215 printk("%s""device: %d\n", pfx, mem->device);
216 if (mem->validation_bits & CPER_MEM_VALID_ROW)
217 printk("%s""row: %d\n", pfx, mem->row);
218 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
219 printk("%s""column: %d\n", pfx, mem->column);
220 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
221 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
222 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
223 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
224 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
225 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
226 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
227 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
228 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
229 u8 etype = mem->error_type;
230 printk("%s""error_type: %d, %s\n", pfx, etype,
231 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
232 cper_mem_err_type_strs[etype] : "unknown");
233 }
234}
235
/*
 * Names for the PCIe port type field, indexed by its value.  The table
 * is only ever indexed, so both the strings and the pointers to them
 * are read-only.
 */
static const char * const cper_pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
249
Huang Yingc413d762011-02-21 13:54:43 +0800250static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
251 const struct acpi_hest_generic_data *gdata)
Huang Yingf59c55d2010-12-07 10:22:30 +0800252{
253 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
254 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
255 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
256 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
257 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
258 printk("%s""version: %d.%d\n", pfx,
259 pcie->version.major, pcie->version.minor);
260 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
261 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
262 pcie->command, pcie->status);
263 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
264 const __u8 *p;
265 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
266 pcie->device_id.segment, pcie->device_id.bus,
267 pcie->device_id.device, pcie->device_id.function);
268 printk("%s""slot: %d\n", pfx,
269 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
270 printk("%s""secondary_bus: 0x%02x\n", pfx,
271 pcie->device_id.secondary_bus);
272 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
273 pcie->device_id.vendor_id, pcie->device_id.device_id);
274 p = pcie->device_id.class_code;
275 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
276 }
277 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
278 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
279 pcie->serial_number.lower, pcie->serial_number.upper);
280 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
281 printk(
282 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
283 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
284}
285
/*
 * Names for the flag bits of an error status section, indexed by bit
 * position; passed to cper_print_bits() when a section is printed.
 */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
294
295static void apei_estatus_print_section(
296 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
297{
298 uuid_le *sec_type = (uuid_le *)gdata->section_type;
299 __u16 severity;
300
301 severity = gdata->error_severity;
302 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
303 cper_severity_str(severity));
304 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
305 cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
306 ARRAY_SIZE(apei_estatus_section_flag_strs));
307 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
308 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
309 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
310 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
311
312 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
313 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
314 printk("%s""section_type: general processor error\n", pfx);
315 if (gdata->error_data_length >= sizeof(*proc_err))
316 cper_print_proc_generic(pfx, proc_err);
317 else
318 goto err_section_too_small;
319 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
320 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
321 printk("%s""section_type: memory error\n", pfx);
322 if (gdata->error_data_length >= sizeof(*mem_err))
323 cper_print_mem(pfx, mem_err);
324 else
325 goto err_section_too_small;
326 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
327 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
328 printk("%s""section_type: PCIe error\n", pfx);
329 if (gdata->error_data_length >= sizeof(*pcie))
Huang Yingc413d762011-02-21 13:54:43 +0800330 cper_print_pcie(pfx, pcie, gdata);
Huang Yingf59c55d2010-12-07 10:22:30 +0800331 else
332 goto err_section_too_small;
333 } else
334 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
335
336 return;
337
338err_section_too_small:
339 pr_err(FW_WARN "error section length is too small\n");
340}
341
342void apei_estatus_print(const char *pfx,
343 const struct acpi_hest_generic_status *estatus)
344{
345 struct acpi_hest_generic_data *gdata;
346 unsigned int data_len, gedata_len;
347 int sec_no = 0;
348 __u16 severity;
349
350 printk("%s""APEI generic hardware error status\n", pfx);
351 severity = estatus->error_severity;
352 printk("%s""severity: %d, %s\n", pfx, severity,
353 cper_severity_str(severity));
354 data_len = estatus->data_length;
355 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
Chen, Gong833ba4b2013-10-18 14:27:51 -0700356 while (data_len >= sizeof(*gdata)) {
Huang Yingf59c55d2010-12-07 10:22:30 +0800357 gedata_len = gdata->error_data_length;
358 apei_estatus_print_section(pfx, gdata, sec_no);
359 data_len -= gedata_len + sizeof(*gdata);
Jiang Liu37d2a362012-02-15 00:01:44 +0800360 gdata = (void *)(gdata + 1) + gedata_len;
Huang Yingf59c55d2010-12-07 10:22:30 +0800361 sec_no++;
362 }
363}
364EXPORT_SYMBOL_GPL(apei_estatus_print);
365
Huang Ying06d65de2010-05-18 14:35:19 +0800366int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
367{
368 if (estatus->data_length &&
369 estatus->data_length < sizeof(struct acpi_hest_generic_data))
370 return -EINVAL;
371 if (estatus->raw_data_length &&
372 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
373 return -EINVAL;
374
375 return 0;
376}
377EXPORT_SYMBOL_GPL(apei_estatus_check_header);
378
379int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
380{
381 struct acpi_hest_generic_data *gdata;
382 unsigned int data_len, gedata_len;
383 int rc;
384
385 rc = apei_estatus_check_header(estatus);
386 if (rc)
387 return rc;
388 data_len = estatus->data_length;
389 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
Chen Gongaaf9d932013-03-19 06:48:07 +0000390 while (data_len >= sizeof(*gdata)) {
Huang Ying06d65de2010-05-18 14:35:19 +0800391 gedata_len = gdata->error_data_length;
392 if (gedata_len > data_len - sizeof(*gdata))
393 return -EINVAL;
394 data_len -= gedata_len + sizeof(*gdata);
Jiang Liu37d2a362012-02-15 00:01:44 +0800395 gdata = (void *)(gdata + 1) + gedata_len;
Huang Ying06d65de2010-05-18 14:35:19 +0800396 }
397 if (data_len)
398 return -EINVAL;
399
400 return 0;
401}
402EXPORT_SYMBOL_GPL(apei_estatus_check);