blob: 6a17c9b508eaa06dea5f66791e06e9f6f3d8dfc5 [file] [log] [blame]
/*
 * linux/arch/mips/dec/ecc-berr.c
 *
 * Bus error event handling code for systems equipped with ECC
 * handling logic, i.e. DECstation/DECsystem 5000/200 (KN02),
 * 5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03),
 * 5900/260 (KN05) systems.
 *
 * Copyright (c) 2003, 2005  Maciej W. Rozycki
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
16
17#include <linux/init.h>
Maciej W. Rozycki64dac502005-06-22 20:56:26 +000018#include <linux/interrupt.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/kernel.h>
20#include <linux/sched.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22
23#include <asm/addrspace.h>
24#include <asm/bootinfo.h>
25#include <asm/cpu.h>
Ralf Baechle6dab2f42006-10-09 00:00:31 +010026#include <asm/irq_regs.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <asm/processor.h>
Maciej W. Rozycki49afb1f2006-12-06 11:50:23 +000028#include <asm/ptrace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/traps.h>
31
32#include <asm/dec/ecc.h>
33#include <asm/dec/kn02.h>
34#include <asm/dec/kn03.h>
35#include <asm/dec/kn05.h>
36
37static volatile u32 *kn0x_erraddr;
38static volatile u32 *kn0x_chksyn;
39
40static inline void dec_ecc_be_ack(void)
41{
42 *kn0x_erraddr = 0; /* any write clears the IRQ */
43 iob();
44}
45
46static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker)
47{
48 static const char excstr[] = "exception";
49 static const char intstr[] = "interrupt";
50 static const char cpustr[] = "CPU";
51 static const char dmastr[] = "DMA";
52 static const char readstr[] = "read";
53 static const char mreadstr[] = "memory read";
54 static const char writestr[] = "write";
55 static const char mwritstr[] = "partial memory write";
56 static const char timestr[] = "timeout";
57 static const char overstr[] = "overrun";
58 static const char eccstr[] = "ECC error";
59
60 const char *kind, *agent, *cycle, *event;
61 const char *status = "", *xbit = "", *fmt = "";
Maciej W. Rozycki64dac502005-06-22 20:56:26 +000062 unsigned long address;
Linus Torvalds1da177e2005-04-16 15:20:36 -070063 u16 syn = 0, sngl;
64
65 int i = 0;
66
67 u32 erraddr = *kn0x_erraddr;
68 u32 chksyn = *kn0x_chksyn;
69 int action = MIPS_BE_FATAL;
70
Maciej W. Rozycki64dac502005-06-22 20:56:26 +000071 /* For non-ECC ack ASAP, so that any subsequent errors get caught. */
Linus Torvalds1da177e2005-04-16 15:20:36 -070072 if ((erraddr & (KN0X_EAR_VALID | KN0X_EAR_ECCERR)) == KN0X_EAR_VALID)
73 dec_ecc_be_ack();
74
75 kind = invoker ? intstr : excstr;
76
77 if (!(erraddr & KN0X_EAR_VALID)) {
78 /* No idea what happened. */
Maciej W. Rozycki64dac502005-06-22 20:56:26 +000079 printk(KERN_ALERT "Unidentified bus error %s\n", kind);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 return action;
81 }
82
83 agent = (erraddr & KN0X_EAR_CPU) ? cpustr : dmastr;
84
85 if (erraddr & KN0X_EAR_ECCERR) {
86 /* An ECC error on a CPU or DMA transaction. */
87 cycle = (erraddr & KN0X_EAR_WRITE) ? mwritstr : mreadstr;
88 event = eccstr;
89 } else {
90 /* A CPU timeout or a DMA overrun. */
91 cycle = (erraddr & KN0X_EAR_WRITE) ? writestr : readstr;
92 event = (erraddr & KN0X_EAR_CPU) ? timestr : overstr;
93 }
94
95 address = erraddr & KN0X_EAR_ADDRESS;
96 /* For ECC errors on reads adjust for MT pipelining. */
97 if ((erraddr & (KN0X_EAR_WRITE | KN0X_EAR_ECCERR)) == KN0X_EAR_ECCERR)
98 address = (address & ~0xfffLL) | ((address - 5) & 0xfffLL);
99 address <<= 2;
100
101 /* Only CPU errors are fixable. */
102 if (erraddr & KN0X_EAR_CPU && is_fixup)
103 action = MIPS_BE_FIXUP;
104
105 if (erraddr & KN0X_EAR_ECCERR) {
106 static const u8 data_sbit[32] = {
107 0x4f, 0x4a, 0x52, 0x54, 0x57, 0x58, 0x5b, 0x5d,
108 0x23, 0x25, 0x26, 0x29, 0x2a, 0x2c, 0x31, 0x34,
109 0x0e, 0x0b, 0x13, 0x15, 0x16, 0x19, 0x1a, 0x1c,
110 0x62, 0x64, 0x67, 0x68, 0x6b, 0x6d, 0x70, 0x75,
111 };
112 static const u8 data_mbit[25] = {
113 0x07, 0x0d, 0x1f,
114 0x2f, 0x32, 0x37, 0x38, 0x3b, 0x3d, 0x3e,
115 0x43, 0x45, 0x46, 0x49, 0x4c, 0x51, 0x5e,
116 0x61, 0x6e, 0x73, 0x76, 0x79, 0x7a, 0x7c, 0x7f,
117 };
118 static const char sbestr[] = "corrected single";
119 static const char dbestr[] = "uncorrectable double";
120 static const char mbestr[] = "uncorrectable multiple";
121
122 if (!(address & 0x4))
123 syn = chksyn; /* Low bank. */
124 else
125 syn = chksyn >> 16; /* High bank. */
126
127 if (!(syn & KN0X_ESR_VLDLO)) {
128 /* Ack now, no rewrite will happen. */
129 dec_ecc_be_ack();
130
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000131 fmt = KERN_ALERT "%s" "invalid\n";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 } else {
133 sngl = syn & KN0X_ESR_SNGLO;
134 syn &= KN0X_ESR_SYNLO;
135
136 /*
137 * Multibit errors may be tagged incorrectly;
138 * check the syndrome explicitly.
139 */
140 for (i = 0; i < 25; i++)
141 if (syn == data_mbit[i])
142 break;
143
144 if (i < 25) {
145 status = mbestr;
146 } else if (!sngl) {
147 status = dbestr;
148 } else {
Maciej W. Rozycki3bd4c902005-06-16 20:30:54 +0000149 volatile u32 *ptr =
150 (void *)CKSEG1ADDR(address);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151
152 *ptr = *ptr; /* Rewrite. */
153 iob();
154
155 status = sbestr;
156 action = MIPS_BE_DISCARD;
157 }
158
159 /* Ack now, now we've rewritten (or not). */
160 dec_ecc_be_ack();
161
162 if (syn && syn == (syn & -syn)) {
163 if (syn == 0x01) {
164 fmt = KERN_ALERT "%s"
165 "%#04x -- %s bit error "
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000166 "at check bit C%s\n";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 xbit = "X";
168 } else {
169 fmt = KERN_ALERT "%s"
170 "%#04x -- %s bit error "
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000171 "at check bit C%s%u\n";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 }
173 i = syn >> 2;
174 } else {
175 for (i = 0; i < 32; i++)
176 if (syn == data_sbit[i])
177 break;
178 if (i < 32)
179 fmt = KERN_ALERT "%s"
180 "%#04x -- %s bit error "
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000181 "at data bit D%s%u\n";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 else
183 fmt = KERN_ALERT "%s"
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000184 "%#04x -- %s bit error\n";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 }
186 }
187 }
188
189 if (action != MIPS_BE_FIXUP)
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000190 printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 kind, agent, cycle, event, address);
192
193 if (action != MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR)
194 printk(fmt, " ECC syndrome ", syn, status, xbit, i);
195
196 return action;
197}
198
/*
 * Bus error exception entry point: runs the common backend with the
 * caller's @regs and @is_fixup, tagged as an exception, and returns
 * the backend's MIPS_BE_* action unchanged.
 */
int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup)
{
	const int from_interrupt = 0;

	return dec_ecc_be_backend(regs, is_fixup, from_interrupt);
}
203
Ralf Baechle6dab2f42006-10-09 00:00:31 +0100204irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205{
Ralf Baechle6dab2f42006-10-09 00:00:31 +0100206 struct pt_regs *regs = get_irq_regs();
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 int action = dec_ecc_be_backend(regs, 0, 1);
209
210 if (action == MIPS_BE_DISCARD)
Maciej W. Rozycki68e4a862005-06-13 19:53:38 +0000211 return IRQ_HANDLED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212
213 /*
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000214 * FIXME: Find the affected processes and kill them, otherwise
215 * we must die.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 *
217 * The interrupt is asynchronously delivered thus EPC and RA
218 * may be irrelevant, but are printed for a reference.
219 */
220 printk(KERN_ALERT "Fatal bus interrupt, epc == %08lx, ra == %08lx\n",
221 regs->cp0_epc, regs->regs[31]);
222 die("Unrecoverable bus error", regs);
223}
224
225
/*
 * Initialization differs a bit between KN02 and KN03/KN05, so we
 * need two variants.  Once set up, all systems can be handled the
 * same way.
 */
231static inline void dec_kn02_be_init(void)
232{
Maciej W. Rozyckia5fc9c02005-07-01 16:10:40 +0000233 volatile u32 *csr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CSR);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234
Maciej W. Rozyckia5fc9c02005-07-01 16:10:40 +0000235 kn0x_erraddr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_ERRADDR);
236 kn0x_chksyn = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CHKSYN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 /* Preset write-only bits of the Control Register cache. */
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000239 cached_kn02_csr = *csr | KN02_CSR_LEDS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240
241 /* Set normal ECC detection and generation. */
242 cached_kn02_csr &= ~(KN02_CSR_DIAGCHK | KN02_CSR_DIAGGEN);
243 /* Enable ECC correction. */
244 cached_kn02_csr |= KN02_CSR_CORRECT;
245 *csr = cached_kn02_csr;
246 iob();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
249static inline void dec_kn03_be_init(void)
250{
Maciej W. Rozyckia5fc9c02005-07-01 16:10:40 +0000251 volatile u32 *mcr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_MCR);
252 volatile u32 *mbcs = (void *)CKSEG1ADDR(KN4K_SLOT_BASE + KN4K_MB_CSR);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253
Maciej W. Rozyckia5fc9c02005-07-01 16:10:40 +0000254 kn0x_erraddr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_ERRADDR);
255 kn0x_chksyn = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_CHKSYN);
Ralf Baechle42a3b4f2005-09-03 15:56:17 -0700256
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 /*
258 * Set normal ECC detection and generation, enable ECC correction.
259 * For KN05 we also need to make sure EE (?) is enabled in the MB.
260 * Otherwise DBE/IBE exceptions would be masked but bus error
261 * interrupts would still arrive, resulting in an inevitable crash
262 * if get_dbe() triggers one.
263 */
264 *mcr = (*mcr & ~(KN03_MCR_DIAGCHK | KN03_MCR_DIAGGEN)) |
265 KN03_MCR_CORRECT;
Ralf Baechle10cc3522007-10-11 23:46:15 +0100266 if (current_cpu_type() == CPU_R4400SC)
Maciej W. Rozycki64dac502005-06-22 20:56:26 +0000267 *mbcs |= KN4K_MB_CSR_EE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 fast_iob();
269}
270
271void __init dec_ecc_be_init(void)
272{
273 if (mips_machtype == MACH_DS5000_200)
274 dec_kn02_be_init();
275 else
276 dec_kn03_be_init();
277
278 /* Clear any leftover errors from the firmware. */
279 dec_ecc_be_ack();
280}