blob: cb4e2b8d32d974d129c89f9e05dbe1eae2daf129 [file] [log] [blame]
Tejun Heoece1d632006-04-02 18:51:53 +09001/*
2 * libata-eh.c - libata error handling
3 *
4 * Maintained by: Jeff Garzik <jgarzik@pobox.com>
5 * Please ALWAYS copy linux-ide@vger.kernel.org
6 * on emails.
7 *
8 * Copyright 2006 Tejun Heo <htejun@gmail.com>
9 *
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; see the file COPYING. If not, write to
23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
24 * USA.
25 *
26 *
27 * libata documentation is available via 'make {ps|pdf}docs',
28 * as Documentation/DocBook/libata.*
29 *
30 * Hardware documentation available from http://www.t13.org/ and
31 * http://www.sata-io.org/
32 *
33 */
34
35#include <linux/config.h>
36#include <linux/kernel.h>
37#include <scsi/scsi.h>
38#include <scsi/scsi_host.h>
39#include <scsi/scsi_eh.h>
40#include <scsi/scsi_device.h>
41#include <scsi/scsi_cmnd.h>
42
43#include <linux/libata.h>
44
45#include "libata.h"
46
47/**
48 * ata_scsi_timed_out - SCSI layer time out callback
49 * @cmd: timed out SCSI command
50 *
51 * Handles SCSI layer timeout. We race with normal completion of
52 * the qc for @cmd. If the qc is already gone, we lose and let
53 * the scsi command finish (EH_HANDLED). Otherwise, the qc has
54 * timed out and EH should be invoked. Prevent ata_qc_complete()
55 * from finishing it by setting EH_SCHEDULED and return
56 * EH_NOT_HANDLED.
57 *
58 * LOCKING:
59 * Called from timer context
60 *
61 * RETURNS:
62 * EH_HANDLED or EH_NOT_HANDLED
63 */
64enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
65{
66 struct Scsi_Host *host = cmd->device->host;
Jeff Garzik35bb94b2006-04-11 13:12:34 -040067 struct ata_port *ap = ata_shost_to_port(host);
Tejun Heoece1d632006-04-02 18:51:53 +090068 unsigned long flags;
69 struct ata_queued_cmd *qc;
70 enum scsi_eh_timer_return ret = EH_HANDLED;
71
72 DPRINTK("ENTER\n");
73
74 spin_lock_irqsave(&ap->host_set->lock, flags);
75 qc = ata_qc_from_tag(ap, ap->active_tag);
76 if (qc) {
77 WARN_ON(qc->scsicmd != cmd);
78 qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
79 qc->err_mask |= AC_ERR_TIMEOUT;
80 ret = EH_NOT_HANDLED;
81 }
82 spin_unlock_irqrestore(&ap->host_set->lock, flags);
83
84 DPRINTK("EXIT, ret=%d\n", ret);
85 return ret;
86}
87
88/**
89 * ata_scsi_error - SCSI layer error handler callback
90 * @host: SCSI host on which error occurred
91 *
92 * Handles SCSI-layer-thrown error events.
93 *
94 * LOCKING:
95 * Inherited from SCSI layer (none, can sleep)
96 *
97 * RETURNS:
98 * Zero.
99 */
Jeff Garzik381544b2006-04-11 13:04:39 -0400100void ata_scsi_error(struct Scsi_Host *host)
Tejun Heoece1d632006-04-02 18:51:53 +0900101{
Jeff Garzik35bb94b2006-04-11 13:12:34 -0400102 struct ata_port *ap = ata_shost_to_port(host);
Tejun Heoece1d632006-04-02 18:51:53 +0900103
104 DPRINTK("ENTER\n");
105
106 /* synchronize with IRQ handler and port task */
107 spin_unlock_wait(&ap->host_set->lock);
108 ata_port_flush_task(ap);
109
110 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
111
112 ap->ops->eng_timeout(ap);
113
114 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
115
116 scsi_eh_flush_done_q(&ap->eh_done_q);
117
118 DPRINTK("EXIT\n");
Tejun Heoece1d632006-04-02 18:51:53 +0900119}
120
121/**
122 * ata_qc_timeout - Handle timeout of queued command
123 * @qc: Command that timed out
124 *
125 * Some part of the kernel (currently, only the SCSI layer)
126 * has noticed that the active command on port @ap has not
127 * completed after a specified length of time. Handle this
128 * condition by disabling DMA (if necessary) and completing
129 * transactions, with error if necessary.
130 *
131 * This also handles the case of the "lost interrupt", where
132 * for some reason (possibly hardware bug, possibly driver bug)
133 * an interrupt was not delivered to the driver, even though the
134 * transaction completed successfully.
135 *
136 * LOCKING:
137 * Inherited from SCSI layer (none, can sleep)
138 */
139static void ata_qc_timeout(struct ata_queued_cmd *qc)
140{
141 struct ata_port *ap = qc->ap;
142 struct ata_host_set *host_set = ap->host_set;
143 u8 host_stat = 0, drv_stat;
144 unsigned long flags;
145
146 DPRINTK("ENTER\n");
147
148 ap->hsm_task_state = HSM_ST_IDLE;
149
150 spin_lock_irqsave(&host_set->lock, flags);
151
152 switch (qc->tf.protocol) {
153
154 case ATA_PROT_DMA:
155 case ATA_PROT_ATAPI_DMA:
156 host_stat = ap->ops->bmdma_status(ap);
157
158 /* before we do anything else, clear DMA-Start bit */
159 ap->ops->bmdma_stop(qc);
160
161 /* fall through */
162
163 default:
164 ata_altstatus(ap);
165 drv_stat = ata_chk_status(ap);
166
167 /* ack bmdma irq events */
168 ap->ops->irq_clear(ap);
169
Tejun Heof15a1da2006-05-15 20:57:56 +0900170 ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
171 "stat 0x%x host_stat 0x%x\n",
172 qc->tf.command, drv_stat, host_stat);
Tejun Heoece1d632006-04-02 18:51:53 +0900173
174 /* complete taskfile transaction */
175 qc->err_mask |= ac_err_mask(drv_stat);
176 break;
177 }
178
179 spin_unlock_irqrestore(&host_set->lock, flags);
180
181 ata_eh_qc_complete(qc);
182
183 DPRINTK("EXIT\n");
184}
185
186/**
187 * ata_eng_timeout - Handle timeout of queued command
188 * @ap: Port on which timed-out command is active
189 *
190 * Some part of the kernel (currently, only the SCSI layer)
191 * has noticed that the active command on port @ap has not
192 * completed after a specified length of time. Handle this
193 * condition by disabling DMA (if necessary) and completing
194 * transactions, with error if necessary.
195 *
196 * This also handles the case of the "lost interrupt", where
197 * for some reason (possibly hardware bug, possibly driver bug)
198 * an interrupt was not delivered to the driver, even though the
199 * transaction completed successfully.
200 *
201 * LOCKING:
202 * Inherited from SCSI layer (none, can sleep)
203 */
204void ata_eng_timeout(struct ata_port *ap)
205{
206 DPRINTK("ENTER\n");
207
208 ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
209
210 DPRINTK("EXIT\n");
211}
212
Tejun Heof686bcb2006-05-15 20:58:05 +0900213/**
214 * ata_qc_schedule_eh - schedule qc for error handling
215 * @qc: command to schedule error handling for
216 *
217 * Schedule error handling for @qc. EH will kick in as soon as
218 * other commands are drained.
219 *
220 * LOCKING:
221 * spin_lock_irqsave(host_set lock)
222 */
223void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
224{
225 struct ata_port *ap = qc->ap;
226
227 WARN_ON(!ap->ops->error_handler);
228
229 qc->flags |= ATA_QCFLAG_FAILED;
230 qc->ap->flags |= ATA_FLAG_EH_PENDING;
231
232 /* The following will fail if timeout has already expired.
233 * ata_scsi_error() takes care of such scmds on EH entry.
234 * Note that ATA_QCFLAG_FAILED is unconditionally set after
235 * this function completes.
236 */
237 scsi_req_abort_cmd(qc->scsicmd);
238}
239
Tejun Heo7b70fc02006-05-15 20:58:07 +0900240/**
241 * ata_port_schedule_eh - schedule error handling without a qc
242 * @ap: ATA port to schedule EH for
243 *
244 * Schedule error handling for @ap. EH will kick in as soon as
245 * all commands are drained.
246 *
247 * LOCKING:
248 * spin_lock_irqsave(host_set lock)
249 */
250void ata_port_schedule_eh(struct ata_port *ap)
251{
252 WARN_ON(!ap->ops->error_handler);
253
254 ap->flags |= ATA_FLAG_EH_PENDING;
255 ata_schedule_scsi_eh(ap->host);
256
257 DPRINTK("port EH scheduled\n");
258}
259
260/**
261 * ata_port_abort - abort all qc's on the port
262 * @ap: ATA port to abort qc's for
263 *
264 * Abort all active qc's of @ap and schedule EH.
265 *
266 * LOCKING:
267 * spin_lock_irqsave(host_set lock)
268 *
269 * RETURNS:
270 * Number of aborted qc's.
271 */
272int ata_port_abort(struct ata_port *ap)
273{
274 int tag, nr_aborted = 0;
275
276 WARN_ON(!ap->ops->error_handler);
277
278 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
279 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
280
281 if (qc) {
282 qc->flags |= ATA_QCFLAG_FAILED;
283 ata_qc_complete(qc);
284 nr_aborted++;
285 }
286 }
287
288 if (!nr_aborted)
289 ata_port_schedule_eh(ap);
290
291 return nr_aborted;
292}
293
Tejun Heoe3180492006-05-15 20:58:09 +0900294/**
295 * __ata_port_freeze - freeze port
296 * @ap: ATA port to freeze
297 *
298 * This function is called when HSM violation or some other
299 * condition disrupts normal operation of the port. Frozen port
300 * is not allowed to perform any operation until the port is
301 * thawed, which usually follows a successful reset.
302 *
303 * ap->ops->freeze() callback can be used for freezing the port
304 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
305 * port cannot be frozen hardware-wise, the interrupt handler
306 * must ack and clear interrupts unconditionally while the port
307 * is frozen.
308 *
309 * LOCKING:
310 * spin_lock_irqsave(host_set lock)
311 */
312static void __ata_port_freeze(struct ata_port *ap)
313{
314 WARN_ON(!ap->ops->error_handler);
315
316 if (ap->ops->freeze)
317 ap->ops->freeze(ap);
318
319 ap->flags |= ATA_FLAG_FROZEN;
320
321 DPRINTK("ata%u port frozen\n", ap->id);
322}
323
324/**
325 * ata_port_freeze - abort & freeze port
326 * @ap: ATA port to freeze
327 *
328 * Abort and freeze @ap.
329 *
330 * LOCKING:
331 * spin_lock_irqsave(host_set lock)
332 *
333 * RETURNS:
334 * Number of aborted commands.
335 */
336int ata_port_freeze(struct ata_port *ap)
337{
338 int nr_aborted;
339
340 WARN_ON(!ap->ops->error_handler);
341
342 nr_aborted = ata_port_abort(ap);
343 __ata_port_freeze(ap);
344
345 return nr_aborted;
346}
347
348/**
349 * ata_eh_freeze_port - EH helper to freeze port
350 * @ap: ATA port to freeze
351 *
352 * Freeze @ap.
353 *
354 * LOCKING:
355 * None.
356 */
357void ata_eh_freeze_port(struct ata_port *ap)
358{
359 unsigned long flags;
360
361 if (!ap->ops->error_handler)
362 return;
363
364 spin_lock_irqsave(&ap->host_set->lock, flags);
365 __ata_port_freeze(ap);
366 spin_unlock_irqrestore(&ap->host_set->lock, flags);
367}
368
369/**
370 * ata_port_thaw_port - EH helper to thaw port
371 * @ap: ATA port to thaw
372 *
373 * Thaw frozen port @ap.
374 *
375 * LOCKING:
376 * None.
377 */
378void ata_eh_thaw_port(struct ata_port *ap)
379{
380 unsigned long flags;
381
382 if (!ap->ops->error_handler)
383 return;
384
385 spin_lock_irqsave(&ap->host_set->lock, flags);
386
387 ap->flags &= ~ATA_FLAG_FROZEN;
388
389 if (ap->ops->thaw)
390 ap->ops->thaw(ap);
391
392 spin_unlock_irqrestore(&ap->host_set->lock, flags);
393
394 DPRINTK("ata%u port thawed\n", ap->id);
395}
396
/**
 *	ata_eh_scsidone - do-nothing SCSI completion callback for EH
 *	@scmd: SCSI command (unused)
 *
 *	Installed as qc->scsidone by __ata_eh_qc_complete().  It
 *	intentionally does nothing.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* intentionally empty */
}
401
402static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
403{
404 struct ata_port *ap = qc->ap;
405 struct scsi_cmnd *scmd = qc->scsicmd;
406 unsigned long flags;
407
408 spin_lock_irqsave(&ap->host_set->lock, flags);
409 qc->scsidone = ata_eh_scsidone;
410 __ata_qc_complete(qc);
411 WARN_ON(ata_tag_valid(qc->tag));
412 spin_unlock_irqrestore(&ap->host_set->lock, flags);
413
414 scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
415}
416
417/**
418 * ata_eh_qc_complete - Complete an active ATA command from EH
419 * @qc: Command to complete
420 *
421 * Indicate to the mid and upper layers that an ATA command has
422 * completed. To be used from EH.
423 */
424void ata_eh_qc_complete(struct ata_queued_cmd *qc)
425{
426 struct scsi_cmnd *scmd = qc->scsicmd;
427 scmd->retries = scmd->allowed;
428 __ata_eh_qc_complete(qc);
429}
430
431/**
432 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
433 * @qc: Command to retry
434 *
435 * Indicate to the mid and upper layers that an ATA command
436 * should be retried. To be used from EH.
437 *
438 * SCSI midlayer limits the number of retries to scmd->allowed.
439 * scmd->retries is decremented for commands which get retried
440 * due to unrelated failures (qc->err_mask is zero).
441 */
442void ata_eh_qc_retry(struct ata_queued_cmd *qc)
443{
444 struct scsi_cmnd *scmd = qc->scsicmd;
445 if (!qc->err_mask && scmd->retries)
446 scmd->retries--;
447 __ata_eh_qc_complete(qc);
448}