Blame - drivers/scsi/libata-eh.c - kernel/msm-4.19

blob: 0803231f65779776e96383f4452a09adc16c0f42 [file] [log] [blame]

Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	1	/*
				2	* libata-eh.c - libata error handling
				3	*
				4	* Maintained by: Jeff Garzik <jgarzik@pobox.com>
				5	* Please ALWAYS copy linux-ide@vger.kernel.org
				6	* on emails.
				7	*
				8	* Copyright 2006 Tejun Heo <htejun@gmail.com>
				9	*
				10	*
				11	* This program is free software; you can redistribute it and/or
				12	* modify it under the terms of the GNU General Public License as
				13	* published by the Free Software Foundation; either version 2, or
				14	* (at your option) any later version.
				15	*
				16	* This program is distributed in the hope that it will be useful,
				17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				19	* General Public License for more details.
				20	*
				21	* You should have received a copy of the GNU General Public License
				22	* along with this program; see the file COPYING. If not, write to
				23	* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
				24	* USA.
				25	*
				26	*
				27	* libata documentation is available via 'make {ps|pdf}docs',
				28	* as Documentation/DocBook/libata.*
				29	*
				30	* Hardware documentation available from http://www.t13.org/ and
				31	* http://www.sata-io.org/
				32	*
				33	*/
				34
				35	#include <linux/config.h>
				36	#include <linux/kernel.h>
				37	#include <scsi/scsi.h>
				38	#include <scsi/scsi_host.h>
				39	#include <scsi/scsi_eh.h>
				40	#include <scsi/scsi_device.h>
				41	#include <scsi/scsi_cmnd.h>
				42
				43	#include <linux/libata.h>
				44
				45	#include "libata.h"
				46
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	47	static void __ata_port_freeze(struct ata_port *ap);
				48
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	49	/**
				50	* ata_scsi_timed_out - SCSI layer time out callback
				51	* @cmd: timed out SCSI command
				52	*
				53	* Handles SCSI layer timeout. We race with normal completion of
				54	* the qc for @cmd. If the qc is already gone, we lose and let
				55	* the scsi command finish (EH_HANDLED). Otherwise, the qc has
				56	* timed out and EH should be invoked. Prevent ata_qc_complete()
				57	* from finishing it by setting EH_SCHEDULED and return
				58	* EH_NOT_HANDLED.
				59	*
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	60	* TODO: kill this function once old EH is gone.
				61	*
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	62	* LOCKING:
				63	* Called from timer context
				64	*
				65	* RETURNS:
				66	* EH_HANDLED or EH_NOT_HANDLED
				67	*/
				68	enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
				69	{
				70	struct Scsi_Host *host = cmd->device->host;
Jeff Garzik	35bb94b	2006-04-11 13:12:34 -0400	[diff] [blame]	71	struct ata_port *ap = ata_shost_to_port(host);
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	72	unsigned long flags;
				73	struct ata_queued_cmd *qc;
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	74	enum scsi_eh_timer_return ret;
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	75
				76	DPRINTK("ENTER\n");
				77
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	78	if (ap->ops->error_handler) {
				79	ret = EH_NOT_HANDLED;
				80	goto out;
				81	}
				82
				83	ret = EH_HANDLED;
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	84	spin_lock_irqsave(&ap->host_set->lock, flags);
				85	qc = ata_qc_from_tag(ap, ap->active_tag);
				86	if (qc) {
				87	WARN_ON(qc->scsicmd != cmd);
				88	qc->flags \|= ATA_QCFLAG_EH_SCHEDULED;
				89	qc->err_mask \|= AC_ERR_TIMEOUT;
				90	ret = EH_NOT_HANDLED;
				91	}
				92	spin_unlock_irqrestore(&ap->host_set->lock, flags);
				93
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	94	out:
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	95	DPRINTK("EXIT, ret=%d\n", ret);
				96	return ret;
				97	}
				98
				99	/**
				100	* ata_scsi_error - SCSI layer error handler callback
				101	* @host: SCSI host on which error occurred
				102	*
				103	* Handles SCSI-layer-thrown error events.
				104	*
				105	* LOCKING:
				106	* Inherited from SCSI layer (none, can sleep)
				107	*
				108	* RETURNS:
				109	* Zero.
				110	*/
Jeff Garzik	381544b	2006-04-11 13:04:39 -0400	[diff] [blame]	111	void ata_scsi_error(struct Scsi_Host *host)
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	112	{
Jeff Garzik	35bb94b	2006-04-11 13:12:34 -0400	[diff] [blame]	113	struct ata_port *ap = ata_shost_to_port(host);
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	114	spinlock_t *hs_lock = &ap->host_set->lock;
				115	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
				116	unsigned long flags;
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	117
				118	DPRINTK("ENTER\n");
				119
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	120	/* synchronize with port task */
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	121	ata_port_flush_task(ap);
				122
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	123	/* synchronize with host_set lock and sort out timeouts */
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	124
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	125	/* For new EH, all qcs are finished in one of three ways -
				126	* normal completion, error completion, and SCSI timeout.
				127	* Both cmpletions can race against SCSI timeout. When normal
				128	* completion wins, the qc never reaches EH. When error
				129	* completion wins, the qc has ATA_QCFLAG_FAILED set.
				130	*
				131	* When SCSI timeout wins, things are a bit more complex.
				132	* Normal or error completion can occur after the timeout but
				133	* before this point. In such cases, both types of
				134	* completions are honored. A scmd is determined to have
				135	* timed out iff its associated qc is active and not failed.
				136	*/
				137	if (ap->ops->error_handler) {
				138	struct scsi_cmnd scmd, tmp;
				139	int nr_timedout = 0;
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	140
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	141	spin_lock_irqsave(hs_lock, flags);
				142
				143	list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
				144	struct ata_queued_cmd *qc;
				145
				146	for (i = 0; i < ATA_MAX_QUEUE; i++) {
				147	qc = __ata_qc_from_tag(ap, i);
				148	if (qc->flags & ATA_QCFLAG_ACTIVE &&
				149	qc->scsicmd == scmd)
				150	break;
				151	}
				152
				153	if (i < ATA_MAX_QUEUE) {
				154	/* the scmd has an associated qc */
				155	if (!(qc->flags & ATA_QCFLAG_FAILED)) {
				156	/* which hasn't failed yet, timeout */
				157	qc->err_mask \|= AC_ERR_TIMEOUT;
				158	qc->flags \|= ATA_QCFLAG_FAILED;
				159	nr_timedout++;
				160	}
				161	} else {
				162	/* Normal completion occurred after
				163	* SCSI timeout but before this point.
				164	* Successfully complete it.
				165	*/
				166	scmd->retries = scmd->allowed;
				167	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
				168	}
				169	}
				170
				171	/* If we have timed out qcs. They belong to EH from
				172	* this point but the state of the controller is
				173	* unknown. Freeze the port to make sure the IRQ
				174	* handler doesn't diddle with those qcs. This must
				175	* be done atomically w.r.t. setting QCFLAG_FAILED.
				176	*/
				177	if (nr_timedout)
				178	__ata_port_freeze(ap);
				179
				180	spin_unlock_irqrestore(hs_lock, flags);
				181	} else
				182	spin_unlock_wait(hs_lock);
				183
				184	repeat:
				185	/* invoke error handler */
				186	if (ap->ops->error_handler) {
				187	/* clear EH pending */
				188	spin_lock_irqsave(hs_lock, flags);
				189	ap->flags &= ~ATA_FLAG_EH_PENDING;
				190	spin_unlock_irqrestore(hs_lock, flags);
				191
				192	/* invoke EH */
				193	ap->ops->error_handler(ap);
				194
				195	/* Exception might have happend after ->error_handler
				196	* recovered the port but before this point. Repeat
				197	* EH in such case.
				198	*/
				199	spin_lock_irqsave(hs_lock, flags);
				200
				201	if (ap->flags & ATA_FLAG_EH_PENDING) {
				202	if (--repeat_cnt) {
				203	ata_port_printk(ap, KERN_INFO,
				204	"EH pending after completion, "
				205	"repeating EH (cnt=%d)\n", repeat_cnt);
				206	spin_unlock_irqrestore(hs_lock, flags);
				207	goto repeat;
				208	}
				209	ata_port_printk(ap, KERN_ERR, "EH pending after %d "
				210	"tries, giving up\n", ATA_EH_MAX_REPEAT);
				211	}
				212
				213	/* Clear host_eh_scheduled while holding hs_lock such
				214	* that if exception occurs after this point but
				215	* before EH completion, SCSI midlayer will
				216	* re-initiate EH.
				217	*/
				218	host->host_eh_scheduled = 0;
				219
				220	spin_unlock_irqrestore(hs_lock, flags);
				221	} else {
				222	WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
				223	ap->ops->eng_timeout(ap);
				224	}
				225
				226	/* finish or retry handled scmd's and clean up */
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	227	WARN_ON(host->host_failed \|\| !list_empty(&host->eh_cmd_q));
				228
				229	scsi_eh_flush_done_q(&ap->eh_done_q);
				230
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	231	/* clean up */
				232	spin_lock_irqsave(hs_lock, flags);
				233
				234	if (ap->flags & ATA_FLAG_RECOVERED)
				235	ata_port_printk(ap, KERN_INFO, "EH complete\n");
				236	ap->flags &= ~ATA_FLAG_RECOVERED;
				237
				238	spin_unlock_irqrestore(hs_lock, flags);
				239
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	240	DPRINTK("EXIT\n");
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	241	}
				242
				243	/**
				244	* ata_qc_timeout - Handle timeout of queued command
				245	* @qc: Command that timed out
				246	*
				247	* Some part of the kernel (currently, only the SCSI layer)
				248	* has noticed that the active command on port @ap has not
				249	* completed after a specified length of time. Handle this
				250	* condition by disabling DMA (if necessary) and completing
				251	* transactions, with error if necessary.
				252	*
				253	* This also handles the case of the "lost interrupt", where
				254	* for some reason (possibly hardware bug, possibly driver bug)
				255	* an interrupt was not delivered to the driver, even though the
				256	* transaction completed successfully.
				257	*
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	258	* TODO: kill this function once old EH is gone.
				259	*
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	260	* LOCKING:
				261	* Inherited from SCSI layer (none, can sleep)
				262	*/
				263	static void ata_qc_timeout(struct ata_queued_cmd *qc)
				264	{
				265	struct ata_port *ap = qc->ap;
				266	struct ata_host_set *host_set = ap->host_set;
				267	u8 host_stat = 0, drv_stat;
				268	unsigned long flags;
				269
				270	DPRINTK("ENTER\n");
				271
				272	ap->hsm_task_state = HSM_ST_IDLE;
				273
				274	spin_lock_irqsave(&host_set->lock, flags);
				275
				276	switch (qc->tf.protocol) {
				277
				278	case ATA_PROT_DMA:
				279	case ATA_PROT_ATAPI_DMA:
				280	host_stat = ap->ops->bmdma_status(ap);
				281
				282	/* before we do anything else, clear DMA-Start bit */
				283	ap->ops->bmdma_stop(qc);
				284
				285	/* fall through */
				286
				287	default:
				288	ata_altstatus(ap);
				289	drv_stat = ata_chk_status(ap);
				290
				291	/* ack bmdma irq events */
				292	ap->ops->irq_clear(ap);
				293
Tejun Heo	f15a1da	2006-05-15 20:57:56 +0900	[diff] [blame]	294	ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
				295	"stat 0x%x host_stat 0x%x\n",
				296	qc->tf.command, drv_stat, host_stat);
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	297
				298	/* complete taskfile transaction */
				299	qc->err_mask \|= ac_err_mask(drv_stat);
				300	break;
				301	}
				302
				303	spin_unlock_irqrestore(&host_set->lock, flags);
				304
				305	ata_eh_qc_complete(qc);
				306
				307	DPRINTK("EXIT\n");
				308	}
				309
				310	/**
				311	* ata_eng_timeout - Handle timeout of queued command
				312	* @ap: Port on which timed-out command is active
				313	*
				314	* Some part of the kernel (currently, only the SCSI layer)
				315	* has noticed that the active command on port @ap has not
				316	* completed after a specified length of time. Handle this
				317	* condition by disabling DMA (if necessary) and completing
				318	* transactions, with error if necessary.
				319	*
				320	* This also handles the case of the "lost interrupt", where
				321	* for some reason (possibly hardware bug, possibly driver bug)
				322	* an interrupt was not delivered to the driver, even though the
				323	* transaction completed successfully.
				324	*
Tejun Heo	ad9e276	2006-05-15 20:58:12 +0900	[diff] [blame^]	325	* TODO: kill this function once old EH is gone.
				326	*
Tejun Heo	ece1d63	2006-04-02 18:51:53 +0900	[diff] [blame]	327	* LOCKING:
				328	* Inherited from SCSI layer (none, can sleep)
				329	*/
				330	void ata_eng_timeout(struct ata_port *ap)
				331	{
				332	DPRINTK("ENTER\n");
				333
				334	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
				335
				336	DPRINTK("EXIT\n");
				337	}
				338
Tejun Heo	f686bcb	2006-05-15 20:58:05 +0900	[diff] [blame]	339	/**
				340	* ata_qc_schedule_eh - schedule qc for error handling
				341	* @qc: command to schedule error handling for
				342	*
				343	* Schedule error handling for @qc. EH will kick in as soon as
				344	* other commands are drained.
				345	*
				346	* LOCKING:
				347	* spin_lock_irqsave(host_set lock)
				348	*/
				349	void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
				350	{
				351	struct ata_port *ap = qc->ap;
				352
				353	WARN_ON(!ap->ops->error_handler);
				354
				355	qc->flags \|= ATA_QCFLAG_FAILED;
				356	qc->ap->flags \|= ATA_FLAG_EH_PENDING;
				357
				358	/* The following will fail if timeout has already expired.
				359	* ata_scsi_error() takes care of such scmds on EH entry.
				360	* Note that ATA_QCFLAG_FAILED is unconditionally set after
				361	* this function completes.
				362	*/
				363	scsi_req_abort_cmd(qc->scsicmd);
				364	}
				365
Tejun Heo	7b70fc0	2006-05-15 20:58:07 +0900	[diff] [blame]	366	/**
				367	* ata_port_schedule_eh - schedule error handling without a qc
				368	* @ap: ATA port to schedule EH for
				369	*
				370	* Schedule error handling for @ap. EH will kick in as soon as
				371	* all commands are drained.
				372	*
				373	* LOCKING:
				374	* spin_lock_irqsave(host_set lock)
				375	*/
				376	void ata_port_schedule_eh(struct ata_port *ap)
				377	{
				378	WARN_ON(!ap->ops->error_handler);
				379
				380	ap->flags \|= ATA_FLAG_EH_PENDING;
				381	ata_schedule_scsi_eh(ap->host);
				382
				383	DPRINTK("port EH scheduled\n");
				384	}
				385
				386	/**
				387	* ata_port_abort - abort all qc's on the port
				388	* @ap: ATA port to abort qc's for
				389	*
				390	* Abort all active qc's of @ap and schedule EH.
				391	*
				392	* LOCKING:
				393	* spin_lock_irqsave(host_set lock)
				394	*
				395	* RETURNS:
				396	* Number of aborted qc's.
				397	*/
				398	int ata_port_abort(struct ata_port *ap)
				399	{
				400	int tag, nr_aborted = 0;
				401
				402	WARN_ON(!ap->ops->error_handler);
				403
				404	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
				405	struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
				406
				407	if (qc) {
				408	qc->flags \|= ATA_QCFLAG_FAILED;
				409	ata_qc_complete(qc);
				410	nr_aborted++;
				411	}
				412	}
				413
				414	if (!nr_aborted)
				415	ata_port_schedule_eh(ap);
				416
				417	return nr_aborted;
				418	}
				419
Tejun Heo	e318049	2006-05-15 20:58:09 +0900	[diff] [blame]	420	/**
				421	* __ata_port_freeze - freeze port
				422	* @ap: ATA port to freeze
				423	*
				424	* This function is called when HSM violation or some other
				425	* condition disrupts normal operation of the port. Frozen port
				426	* is not allowed to perform any operation until the port is
				427	* thawed, which usually follows a successful reset.
				428	*
				429	* ap->ops->freeze() callback can be used for freezing the port
				430	* hardware-wise (e.g. mask interrupt and stop DMA engine). If a
				431	* port cannot be frozen hardware-wise, the interrupt handler
				432	* must ack and clear interrupts unconditionally while the port
				433	* is frozen.
				434	*
				435	* LOCKING:
				436	* spin_lock_irqsave(host_set lock)
				437	*/
				438	static void __ata_port_freeze(struct ata_port *ap)
				439	{
				440	WARN_ON(!ap->ops->error_handler);
				441
				442	if (ap->ops->freeze)
				443	ap->ops->freeze(ap);
				444
				445	ap->flags \|= ATA_FLAG_FROZEN;
				446
				447	DPRINTK("ata%u port frozen\n", ap->id);
				448	}
				449
				450	/**
				451	* ata_port_freeze - abort & freeze port
				452	* @ap: ATA port to freeze
				453	*
				454	* Abort and freeze @ap.
				455	*
				456	* LOCKING:
				457	* spin_lock_irqsave(host_set lock)
				458	*
				459	* RETURNS:
				460	* Number of aborted commands.
				461	*/
				462	int ata_port_freeze(struct ata_port *ap)
				463	{
				464	int nr_aborted;
				465
				466	WARN_ON(!ap->ops->error_handler);
				467
				468	nr_aborted = ata_port_abort(ap);
				469	__ata_port_freeze(ap);
				470
				471	return nr_aborted;
				472	}
				473
				474	/**
				475	* ata_eh_freeze_port - EH helper to freeze port
				476	* @ap: ATA port to freeze
				477	*
				478	* Freeze @ap.
				479	*
				480	* LOCKING:
				481	* None.
				482	*/
				483	void ata_eh_freeze_port(struct ata_port *ap)
				484	{
				485	unsigned long flags;
				486
				487	if (!ap->ops->error_handler)
				488	return;
				489
				490	spin_lock_irqsave(&ap->host_set->lock, flags);
				491	__ata_port_freeze(ap);
				492	spin_unlock_irqrestore(&ap->host_set->lock, flags);
				493	}
				494
				495	/**
				496	* ata_port_thaw_port - EH helper to thaw port
				497	* @ap: ATA port to thaw
				498	*
				499	* Thaw frozen port @ap.
				500	*
				501	* LOCKING:
				502	* None.
				503	*/
				504	void ata_eh_thaw_port(struct ata_port *ap)
				505	{
				506	unsigned long flags;
				507
				508	if (!ap->ops->error_handler)
				509	return;
				510
				511	spin_lock_irqsave(&ap->host_set->lock, flags);
				512
				513	ap->flags &= ~ATA_FLAG_FROZEN;
				514
				515	if (ap->ops->thaw)
				516	ap->ops->thaw(ap);
				517
				518	spin_unlock_irqrestore(&ap->host_set->lock, flags);
				519
				520	DPRINTK("ata%u port thawed\n", ap->id);
				521	}
				522
/*
 * No-op SCSI completion callback.  __ata_eh_qc_complete() installs it
 * as qc->scsidone before completing a qc from EH.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* intentionally empty */
}
				527
				528	static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
				529	{
				530	struct ata_port *ap = qc->ap;
				531	struct scsi_cmnd *scmd = qc->scsicmd;
				532	unsigned long flags;
				533
				534	spin_lock_irqsave(&ap->host_set->lock, flags);
				535	qc->scsidone = ata_eh_scsidone;
				536	__ata_qc_complete(qc);
				537	WARN_ON(ata_tag_valid(qc->tag));
				538	spin_unlock_irqrestore(&ap->host_set->lock, flags);
				539
				540	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
				541	}
				542
				543	/**
				544	* ata_eh_qc_complete - Complete an active ATA command from EH
				545	* @qc: Command to complete
				546	*
				547	* Indicate to the mid and upper layers that an ATA command has
				548	* completed. To be used from EH.
				549	*/
				550	void ata_eh_qc_complete(struct ata_queued_cmd *qc)
				551	{
				552	struct scsi_cmnd *scmd = qc->scsicmd;
				553	scmd->retries = scmd->allowed;
				554	__ata_eh_qc_complete(qc);
				555	}
				556
				557	/**
				558	* ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
				559	* @qc: Command to retry
				560	*
				561	* Indicate to the mid and upper layers that an ATA command
				562	* should be retried. To be used from EH.
				563	*
				564	* SCSI midlayer limits the number of retries to scmd->allowed.
				565	* scmd->retries is decremented for commands which get retried
				566	* due to unrelated failures (qc->err_mask is zero).
				567	*/
				568	void ata_eh_qc_retry(struct ata_queued_cmd *qc)
				569	{
				570	struct scsi_cmnd *scmd = qc->scsicmd;
				571	if (!qc->err_mask && scmd->retries)
				572	scmd->retries--;
				573	__ata_eh_qc_complete(qc);
				574	}