Blame - drivers/scsi/scsi_error.c - kernel/msm-4.9

blob: c8e351fb816c0480f5496097736215cd382ce7da [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* scsi_error.c Copyright (C) 1997 Eric Youngdale
				3	*
				4	* SCSI error/timeout handling
				5	* Initial versions: Eric Youngdale. Based upon conversations with
				6	* Leonard Zubkoff and David Miller at Linux Expo,
				7	* ideas originating from all over the place.
				8	*
				9	* Restructured scsi_unjam_host and associated functions.
				10	* September 04, 2002 Mike Anderson (andmike@us.ibm.com)
				11	*
				12	* Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
				13	* minor cleanups.
				14	* September 30, 2002 Mike Anderson (andmike@us.ibm.com)
				15	*/
				16
				17	#include <linux/module.h>
				18	#include <linux/sched.h>
				19	#include <linux/timer.h>
				20	#include <linux/string.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include <linux/kernel.h>
Rafael J. Wysocki	8314418	2007-07-17 04:03:35 -0700	[diff] [blame]	22	#include <linux/freezer.h>
Christoph Hellwig	c5478de	2005-09-06 14:04:26 +0200	[diff] [blame]	23	#include <linux/kthread.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/interrupt.h>
				25	#include <linux/blkdev.h>
				26	#include <linux/delay.h>
James Bottomley	355dfa1	2007-05-22 14:43:14 -0500	[diff] [blame]	27	#include <linux/scatterlist.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28
				29	#include <scsi/scsi.h>
Christoph Hellwig	beb4048	2006-06-10 18:01:03 +0200	[diff] [blame]	30	#include <scsi/scsi_cmnd.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31	#include <scsi/scsi_dbg.h>
				32	#include <scsi/scsi_device.h>
				33	#include <scsi/scsi_eh.h>
James Smart	c829c39	2006-03-13 08:28:57 -0500	[diff] [blame]	34	#include <scsi/scsi_transport.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35	#include <scsi/scsi_host.h>
				36	#include <scsi/scsi_ioctl.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37
				38	#include "scsi_priv.h"
				39	#include "scsi_logging.h"
Adrian Bunk	79ee830	2007-08-10 14:50:42 -0700	[diff] [blame^]	40	#include "scsi_transport_api.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41
				42	#define SENSE_TIMEOUT (10*HZ)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43
				44	/*
				45	* These should probably be handled by the host itself.
				46	* Since it is allowed to sleep, it probably should.
				47	*/
				48	#define BUS_RESET_SETTLE_TIME (10)
				49	#define HOST_RESET_SETTLE_TIME (10)
				50
				51	/* called with shost->host_lock held */
				52	void scsi_eh_wakeup(struct Scsi_Host *shost)
				53	{
				54	if (shost->host_busy == shost->host_failed) {
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	55	wake_up_process(shost->ehandler);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	56	SCSI_LOG_ERROR_RECOVERY(5,
				57	printk("Waking error handler thread\n"));
				58	}
				59	}
Tejun Heo	f8bbfc2	2006-05-19 21:07:05 +0900	[diff] [blame]	60
				61	/**
				62	* scsi_schedule_eh - schedule EH for SCSI host
				63	* @shost: SCSI host to invoke error handling on.
				64	*
				65	* Schedule SCSI EH without scmd.
				66	**/
				67	void scsi_schedule_eh(struct Scsi_Host *shost)
				68	{
				69	unsigned long flags;
				70
				71	spin_lock_irqsave(shost->host_lock, flags);
				72
				73	if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 \|\|
				74	scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
				75	shost->host_eh_scheduled++;
				76	scsi_eh_wakeup(shost);
				77	}
				78
				79	spin_unlock_irqrestore(shost->host_lock, flags);
				80	}
				81	EXPORT_SYMBOL_GPL(scsi_schedule_eh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	82
				83	/**
				84	* scsi_eh_scmd_add - add scsi cmd to error handling.
				85	* @scmd: scmd to run eh on.
				86	* @eh_flag: optional SCSI_EH flag.
				87	*
				88	* Return value:
				89	* 0 on failure.
				90	**/
				91	int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
				92	{
				93	struct Scsi_Host *shost = scmd->device->host;
				94	unsigned long flags;
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	95	int ret = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	96
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	97	if (!shost->ehandler)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	98	return 0;
				99
				100	spin_lock_irqsave(shost->host_lock, flags);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	101	if (scsi_host_set_state(shost, SHOST_RECOVERY))
				102	if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
				103	goto out_unlock;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	104
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	105	ret = 1;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	106	scmd->eh_eflags \|= eh_flag;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	107	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	108	shost->host_failed++;
				109	scsi_eh_wakeup(shost);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	110	out_unlock:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	111	spin_unlock_irqrestore(shost->host_lock, flags);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	112	return ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	113	}
				114
				115	/**
				116	* scsi_add_timer - Start timeout timer for a single scsi command.
				117	* @scmd: scsi command that is about to start running.
				118	* @timeout: amount of time to allow this command to run.
				119	* @complete: timeout function to call if timer isn't canceled.
				120	*
				121	* Notes:
				122	* This should be turned into an inline function. Each scsi command
				123	* has its own timer, and as it is added to the queue, we set up the
				124	* timer. When the command completes, we cancel the timer.
				125	**/
				126	void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
				127	void (complete)(struct scsi_cmnd ))
				128	{
				129
				130	/*
				131	* If the clock was already running for this command, then
				132	* first delete the timer. The timer handling code gets rather
				133	* confused if we don't do this.
				134	*/
				135	if (scmd->eh_timeout.function)
				136	del_timer(&scmd->eh_timeout);
				137
				138	scmd->eh_timeout.data = (unsigned long)scmd;
				139	scmd->eh_timeout.expires = jiffies + timeout;
				140	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
				141
				142	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
				143	" %d, (%p)\n", __FUNCTION__,
				144	scmd, timeout, complete));
				145
				146	add_timer(&scmd->eh_timeout);
				147	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	148
				149	/**
				150	* scsi_delete_timer - Delete/cancel timer for a given function.
				151	* @scmd: Cmd that we are canceling timer for
				152	*
				153	* Notes:
				154	* This should be turned into an inline function.
				155	*
				156	* Return value:
				157	* 1 if we were able to detach the timer. 0 if we blew it, and the
				158	* timer function has already started to run.
				159	**/
				160	int scsi_delete_timer(struct scsi_cmnd *scmd)
				161	{
				162	int rtn;
				163
				164	rtn = del_timer(&scmd->eh_timeout);
				165
				166	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
				167	" rtn: %d\n", __FUNCTION__,
				168	scmd, rtn));
				169
				170	scmd->eh_timeout.data = (unsigned long)NULL;
				171	scmd->eh_timeout.function = NULL;
				172
				173	return rtn;
				174	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	175
				176	/**
				177	* scsi_times_out - Timeout function for normal scsi commands.
				178	* @scmd: Cmd that is timing out.
				179	*
				180	* Notes:
				181	* We do not need to lock this. There is the potential for a race
				182	* only in that the normal completion handling might run, but if the
				183	* normal completion function determines that the timer has already
				184	* fired, then it mustn't do anything.
				185	**/
				186	void scsi_times_out(struct scsi_cmnd *scmd)
				187	{
James Bottomley	6c5f8ce	2007-03-16 17:44:41 -0500	[diff] [blame]	188	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
				189
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	190	scsi_log_completion(scmd, TIMEOUT_ERROR);
				191
James Smart	c829c39	2006-03-13 08:28:57 -0500	[diff] [blame]	192	if (scmd->device->host->transportt->eh_timed_out)
James Bottomley	6c5f8ce	2007-03-16 17:44:41 -0500	[diff] [blame]	193	eh_timed_out = scmd->device->host->transportt->eh_timed_out;
				194	else if (scmd->device->host->hostt->eh_timed_out)
				195	eh_timed_out = scmd->device->host->hostt->eh_timed_out;
				196	else
				197	eh_timed_out = NULL;
				198
				199	if (eh_timed_out)
				200	switch (eh_timed_out(scmd)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201	case EH_HANDLED:
				202	__scsi_done(scmd);
				203	return;
				204	case EH_RESET_TIMER:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	205	scsi_add_timer(scmd, scmd->timeout_per_command,
				206	scsi_times_out);
				207	return;
				208	case EH_NOT_HANDLED:
				209	break;
				210	}
				211
				212	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	213	scmd->result \|= DID_TIME_OUT << 16;
				214	__scsi_done(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	215	}
				216	}
				217
				218	/**
				219	* scsi_block_when_processing_errors - Prevent cmds from being queued.
				220	* @sdev: Device on which we are performing recovery.
				221	*
				222	* Description:
				223	* We block until the host is out of error recovery, and then check to
				224	* see whether the host or the device is offline.
				225	*
				226	* Return value:
				227	* 0 when dev was taken offline by error recovery. 1 OK to proceed.
				228	**/
				229	int scsi_block_when_processing_errors(struct scsi_device *sdev)
				230	{
				231	int online;
				232
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	233	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	234
				235	online = scsi_device_online(sdev);
				236
				237	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
				238	online));
				239
				240	return online;
				241	}
				242	EXPORT_SYMBOL(scsi_block_when_processing_errors);
				243
				#ifdef CONFIG_SCSI_LOGGING
				/**
				 * scsi_eh_prt_fail_stats - Log info on failures.
				 * @shost:	scsi host being recovered.
				 * @work_q:	Queue of scsi cmds to process.
				 *
				 * For every device on @shost, counts the commands on @work_q that belong
				 * to it, split into those flagged SCSI_EH_CANCEL_CMD and the rest, and
				 * logs the per-device counts followed by an overall total.
				 **/
				static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
									  struct list_head *work_q)
				{
					struct scsi_cmnd *scmd;
					struct scsi_device *sdev;
					int total_failures = 0;
					int cmd_failed = 0;
					int cmd_cancel = 0;
					int devices_failed = 0;

					shost_for_each_device(sdev, shost) {
						list_for_each_entry(scmd, work_q, eh_entry) {
							if (scmd->device == sdev) {
								++total_failures;
								if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
									++cmd_cancel;
								else
									++cmd_failed;
							}
						}

						if (cmd_cancel || cmd_failed) {
							SCSI_LOG_ERROR_RECOVERY(3,
								sdev_printk(KERN_INFO, sdev,
									    "%s: cmds failed: %d, cancel: %d\n",
									    __FUNCTION__, cmd_failed,
									    cmd_cancel));
							/* Per-device counters restart for the next device. */
							cmd_cancel = 0;
							cmd_failed = 0;
							++devices_failed;
						}
					}

					SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
									  " devices require eh work\n",
									  total_failures, devices_failed));
				}
				#endif
				288
				289	/**
				290	* scsi_check_sense - Examine scsi cmd sense
				291	* @scmd: Cmd to have sense checked.
				292	*
				293	* Return value:
				294	* SUCCESS or FAILED or NEEDS_RETRY
				295	*
				296	* Notes:
				297	* When a deferred error is detected the current command has
				298	* not been executed and needs retrying.
				299	**/
				300	static int scsi_check_sense(struct scsi_cmnd *scmd)
				301	{
				302	struct scsi_sense_hdr sshdr;
				303
				304	if (! scsi_command_normalize_sense(scmd, &sshdr))
				305	return FAILED; /* no valid sense data */
				306
				307	if (scsi_sense_is_deferred(&sshdr))
				308	return NEEDS_RETRY;
				309
				310	/*
				311	* Previous logic looked for FILEMARK, EOM or ILI which are
				312	* mainly associated with tapes and returned SUCCESS.
				313	*/
				314	if (sshdr.response_code == 0x70) {
				315	/* fixed format */
				316	if (scmd->sense_buffer[2] & 0xe0)
				317	return SUCCESS;
				318	} else {
				319	/*
				320	* descriptor format: look for "stream commands sense data
				321	* descriptor" (see SSC-3). Assume single sense data
				322	* descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
				323	*/
				324	if ((sshdr.additional_length > 3) &&
				325	(scmd->sense_buffer[8] == 0x4) &&
				326	(scmd->sense_buffer[11] & 0xe0))
				327	return SUCCESS;
				328	}
				329
				330	switch (sshdr.sense_key) {
				331	case NO_SENSE:
				332	return SUCCESS;
				333	case RECOVERED_ERROR:
				334	return /* soft_error */ SUCCESS;
				335
				336	case ABORTED_COMMAND:
				337	return NEEDS_RETRY;
				338	case NOT_READY:
				339	case UNIT_ATTENTION:
				340	/*
				341	* if we are expecting a cc/ua because of a bus reset that we
				342	* performed, treat this just as a retry. otherwise this is
				343	* information that we should pass up to the upper-level driver
				344	* so that we can deal with it there.
				345	*/
				346	if (scmd->device->expecting_cc_ua) {
				347	scmd->device->expecting_cc_ua = 0;
				348	return NEEDS_RETRY;
				349	}
				350	/*
				351	* if the device is in the process of becoming ready, we
				352	* should retry.
				353	*/
				354	if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
				355	return NEEDS_RETRY;
				356	/*
				357	* if the device is not started, we need to wake
				358	* the error handler to start the motor
				359	*/
				360	if (scmd->device->allow_restart &&
				361	(sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
				362	return FAILED;
				363	return SUCCESS;
				364
				365	/* these three are not supported */
				366	case COPY_ABORTED:
				367	case VOLUME_OVERFLOW:
				368	case MISCOMPARE:
				369	return SUCCESS;
				370
				371	case MEDIUM_ERROR:
Luben Tuikov	fd1b494	2006-11-29 19:45:23 -0800	[diff] [blame]	372	if (sshdr.asc == 0x11 \|\| /* UNRECOVERED READ ERR */
				373	sshdr.asc == 0x13 \|\| /* AMNF DATA FIELD */
				374	sshdr.asc == 0x14) { /* RECORD NOT FOUND */
				375	return SUCCESS;
				376	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	377	return NEEDS_RETRY;
				378
				379	case HARDWARE_ERROR:
				380	if (scmd->device->retry_hwerror)
				381	return NEEDS_RETRY;
				382	else
				383	return SUCCESS;
				384
				385	case ILLEGAL_REQUEST:
				386	case BLANK_CHECK:
				387	case DATA_PROTECT:
				388	default:
				389	return SUCCESS;
				390	}
				391	}
				392
				393	/**
				394	* scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
				395	* @scmd: SCSI cmd to examine.
				396	*
				397	* Notes:
				398	* This is only called when we are examining the status of commands
				399	* queued during error recovery. the main difference here is that we
				400	* don't allow for the possibility of retries here, and we are a lot
				401	* more restrictive about what we consider acceptable.
				402	**/
				403	static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
				404	{
				405	/*
				406	* first check the host byte, to see if there is anything in there
				407	* that would indicate what we need to do.
				408	*/
				409	if (host_byte(scmd->result) == DID_RESET) {
				410	/*
				411	* rats. we are already in the error handler, so we now
				412	* get to try and figure out what to do next. if the sense
				413	* is valid, we have a pretty good idea of what to do.
				414	* if not, we mark it as FAILED.
				415	*/
				416	return scsi_check_sense(scmd);
				417	}
				418	if (host_byte(scmd->result) != DID_OK)
				419	return FAILED;
				420
				421	/*
				422	* next, check the message byte.
				423	*/
				424	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				425	return FAILED;
				426
				427	/*
				428	* now, check the status byte to see if this indicates
				429	* anything special.
				430	*/
				431	switch (status_byte(scmd->result)) {
				432	case GOOD:
				433	case COMMAND_TERMINATED:
				434	return SUCCESS;
				435	case CHECK_CONDITION:
				436	return scsi_check_sense(scmd);
				437	case CONDITION_GOOD:
				438	case INTERMEDIATE_GOOD:
				439	case INTERMEDIATE_C_GOOD:
				440	/*
				441	* who knows? FIXME(eric)
				442	*/
				443	return SUCCESS;
				444	case BUSY:
				445	case QUEUE_FULL:
				446	case RESERVATION_CONFLICT:
				447	default:
				448	return FAILED;
				449	}
				450	return FAILED;
				451	}
				452
				453	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	454	* scsi_eh_done - Completion function for error handling.
				455	* @scmd: Cmd that is done.
				456	**/
				457	static void scsi_eh_done(struct scsi_cmnd *scmd)
				458	{
Michael Reed	8563167	2005-12-07 21:46:27 -0600	[diff] [blame]	459	struct completion *eh_action;
				460
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	461	SCSI_LOG_ERROR_RECOVERY(3,
				462	printk("%s scmd: %p result: %x\n",
				463	__FUNCTION__, scmd, scmd->result));
Michael Reed	8563167	2005-12-07 21:46:27 -0600	[diff] [blame]	464
				465	eh_action = scmd->device->host->eh_action;
				466	if (eh_action)
				467	complete(eh_action);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	468	}
				469
				470	/**
Brian King	292148f	2007-01-30 17:51:17 -0600	[diff] [blame]	471	* scsi_try_host_reset - ask host adapter to reset itself
				472	* @scmd: SCSI cmd to send hsot reset.
				473	**/
				474	static int scsi_try_host_reset(struct scsi_cmnd *scmd)
				475	{
				476	unsigned long flags;
				477	int rtn;
				478
				479	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
				480	__FUNCTION__));
				481
				482	if (!scmd->device->host->hostt->eh_host_reset_handler)
				483	return FAILED;
				484
				485	rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
				486
				487	if (rtn == SUCCESS) {
				488	if (!scmd->device->host->hostt->skip_settle_delay)
				489	ssleep(HOST_RESET_SETTLE_TIME);
				490	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				491	scsi_report_bus_reset(scmd->device->host,
				492	scmd_channel(scmd));
				493	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				494	}
				495
				496	return rtn;
				497	}
				498
				499	/**
				500	* scsi_try_bus_reset - ask host to perform a bus reset
				501	* @scmd: SCSI cmd to send bus reset.
				502	**/
				503	static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
				504	{
				505	unsigned long flags;
				506	int rtn;
				507
				508	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
				509	__FUNCTION__));
				510
				511	if (!scmd->device->host->hostt->eh_bus_reset_handler)
				512	return FAILED;
				513
				514	rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
				515
				516	if (rtn == SUCCESS) {
				517	if (!scmd->device->host->hostt->skip_settle_delay)
				518	ssleep(BUS_RESET_SETTLE_TIME);
				519	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				520	scsi_report_bus_reset(scmd->device->host,
				521	scmd_channel(scmd));
				522	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				523	}
				524
				525	return rtn;
				526	}
				527
				528	/**
				529	* scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
				530	* @scmd: SCSI cmd used to send BDR
				531	*
				532	* Notes:
				533	* There is no timeout for this operation. if this operation is
				534	* unreliable for a given host, then the host itself needs to put a
				535	* timer on it, and set the host back to a consistent state prior to
				536	* returning.
				537	**/
				538	static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
				539	{
				540	int rtn;
				541
				542	if (!scmd->device->host->hostt->eh_device_reset_handler)
				543	return FAILED;
				544
				545	rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
				546	if (rtn == SUCCESS) {
				547	scmd->device->was_reset = 1;
				548	scmd->device->expecting_cc_ua = 1;
				549	}
				550
				551	return rtn;
				552	}
				553
				554	static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
				555	{
				556	if (!scmd->device->host->hostt->eh_abort_handler)
				557	return FAILED;
				558
				559	return scmd->device->host->hostt->eh_abort_handler(scmd);
				560	}
				561
				562	/**
				563	* scsi_try_to_abort_cmd - Ask host to abort a running command.
				564	* @scmd: SCSI cmd to abort from Lower Level.
				565	*
				566	* Notes:
				567	* This function will not return until the user's completion function
				568	* has been called. there is no timeout on this operation. if the
				569	* author of the low-level driver wishes this operation to be timed,
				570	* they can provide this facility themselves. helper functions in
				571	* scsi_error.c can be supplied to make this easier to do.
				572	**/
				573	static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
				574	{
				575	/*
				576	* scsi_done was called just after the command timed out and before
				577	* we had a chance to process it. (db)
				578	*/
				579	if (scmd->serial_number == 0)
				580	return SUCCESS;
				581	return __scsi_try_to_abort_cmd(scmd);
				582	}
				583
				584	static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
				585	{
				586	if (__scsi_try_to_abort_cmd(scmd) != SUCCESS)
				587	if (scsi_try_bus_device_reset(scmd) != SUCCESS)
				588	if (scsi_try_bus_reset(scmd) != SUCCESS)
				589	scsi_try_host_reset(scmd);
				590	}
				591
				592	/**
Christoph Hellwig	2dc611d	2006-11-04 20:04:21 +0100	[diff] [blame]	593	* scsi_send_eh_cmnd - submit a scsi command as part of error recory
				594	* @scmd: SCSI command structure to hijack
				595	* @cmnd: CDB to send
				596	* @cmnd_size: size in bytes of @cmnd
				597	* @timeout: timeout for this request
				598	* @copy_sense: request sense data if set to 1
				599	*
				600	* This function is used to send a scsi command down to a target device
				601	* as part of the error recovery process. If @copy_sense is 0 the command
				602	* sent must be one that does not transfer any data. If @copy_sense is 1
				603	* the command must be REQUEST_SENSE and this functions copies out the
				604	* sense buffer it got into @scmd->sense_buffer.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	605	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	606	* Return value:
				607	* SUCCESS or FAILED or NEEDS_RETRY
				608	**/
Mike Christie	0db99e3	2006-08-26 03:00:22 -0400	[diff] [blame]	609	static int scsi_send_eh_cmnd(struct scsi_cmnd scmd, unsigned char cmnd,
				610	int cmnd_size, int timeout, int copy_sense)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	611	{
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	612	struct scsi_device *sdev = scmd->device;
				613	struct Scsi_Host *shost = sdev->host;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	614	int old_result = scmd->result;
Stefan Richter	7fbb364	2006-09-12 20:35:54 -0700	[diff] [blame]	615	DECLARE_COMPLETION_ONSTACK(done);
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	616	unsigned long timeleft;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	617	unsigned long flags;
Christoph Hellwig	2dc611d	2006-11-04 20:04:21 +0100	[diff] [blame]	618	struct scatterlist sgl;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	619	unsigned char old_cmnd[MAX_COMMAND_SIZE];
				620	enum dma_data_direction old_data_direction;
				621	unsigned short old_use_sg;
				622	unsigned char old_cmd_len;
				623	unsigned old_bufflen;
				624	void *old_buffer;
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	625	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	626
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	627	/*
				628	* We need saved copies of a number of fields - this is because
				629	* error handling may need to overwrite these with different values
				630	* to run different commands, and once error handling is complete,
				631	* we will need to restore these values prior to running the actual
				632	* command.
				633	*/
				634	old_buffer = scmd->request_buffer;
				635	old_bufflen = scmd->request_bufflen;
				636	memcpy(old_cmnd, scmd->cmnd, sizeof(scmd->cmnd));
				637	old_data_direction = scmd->sc_data_direction;
				638	old_cmd_len = scmd->cmd_len;
				639	old_use_sg = scmd->use_sg;
				640
Mike Christie	0db99e3	2006-08-26 03:00:22 -0400	[diff] [blame]	641	memset(scmd->cmnd, 0, sizeof(scmd->cmnd));
				642	memcpy(scmd->cmnd, cmnd, cmnd_size);
				643
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	644	if (copy_sense) {
James Bottomley	355dfa1	2007-05-22 14:43:14 -0500	[diff] [blame]	645	sg_init_one(&sgl, scmd->sense_buffer,
				646	sizeof(scmd->sense_buffer));
Christoph Hellwig	2dc611d	2006-11-04 20:04:21 +0100	[diff] [blame]	647
				648	scmd->sc_data_direction = DMA_FROM_DEVICE;
				649	scmd->request_bufflen = sgl.length;
				650	scmd->request_buffer = &sgl;
				651	scmd->use_sg = 1;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	652	} else {
				653	scmd->request_buffer = NULL;
				654	scmd->request_bufflen = 0;
				655	scmd->sc_data_direction = DMA_NONE;
Christoph Hellwig	2dc611d	2006-11-04 20:04:21 +0100	[diff] [blame]	656	scmd->use_sg = 0;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	657	}
				658
				659	scmd->underflow = 0;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	660	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				661
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	662	if (sdev->scsi_level <= SCSI_2)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	663	scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) \|
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	664	(sdev->lun << 5 & 0xe0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	665
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	666	/*
				667	* Zero the sense buffer. The scsi spec mandates that any
				668	* untransferred sense data should be interpreted as being zero.
				669	*/
				670	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				671
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	672	shost->eh_action = &done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	673
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	674	spin_lock_irqsave(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	675	scsi_log_send(scmd);
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	676	shost->hostt->queuecommand(scmd, scsi_eh_done);
				677	spin_unlock_irqrestore(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	678
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	679	timeleft = wait_for_completion_timeout(&done, timeout);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	680
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	681	shost->eh_action = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	682
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	683	scsi_log_completion(scmd, SUCCESS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	684
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	685	SCSI_LOG_ERROR_RECOVERY(3,
				686	printk("%s: scmd: %p, timeleft: %ld\n",
				687	__FUNCTION__, scmd, timeleft));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	688
				689	/*
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	690	* If there is time left scsi_eh_done got called, and we will
				691	* examine the actual status codes to see whether the command
				692	* actually did complete normally, else tell the host to forget
				693	* about this command.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	694	*/
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	695	if (timeleft) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	696	rtn = scsi_eh_completed_normally(scmd);
				697	SCSI_LOG_ERROR_RECOVERY(3,
				698	printk("%s: scsi_eh_completed_normally %x\n",
				699	__FUNCTION__, rtn));
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	700
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	701	switch (rtn) {
				702	case SUCCESS:
				703	case NEEDS_RETRY:
				704	case FAILED:
				705	break;
				706	default:
				707	rtn = FAILED;
				708	break;
				709	}
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	710	} else {
Brian King	292148f	2007-01-30 17:51:17 -0600	[diff] [blame]	711	scsi_abort_eh_cmnd(scmd);
Christoph Hellwig	7dfdc9a	2005-10-31 18:49:52 +0100	[diff] [blame]	712	rtn = FAILED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	713	}
				714
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	715
				716	/*
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	717	* Restore original data
				718	*/
				719	scmd->request_buffer = old_buffer;
				720	scmd->request_bufflen = old_bufflen;
				721	memcpy(scmd->cmnd, old_cmnd, sizeof(scmd->cmnd));
				722	scmd->sc_data_direction = old_data_direction;
				723	scmd->cmd_len = old_cmd_len;
				724	scmd->use_sg = old_use_sg;
				725	scmd->result = old_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	726	return rtn;
				727	}
				728
				729	/**
				730	* scsi_request_sense - Request sense data from a particular target.
				731	* @scmd: SCSI cmd for request sense.
				732	*
				733	* Notes:
				734	* Some hosts automatically obtain this information, others require
				735	* that we obtain it on our own. This function will not return until
				736	* the command either times out, or it completes.
				737	**/
				738	static int scsi_request_sense(struct scsi_cmnd *scmd)
				739	{
				740	static unsigned char generic_sense[6] =
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	741	{REQUEST_SENSE, 0, 0, 0, 252, 0};
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	742
Mike Christie	0db99e3	2006-08-26 03:00:22 -0400	[diff] [blame]	743	return scsi_send_eh_cmnd(scmd, generic_sense, 6, SENSE_TIMEOUT, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	744	}
				745
				746	/**
				747	* scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
				748	* @scmd: Original SCSI cmd that eh has finished.
				749	* @done_q: Queue for processed commands.
				750	*
				751	* Notes:
				752	* We don't want to use the normal command completion while we are are
				753	* still handling errors - it may cause other commands to be queued,
				754	* and that would disturb what we are doing. thus we really want to
				755	* keep a list of pending commands for final completion, and once we
				756	* are ready to leave error handling we handle completion for real.
				757	**/
Tejun Heo	041c5fc	2006-01-23 13:09:36 +0900	[diff] [blame]	758	void scsi_eh_finish_cmd(struct scsi_cmnd scmd, struct list_head done_q)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	759	{
				760	scmd->device->host->host_failed--;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	761	scmd->eh_eflags = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	762	list_move_tail(&scmd->eh_entry, done_q);
				763	}
Tejun Heo	041c5fc	2006-01-23 13:09:36 +0900	[diff] [blame]	764	EXPORT_SYMBOL(scsi_eh_finish_cmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	765
				766	/**
				767	* scsi_eh_get_sense - Get device sense data.
				768	* @work_q: Queue of commands to process.
				769	* @done_q: Queue of proccessed commands..
				770	*
				771	* Description:
				772	* See if we need to request sense information. if so, then get it
				773	* now, so we have a better idea of what to do.
				774	*
				775	* Notes:
				776	* This has the unfortunate side effect that if a shost adapter does
				777	* not automatically request sense information, that we end up shutting
				778	* it down before we request it.
				779	*
				780	* All drivers should request sense information internally these days,
				781	* so for now all I have to say is tough noogies if you end up in here.
				782	*
				783	* XXX: Long term this code should go away, but that needs an audit of
				784	* all LLDDs first.
				785	**/
Darrick J. Wong	dca84e4	2007-01-26 14:08:49 -0800	[diff] [blame]	786	int scsi_eh_get_sense(struct list_head *work_q,
				787	struct list_head *done_q)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	788	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	789	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	790	int rtn;
				791
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	792	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	793	if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	794	SCSI_SENSE_VALID(scmd))
				795	continue;
				796
Jeff Garzik	3bf743e	2005-10-24 18:04:06 -0400	[diff] [blame]	797	SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
				798	"%s: requesting sense\n",
				799	current->comm));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	800	rtn = scsi_request_sense(scmd);
				801	if (rtn != SUCCESS)
				802	continue;
				803
				804	SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
				805	" result %x\n", scmd,
				806	scmd->result));
				807	SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
				808
				809	rtn = scsi_decide_disposition(scmd);
				810
				811	/*
				812	* if the result was normal, then just pass it along to the
				813	* upper level.
				814	*/
				815	if (rtn == SUCCESS)
				816	/* we don't want this command reissued, just
				817	* finished with the sense data, so set
				818	* retries to the max allowed to ensure it
				819	* won't get reissued */
				820	scmd->retries = scmd->allowed;
				821	else if (rtn != NEEDS_RETRY)
				822	continue;
				823
				824	scsi_eh_finish_cmd(scmd, done_q);
				825	}
				826
				827	return list_empty(work_q);
				828	}
Darrick J. Wong	dca84e4	2007-01-26 14:08:49 -0800	[diff] [blame]	829	EXPORT_SYMBOL_GPL(scsi_eh_get_sense);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	830
				831	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	832	* scsi_eh_tur - Send TUR to device.
				833	* @scmd: Scsi cmd to send TUR
				834	*
				835	* Return value:
				836	* 0 - Device is ready. 1 - Device NOT ready.
				837	**/
				838	static int scsi_eh_tur(struct scsi_cmnd *scmd)
				839	{
				840	static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
				841	int retry_cnt = 1, rtn;
				842
				843	retry_tur:
Mike Christie	0db99e3	2006-08-26 03:00:22 -0400	[diff] [blame]	844	rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, SENSE_TIMEOUT, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	845
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	846	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
				847	__FUNCTION__, scmd, rtn));
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	848
				849	switch (rtn) {
				850	case NEEDS_RETRY:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	851	if (retry_cnt--)
				852	goto retry_tur;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	853	/FALLTHRU/
				854	case SUCCESS:
Alan Stern	e47373e	2005-03-30 15:05:45 -0500	[diff] [blame]	855	return 0;
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	856	default:
				857	return 1;
Alan Stern	e47373e	2005-03-30 15:05:45 -0500	[diff] [blame]	858	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	859	}
				860
				861	/**
				862	* scsi_eh_abort_cmds - abort canceled commands.
				863	* @shost: scsi host being recovered.
				864	* @eh_done_q: list_head for processed commands.
				865	*
				866	* Decription:
				867	* Try and see whether or not it makes sense to try and abort the
				868	* running command. this only works out to be the case if we have one
				869	* command that has timed out. if the command simply failed, it makes
				870	* no sense to try and abort the command, since as far as the shost
				871	* adapter is concerned, it isn't running.
				872	**/
				873	static int scsi_eh_abort_cmds(struct list_head *work_q,
				874	struct list_head *done_q)
				875	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	876	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877	int rtn;
				878
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	879	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	880	if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	881	continue;
				882	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
				883	"0x%p\n", current->comm,
				884	scmd));
				885	rtn = scsi_try_to_abort_cmd(scmd);
				886	if (rtn == SUCCESS) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	887	scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	888	if (!scsi_device_online(scmd->device) \|\|
				889	!scsi_eh_tur(scmd)) {
				890	scsi_eh_finish_cmd(scmd, done_q);
				891	}
				892
				893	} else
				894	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
				895	" cmd failed:"
				896	"0x%p\n",
				897	current->comm,
				898	scmd));
				899	}
				900
				901	return list_empty(work_q);
				902	}
				903
				904	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	905	* scsi_eh_try_stu - Send START_UNIT to device.
				906	* @scmd: Scsi cmd to send START_UNIT
				907	*
				908	* Return value:
				909	* 0 - Device is ready. 1 - Device NOT ready.
				910	**/
				911	static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
				912	{
				913	static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	914
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	915	if (scmd->device->allow_restart) {
Brian King	ed773e6	2007-03-29 15:25:52 -0500	[diff] [blame]	916	int i, rtn = NEEDS_RETRY;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	917
Brian King	ed773e6	2007-03-29 15:25:52 -0500	[diff] [blame]	918	for (i = 0; rtn == NEEDS_RETRY && i < 2; i++)
				919	rtn = scsi_send_eh_cmnd(scmd, stu_command, 6,
Brian King	e555db9	2007-04-19 13:59:59 -0500	[diff] [blame]	920	scmd->device->timeout, 0);
Brian King	ed773e6	2007-03-29 15:25:52 -0500	[diff] [blame]	921
Christoph Hellwig	631c228	2006-07-08 20:42:15 +0200	[diff] [blame]	922	if (rtn == SUCCESS)
				923	return 0;
				924	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	925
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	926	return 1;
				927	}
				928
				929	/**
				930	* scsi_eh_stu - send START_UNIT if needed
				931	* @shost: scsi host being recovered.
				932	* @eh_done_q: list_head for processed commands.
				933	*
				934	* Notes:
				935	* If commands are failing due to not ready, initializing command required,
				936	* try revalidating the device, which will end up sending a start unit.
				937	**/
				938	static int scsi_eh_stu(struct Scsi_Host *shost,
				939	struct list_head *work_q,
				940	struct list_head *done_q)
				941	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	942	struct scsi_cmnd scmd, stu_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	943	struct scsi_device *sdev;
				944
				945	shost_for_each_device(sdev, shost) {
				946	stu_scmd = NULL;
				947	list_for_each_entry(scmd, work_q, eh_entry)
				948	if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
				949	scsi_check_sense(scmd) == FAILED ) {
				950	stu_scmd = scmd;
				951	break;
				952	}
				953
				954	if (!stu_scmd)
				955	continue;
				956
				957	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
				958	" 0x%p\n", current->comm, sdev));
				959
				960	if (!scsi_eh_try_stu(stu_scmd)) {
				961	if (!scsi_device_online(sdev) \|\|
				962	!scsi_eh_tur(stu_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	963	list_for_each_entry_safe(scmd, next,
				964	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	965	if (scmd->device == sdev)
				966	scsi_eh_finish_cmd(scmd, done_q);
				967	}
				968	}
				969	} else {
				970	SCSI_LOG_ERROR_RECOVERY(3,
				971	printk("%s: START_UNIT failed to sdev:"
				972	" 0x%p\n", current->comm, sdev));
				973	}
				974	}
				975
				976	return list_empty(work_q);
				977	}
				978
				979
				980	/**
				981	* scsi_eh_bus_device_reset - send bdr if needed
				982	* @shost: scsi host being recovered.
				983	* @eh_done_q: list_head for processed commands.
				984	*
				985	* Notes:
				986	* Try a bus device reset. still, look to see whether we have multiple
				987	* devices that are jammed or not - if we have multiple devices, it
				988	* makes no sense to try bus_device_reset - we really would need to try
				989	* a bus_reset instead.
				990	**/
				991	static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
				992	struct list_head *work_q,
				993	struct list_head *done_q)
				994	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	995	struct scsi_cmnd scmd, bdr_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	996	struct scsi_device *sdev;
				997	int rtn;
				998
				999	shost_for_each_device(sdev, shost) {
				1000	bdr_scmd = NULL;
				1001	list_for_each_entry(scmd, work_q, eh_entry)
				1002	if (scmd->device == sdev) {
				1003	bdr_scmd = scmd;
				1004	break;
				1005	}
				1006
				1007	if (!bdr_scmd)
				1008	continue;
				1009
				1010	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
				1011	" 0x%p\n", current->comm,
				1012	sdev));
				1013	rtn = scsi_try_bus_device_reset(bdr_scmd);
				1014	if (rtn == SUCCESS) {
				1015	if (!scsi_device_online(sdev) \|\|
				1016	!scsi_eh_tur(bdr_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1017	list_for_each_entry_safe(scmd, next,
				1018	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1019	if (scmd->device == sdev)
				1020	scsi_eh_finish_cmd(scmd,
				1021	done_q);
				1022	}
				1023	}
				1024	} else {
				1025	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
				1026	" failed sdev:"
				1027	"0x%p\n",
				1028	current->comm,
				1029	sdev));
				1030	}
				1031	}
				1032
				1033	return list_empty(work_q);
				1034	}
				1035
				1036	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1037	* scsi_eh_bus_reset - send a bus reset
				1038	* @shost: scsi host being recovered.
				1039	* @eh_done_q: list_head for processed commands.
				1040	**/
				1041	static int scsi_eh_bus_reset(struct Scsi_Host *shost,
				1042	struct list_head *work_q,
				1043	struct list_head *done_q)
				1044	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1045	struct scsi_cmnd scmd, chan_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1046	unsigned int channel;
				1047	int rtn;
				1048
				1049	/*
				1050	* we really want to loop over the various channels, and do this on
				1051	* a channel by channel basis. we should also check to see if any
				1052	* of the failed commands are on soft_reset devices, and if so, skip
				1053	* the reset.
				1054	*/
				1055
				1056	for (channel = 0; channel <= shost->max_channel; channel++) {
				1057	chan_scmd = NULL;
				1058	list_for_each_entry(scmd, work_q, eh_entry) {
Jeff Garzik	422c0d6	2005-10-24 18:05:09 -0400	[diff] [blame]	1059	if (channel == scmd_channel(scmd)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1060	chan_scmd = scmd;
				1061	break;
				1062	/*
				1063	* FIXME add back in some support for
				1064	* soft_reset devices.
				1065	*/
				1066	}
				1067	}
				1068
				1069	if (!chan_scmd)
				1070	continue;
				1071	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
				1072	" %d\n", current->comm,
				1073	channel));
				1074	rtn = scsi_try_bus_reset(chan_scmd);
				1075	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1076	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Jeff Garzik	422c0d6	2005-10-24 18:05:09 -0400	[diff] [blame]	1077	if (channel == scmd_channel(scmd))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1078	if (!scsi_device_online(scmd->device) \|\|
				1079	!scsi_eh_tur(scmd))
				1080	scsi_eh_finish_cmd(scmd,
				1081	done_q);
				1082	}
				1083	} else {
				1084	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
				1085	" failed chan: %d\n",
				1086	current->comm,
				1087	channel));
				1088	}
				1089	}
				1090	return list_empty(work_q);
				1091	}
				1092
				1093	/**
				1094	* scsi_eh_host_reset - send a host reset
				1095	* @work_q: list_head for processed commands.
				1096	* @done_q: list_head for processed commands.
				1097	**/
				1098	static int scsi_eh_host_reset(struct list_head *work_q,
				1099	struct list_head *done_q)
				1100	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1101	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1102	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1103
				1104	if (!list_empty(work_q)) {
				1105	scmd = list_entry(work_q->next,
				1106	struct scsi_cmnd, eh_entry);
				1107
				1108	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
				1109	, current->comm));
				1110
				1111	rtn = scsi_try_host_reset(scmd);
				1112	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1113	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1114	if (!scsi_device_online(scmd->device) \|\|
				1115	(!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) \|\|
				1116	!scsi_eh_tur(scmd))
				1117	scsi_eh_finish_cmd(scmd, done_q);
				1118	}
				1119	} else {
				1120	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
				1121	" failed\n",
				1122	current->comm));
				1123	}
				1124	}
				1125	return list_empty(work_q);
				1126	}
				1127
				1128	/**
				1129	* scsi_eh_offline_sdevs - offline scsi devices that fail to recover
				1130	* @work_q: list_head for processed commands.
				1131	* @done_q: list_head for processed commands.
				1132	*
				1133	**/
				1134	static void scsi_eh_offline_sdevs(struct list_head *work_q,
				1135	struct list_head *done_q)
				1136	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1137	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1138
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1139	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
James Bottomley	9ccfc75	2005-10-02 11:45:08 -0500	[diff] [blame]	1140	sdev_printk(KERN_INFO, scmd->device,
				1141	"scsi: Device offlined - not"
				1142	" ready after error recovery\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1143	scsi_device_set_state(scmd->device, SDEV_OFFLINE);
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	1144	if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1145	/*
				1146	* FIXME: Handle lost cmds.
				1147	*/
				1148	}
				1149	scsi_eh_finish_cmd(scmd, done_q);
				1150	}
				1151	return;
				1152	}
				1153
				1154	/**
				1155	* scsi_decide_disposition - Disposition a cmd on return from LLD.
				1156	* @scmd: SCSI cmd to examine.
				1157	*
				1158	* Notes:
				1159	* This is only called when we are examining the status after sending
				1160	* out the actual data command. any commands that are queued for error
				1161	* recovery (e.g. test_unit_ready) do not come through here.
				1162	*
				1163	* When this routine returns failed, it means the error handler thread
				1164	* is woken. In cases where the error code indicates an error that
				1165	* doesn't require the error handler read (i.e. we don't need to
				1166	* abort/reset), this function should return SUCCESS.
				1167	**/
				1168	int scsi_decide_disposition(struct scsi_cmnd *scmd)
				1169	{
				1170	int rtn;
				1171
				1172	/*
				1173	* if the device is offline, then we clearly just pass the result back
				1174	* up to the top level.
				1175	*/
				1176	if (!scsi_device_online(scmd->device)) {
				1177	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
				1178	" as SUCCESS\n",
				1179	__FUNCTION__));
				1180	return SUCCESS;
				1181	}
				1182
				1183	/*
				1184	* first check the host byte, to see if there is anything in there
				1185	* that would indicate what we need to do.
				1186	*/
				1187	switch (host_byte(scmd->result)) {
				1188	case DID_PASSTHROUGH:
				1189	/*
				1190	* no matter what, pass this through to the upper layer.
				1191	* nuke this special code so that it looks like we are saying
				1192	* did_ok.
				1193	*/
				1194	scmd->result &= 0xff00ffff;
				1195	return SUCCESS;
				1196	case DID_OK:
				1197	/*
				1198	* looks good. drop through, and check the next byte.
				1199	*/
				1200	break;
				1201	case DID_NO_CONNECT:
				1202	case DID_BAD_TARGET:
				1203	case DID_ABORT:
				1204	/*
				1205	* note - this means that we just report the status back
				1206	* to the top level driver, not that we actually think
				1207	* that it indicates SUCCESS.
				1208	*/
				1209	return SUCCESS;
				1210	/*
				1211	* when the low level driver returns did_soft_error,
				1212	* it is responsible for keeping an internal retry counter
				1213	* in order to avoid endless loops (db)
				1214	*
				1215	* actually this is a bug in this function here. we should
				1216	* be mindful of the maximum number of retries specified
				1217	* and not get stuck in a loop.
				1218	*/
				1219	case DID_SOFT_ERROR:
				1220	goto maybe_retry;
				1221	case DID_IMM_RETRY:
				1222	return NEEDS_RETRY;
				1223
	bf34191	2005-04-12 17:49:09 -0500	[diff] [blame]	1224	case DID_REQUEUE:
				1225	return ADD_TO_MLQUEUE;
				1226
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1227	case DID_ERROR:
				1228	if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
				1229	status_byte(scmd->result) == RESERVATION_CONFLICT)
				1230	/*
				1231	* execute reservation conflict processing code
				1232	* lower down
				1233	*/
				1234	break;
				1235	/* fallthrough */
				1236
				1237	case DID_BUS_BUSY:
				1238	case DID_PARITY:
				1239	goto maybe_retry;
				1240	case DID_TIME_OUT:
				1241	/*
				1242	* when we scan the bus, we get timeout messages for
				1243	* these commands if there is no device available.
				1244	* other hosts report did_no_connect for the same thing.
				1245	*/
				1246	if ((scmd->cmnd[0] == TEST_UNIT_READY \|\|
				1247	scmd->cmnd[0] == INQUIRY)) {
				1248	return SUCCESS;
				1249	} else {
				1250	return FAILED;
				1251	}
				1252	case DID_RESET:
				1253	return SUCCESS;
				1254	default:
				1255	return FAILED;
				1256	}
				1257
				1258	/*
				1259	* next, check the message byte.
				1260	*/
				1261	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				1262	return FAILED;
				1263
				1264	/*
				1265	* check the status byte to see if this indicates anything special.
				1266	*/
				1267	switch (status_byte(scmd->result)) {
				1268	case QUEUE_FULL:
				1269	/*
				1270	* the case of trying to send too many commands to a
				1271	* tagged queueing device.
				1272	*/
				1273	case BUSY:
				1274	/*
				1275	* device can't talk to us at the moment. Should only
				1276	* occur (SAM-3) when the task queue is empty, so will cause
				1277	* the empty queue handling to trigger a stall in the
				1278	* device.
				1279	*/
				1280	return ADD_TO_MLQUEUE;
				1281	case GOOD:
				1282	case COMMAND_TERMINATED:
				1283	case TASK_ABORTED:
				1284	return SUCCESS;
				1285	case CHECK_CONDITION:
				1286	rtn = scsi_check_sense(scmd);
				1287	if (rtn == NEEDS_RETRY)
				1288	goto maybe_retry;
				1289	/* if rtn == FAILED, we have no sense information;
				1290	* returning FAILED will wake the error handler thread
				1291	* to collect the sense and redo the decide
				1292	* disposition */
				1293	return rtn;
				1294	case CONDITION_GOOD:
				1295	case INTERMEDIATE_GOOD:
				1296	case INTERMEDIATE_C_GOOD:
				1297	case ACA_ACTIVE:
				1298	/*
				1299	* who knows? FIXME(eric)
				1300	*/
				1301	return SUCCESS;
				1302
				1303	case RESERVATION_CONFLICT:
James Bottomley	9ccfc75	2005-10-02 11:45:08 -0500	[diff] [blame]	1304	sdev_printk(KERN_INFO, scmd->device,
				1305	"reservation conflict\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1306	return SUCCESS; /* causes immediate i/o error */
				1307	default:
				1308	return FAILED;
				1309	}
				1310	return FAILED;
				1311
				1312	maybe_retry:
				1313
				1314	/* we requeue for retry because the error was retryable, and
				1315	* the request was not marked fast fail. Note that above,
				1316	* even if the request is marked fast fail, we still requeue
				1317	* for queue congestion conditions (QUEUE_FULL or BUSY) */
Brian King	8884efa	2006-02-24 17:10:04 -0600	[diff] [blame]	1318	if ((++scmd->retries) <= scmd->allowed
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1319	&& !blk_noretry_request(scmd->request)) {
				1320	return NEEDS_RETRY;
				1321	} else {
				1322	/*
				1323	* no more retries - report this one back to upper level.
				1324	*/
				1325	return SUCCESS;
				1326	}
				1327	}
				1328
				1329	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1330	* scsi_eh_lock_door - Prevent medium removal for the specified device
				1331	* @sdev: SCSI device to prevent medium removal
				1332	*
				1333	* Locking:
				1334	* We must be called from process context; scsi_allocate_request()
				1335	* may sleep.
				1336	*
				1337	* Notes:
				1338	* We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
				1339	* head of the devices request queue, and continue.
				1340	*
				1341	* Bugs:
				1342	* scsi_allocate_request() may sleep waiting for existing requests to
				1343	* be processed. However, since we haven't kicked off any request
				1344	* processing for this host, this may deadlock.
				1345	*
				1346	* If scsi_allocate_request() fails for what ever reason, we
				1347	* completely forget to lock the door.
				1348	**/
				1349	static void scsi_eh_lock_door(struct scsi_device *sdev)
				1350	{
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	1351	unsigned char cmnd[MAX_COMMAND_SIZE];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1352
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	1353	cmnd[0] = ALLOW_MEDIUM_REMOVAL;
				1354	cmnd[1] = 0;
				1355	cmnd[2] = 0;
				1356	cmnd[3] = 0;
				1357	cmnd[4] = SCSI_REMOVAL_PREVENT;
				1358	cmnd[5] = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1359
brking@us.ibm.com	bb1d107	2006-01-23 15:03:22 -0600	[diff] [blame]	1360	scsi_execute_async(sdev, cmnd, 6, DMA_NONE, NULL, 0, 0, 10 * HZ,
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	1361	5, NULL, NULL, GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1362	}
				1363
				1364
				1365	/**
				1366	* scsi_restart_operations - restart io operations to the specified host.
				1367	* @shost: Host we are restarting.
				1368	*
				1369	* Notes:
				1370	* When we entered the error handler, we blocked all further i/o to
				1371	* this device. we need to 'reverse' this process.
				1372	**/
				1373	static void scsi_restart_operations(struct Scsi_Host *shost)
				1374	{
				1375	struct scsi_device *sdev;
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	1376	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1377
				1378	/*
				1379	* If the door was locked, we need to insert a door lock request
				1380	* onto the head of the SCSI request queue for the device. There
				1381	* is no point trying to lock the door of an off-line device.
				1382	*/
				1383	shost_for_each_device(sdev, shost) {
				1384	if (scsi_device_online(sdev) && sdev->locked)
				1385	scsi_eh_lock_door(sdev);
				1386	}
				1387
				1388	/*
				1389	* next free up anything directly waiting upon the host. this
				1390	* will be requests for character device operations, and also for
				1391	* ioctls to queued block devices.
				1392	*/
				1393	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
				1394	__FUNCTION__));
				1395
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	1396	spin_lock_irqsave(shost->host_lock, flags);
				1397	if (scsi_host_set_state(shost, SHOST_RUNNING))
				1398	if (scsi_host_set_state(shost, SHOST_CANCEL))
				1399	BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
				1400	spin_unlock_irqrestore(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401
				1402	wake_up(&shost->host_wait);
				1403
				1404	/*
				1405	* finally we need to re-initiate requests that may be pending. we will
				1406	* have had everything blocked while error handling is taking place, and
				1407	* now that error recovery is done, we will need to ensure that these
				1408	* requests are started.
				1409	*/
				1410	scsi_run_host_queues(shost);
				1411	}
				1412
				1413	/**
				1414	* scsi_eh_ready_devs - check device ready state and recover if not.
				1415	* @shost: host to be recovered.
				1416	* @eh_done_q: list_head for processed commands.
				1417	*
				1418	**/
Darrick J. Wong	dca84e4	2007-01-26 14:08:49 -0800	[diff] [blame]	1419	void scsi_eh_ready_devs(struct Scsi_Host *shost,
				1420	struct list_head *work_q,
				1421	struct list_head *done_q)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1422	{
				1423	if (!scsi_eh_stu(shost, work_q, done_q))
				1424	if (!scsi_eh_bus_device_reset(shost, work_q, done_q))
				1425	if (!scsi_eh_bus_reset(shost, work_q, done_q))
				1426	if (!scsi_eh_host_reset(work_q, done_q))
				1427	scsi_eh_offline_sdevs(work_q, done_q);
				1428	}
Darrick J. Wong	dca84e4	2007-01-26 14:08:49 -0800	[diff] [blame]	1429	EXPORT_SYMBOL_GPL(scsi_eh_ready_devs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1430
				1431	/**
				1432	* scsi_eh_flush_done_q - finish processed commands or retry them.
				1433	* @done_q: list_head of processed commands.
				1434	*
				1435	**/
Tejun Heo	041c5fc	2006-01-23 13:09:36 +0900	[diff] [blame]	1436	void scsi_eh_flush_done_q(struct list_head *done_q)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1437	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1438	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1439
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1440	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
				1441	list_del_init(&scmd->eh_entry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1442	if (scsi_device_online(scmd->device) &&
				1443	!blk_noretry_request(scmd->request) &&
Brian King	8884efa	2006-02-24 17:10:04 -0600	[diff] [blame]	1444	(++scmd->retries <= scmd->allowed)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1445	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
				1446	" retry cmd: %p\n",
				1447	current->comm,
				1448	scmd));
				1449	scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
				1450	} else {
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	1451	/*
				1452	* If just we got sense for the device (called
				1453	* scsi_eh_get_sense), scmd->result is already
				1454	* set, do not set DRIVER_TIMEOUT.
				1455	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1456	if (!scmd->result)
				1457	scmd->result \|= (DRIVER_TIMEOUT << 24);
				1458	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
				1459	" cmd: %p\n",
				1460	current->comm, scmd));
				1461	scsi_finish_command(scmd);
				1462	}
				1463	}
				1464	}
Tejun Heo	041c5fc	2006-01-23 13:09:36 +0900	[diff] [blame]	1465	EXPORT_SYMBOL(scsi_eh_flush_done_q);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1466
				1467	/**
				1468	* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
				1469	* @shost: Host to unjam.
				1470	*
				1471	* Notes:
				1472	* When we come in here, we know that all commands on the bus have
				1473	* either completed, failed or timed out. we also know that no further
				1474	* commands are being sent to the host, so things are relatively quiet
				1475	* and we have freedom to fiddle with things as we wish.
				1476	*
				1477	* This is only the default implementation. it is possible for
				1478	* individual drivers to supply their own version of this function, and
				1479	* if the maintainer wishes to do this, it is strongly suggested that
				1480	* this function be taken as a template and modified. this function
				1481	* was designed to correctly handle problems for about 95% of the
				1482	* different cases out there, and it should always provide at least a
				1483	* reasonable amount of error recovery.
				1484	*
				1485	* Any command marked 'failed' or 'timeout' must eventually have
				1486	* scsi_finish_cmd() called for it. we do all of the retry stuff
				1487	* here, so when we restart the host after we return it should have an
				1488	* empty queue.
				1489	**/
				1490	static void scsi_unjam_host(struct Scsi_Host *shost)
				1491	{
				1492	unsigned long flags;
				1493	LIST_HEAD(eh_work_q);
				1494	LIST_HEAD(eh_done_q);
				1495
				1496	spin_lock_irqsave(shost->host_lock, flags);
				1497	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
				1498	spin_unlock_irqrestore(shost->host_lock, flags);
				1499
				1500	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
				1501
				1502	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
				1503	if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
				1504	scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
				1505
				1506	scsi_eh_flush_done_q(&eh_done_q);
				1507	}
				1508
				1509	/**
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1510	* scsi_error_handler - SCSI error handler thread
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1511	* @data: Host for which we are running.
				1512	*
				1513	* Notes:
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1514	* This is the main error handling loop. This is run as a kernel thread
				1515	* for every SCSI host and handles all error handling activity.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1516	**/
				1517	int scsi_error_handler(void *data)
				1518	{
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1519	struct Scsi_Host *shost = data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1520
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1521	/*
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1522	* We use TASK_INTERRUPTIBLE so that the thread is not
				1523	* counted against the load average as a running process.
				1524	* We never actually get interrupted because kthread_run
				1525	* disables singal delivery for the created thread.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1526	*/
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1527	set_current_state(TASK_INTERRUPTIBLE);
				1528	while (!kthread_should_stop()) {
Tejun Heo	ee7863b	2006-05-15 20:57:20 +0900	[diff] [blame]	1529	if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) \|\|
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1530	shost->host_failed != shost->host_busy) {
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1531	SCSI_LOG_ERROR_RECOVERY(1,
				1532	printk("Error handler scsi_eh_%d sleeping\n",
				1533	shost->host_no));
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1534	schedule();
				1535	set_current_state(TASK_INTERRUPTIBLE);
				1536	continue;
				1537	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1538
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1539	__set_current_state(TASK_RUNNING);
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1540	SCSI_LOG_ERROR_RECOVERY(1,
				1541	printk("Error handler scsi_eh_%d waking up\n",
				1542	shost->host_no));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1543
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1544	/*
				1545	* We have a host that is failing for some reason. Figure out
				1546	* what we need to do to get it up and online again (if we can).
				1547	* If we fail, we end up taking the thing offline.
				1548	*/
Christoph Hellwig	9227c33	2006-04-01 19:21:04 +0200	[diff] [blame]	1549	if (shost->transportt->eh_strategy_handler)
				1550	shost->transportt->eh_strategy_handler(shost);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1551	else
				1552	scsi_unjam_host(shost);
				1553
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1554	/*
				1555	* Note - if the above fails completely, the action is to take
				1556	* individual devices offline and flush the queue of any
				1557	* outstanding requests that may have been pending. When we
				1558	* restart, we restart any I/O to any other devices on the bus
				1559	* which are still online.
				1560	*/
				1561	scsi_restart_operations(shost);
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1562	set_current_state(TASK_INTERRUPTIBLE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1563	}
Steven Rostedt	461a0ff	2005-10-19 08:22:13 -0400	[diff] [blame]	1564	__set_current_state(TASK_RUNNING);
				1565
Christoph Hellwig	ad42eb1	2005-10-29 01:01:55 +0200	[diff] [blame]	1566	SCSI_LOG_ERROR_RECOVERY(1,
				1567	printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1568	shost->ehandler = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1569	return 0;
				1570	}
				1571
				1572	/*
				1573	* Function: scsi_report_bus_reset()
				1574	*
				1575	* Purpose: Utility function used by low-level drivers to report that
				1576	* they have observed a bus reset on the bus being handled.
				1577	*
				1578	* Arguments: shost - Host in question
				1579	* channel - channel on which reset was observed.
				1580	*
				1581	* Returns: Nothing
				1582	*
				1583	* Lock status: Host lock must be held.
				1584	*
				1585	* Notes: This only needs to be called if the reset is one which
				1586	* originates from an unknown location. Resets originated
				1587	* by the mid-level itself don't need to call this, but there
				1588	* should be no harm.
				1589	*
				1590	* The main purpose of this is to make sure that a CHECK_CONDITION
				1591	* is properly treated.
				1592	*/
				1593	void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
				1594	{
				1595	struct scsi_device *sdev;
				1596
				1597	__shost_for_each_device(sdev, shost) {
Jeff Garzik	422c0d6	2005-10-24 18:05:09 -0400	[diff] [blame]	1598	if (channel == sdev_channel(sdev)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1599	sdev->was_reset = 1;
				1600	sdev->expecting_cc_ua = 1;
				1601	}
				1602	}
				1603	}
				1604	EXPORT_SYMBOL(scsi_report_bus_reset);
				1605
				1606	/*
				1607	* Function: scsi_report_device_reset()
				1608	*
				1609	* Purpose: Utility function used by low-level drivers to report that
				1610	* they have observed a device reset on the device being handled.
				1611	*
				1612	* Arguments: shost - Host in question
				1613	* channel - channel on which reset was observed
				1614	* target - target on which reset was observed
				1615	*
				1616	* Returns: Nothing
				1617	*
				1618	* Lock status: Host lock must be held
				1619	*
				1620	* Notes: This only needs to be called if the reset is one which
				1621	* originates from an unknown location. Resets originated
				1622	* by the mid-level itself don't need to call this, but there
				1623	* should be no harm.
				1624	*
				1625	* The main purpose of this is to make sure that a CHECK_CONDITION
				1626	* is properly treated.
				1627	*/
				1628	void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
				1629	{
				1630	struct scsi_device *sdev;
				1631
				1632	__shost_for_each_device(sdev, shost) {
Jeff Garzik	422c0d6	2005-10-24 18:05:09 -0400	[diff] [blame]	1633	if (channel == sdev_channel(sdev) &&
				1634	target == sdev_id(sdev)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1635	sdev->was_reset = 1;
				1636	sdev->expecting_cc_ua = 1;
				1637	}
				1638	}
				1639	}
				1640	EXPORT_SYMBOL(scsi_report_device_reset);
				1641
				/*
				 * Intentionally empty completion callback: scsi_reset_provider() installs
				 * it as scsi_done on the command it builds for issuing resets.
				 */
				static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
				{
					/* nothing to do */
				}
				1646
				1647	/*
				1648	* Function: scsi_reset_provider
				1649	*
				1650	* Purpose: Send requested reset to a bus or device at any phase.
				1651	*
				1652	* Arguments: device - device to send reset to
				1653	* flag - reset type (see scsi.h)
				1654	*
				1655	* Returns: SUCCESS/FAILURE.
				1656	*
				1657	* Notes: This is used by the SCSI Generic driver to provide
				1658	* Bus/Device reset capability.
				1659	*/
				1660	int
				1661	scsi_reset_provider(struct scsi_device *dev, int flag)
				1662	{
				1663	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
James Smart	d7a1bb0	2006-03-08 14:50:12 -0500	[diff] [blame]	1664	struct Scsi_Host *shost = dev->host;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1665	struct request req;
James Smart	d7a1bb0	2006-03-08 14:50:12 -0500	[diff] [blame]	1666	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1667	int rtn;
				1668
				1669	scmd->request = &req;
				1670	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
Christoph Hellwig	b4edcbc	2005-06-19 13:40:52 +0200	[diff] [blame]	1671
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1672	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
				1673
				1674	scmd->scsi_done = scsi_reset_provider_done_command;
				1675	scmd->done = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1676	scmd->request_buffer = NULL;
				1677	scmd->request_bufflen = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1678
				1679	scmd->cmd_len = 0;
				1680
				1681	scmd->sc_data_direction = DMA_BIDIRECTIONAL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1682
				1683	init_timer(&scmd->eh_timeout);
				1684
				1685	/*
				1686	* Sometimes the command can get back into the timer chain,
				1687	* so use the pid as an identifier.
				1688	*/
				1689	scmd->pid = 0;
				1690
James Smart	d7a1bb0	2006-03-08 14:50:12 -0500	[diff] [blame]	1691	spin_lock_irqsave(shost->host_lock, flags);
				1692	shost->tmf_in_progress = 1;
				1693	spin_unlock_irqrestore(shost->host_lock, flags);
				1694
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1695	switch (flag) {
				1696	case SCSI_TRY_RESET_DEVICE:
				1697	rtn = scsi_try_bus_device_reset(scmd);
				1698	if (rtn == SUCCESS)
				1699	break;
				1700	/* FALLTHROUGH */
				1701	case SCSI_TRY_RESET_BUS:
				1702	rtn = scsi_try_bus_reset(scmd);
				1703	if (rtn == SUCCESS)
				1704	break;
				1705	/* FALLTHROUGH */
				1706	case SCSI_TRY_RESET_HOST:
				1707	rtn = scsi_try_host_reset(scmd);
				1708	break;
				1709	default:
				1710	rtn = FAILED;
				1711	}
				1712
James Smart	d7a1bb0	2006-03-08 14:50:12 -0500	[diff] [blame]	1713	spin_lock_irqsave(shost->host_lock, flags);
				1714	shost->tmf_in_progress = 0;
				1715	spin_unlock_irqrestore(shost->host_lock, flags);
				1716
				1717	/*
				1718	* be sure to wake up anyone who was sleeping or had their queue
				1719	* suspended while we performed the TMF.
				1720	*/
				1721	SCSI_LOG_ERROR_RECOVERY(3,
				1722	printk("%s: waking up host to restart after TMF\n",
				1723	__FUNCTION__));
				1724
				1725	wake_up(&shost->host_wait);
				1726
				1727	scsi_run_host_queues(shost);
				1728
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1729	scsi_next_command(scmd);
				1730	return rtn;
				1731	}
				1732	EXPORT_SYMBOL(scsi_reset_provider);
				1733
				1734	/**
				1735	* scsi_normalize_sense - normalize main elements from either fixed or
				1736	* descriptor sense data format into a common format.
				1737	*
				1738	* @sense_buffer: byte array containing sense data returned by device
				1739	* @sb_len: number of valid bytes in sense_buffer
				1740	* @sshdr: pointer to instance of structure that common
				1741	* elements are written to.
				1742	*
				1743	* Notes:
				1744	* The "main elements" from sense data are: response_code, sense_key,
				1745	* asc, ascq and additional_length (only for descriptor format).
				1746	*
				1747	* Typically this function can be called after a device has
				1748	* responded to a SCSI command with the CHECK_CONDITION status.
				1749	*
				1750	* Return value:
				1751	* 1 if valid sense data information found, else 0;
				1752	**/
				1753	int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
				1754	struct scsi_sense_hdr *sshdr)
				1755	{
James Bottomley	33aa687	2005-08-28 11:31:14 -0500	[diff] [blame]	1756	if (!sense_buffer \|\| !sb_len)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1757	return 0;
				1758
				1759	memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
				1760
				1761	sshdr->response_code = (sense_buffer[0] & 0x7f);
James Bottomley	33aa687	2005-08-28 11:31:14 -0500	[diff] [blame]	1762
				1763	if (!scsi_sense_valid(sshdr))
				1764	return 0;
				1765
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1766	if (sshdr->response_code >= 0x72) {
				1767	/*
				1768	* descriptor format
				1769	*/
				1770	if (sb_len > 1)
				1771	sshdr->sense_key = (sense_buffer[1] & 0xf);
				1772	if (sb_len > 2)
				1773	sshdr->asc = sense_buffer[2];
				1774	if (sb_len > 3)
				1775	sshdr->ascq = sense_buffer[3];
				1776	if (sb_len > 7)
				1777	sshdr->additional_length = sense_buffer[7];
				1778	} else {
				1779	/*
				1780	* fixed format
				1781	*/
				1782	if (sb_len > 2)
				1783	sshdr->sense_key = (sense_buffer[2] & 0xf);
				1784	if (sb_len > 7) {
				1785	sb_len = (sb_len < (sense_buffer[7] + 8)) ?
				1786	sb_len : (sense_buffer[7] + 8);
				1787	if (sb_len > 12)
				1788	sshdr->asc = sense_buffer[12];
				1789	if (sb_len > 13)
				1790	sshdr->ascq = sense_buffer[13];
				1791	}
				1792	}
				1793
				1794	return 1;
				1795	}
				1796	EXPORT_SYMBOL(scsi_normalize_sense);
				1797
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1798	int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
				1799	struct scsi_sense_hdr *sshdr)
				1800	{
				1801	return scsi_normalize_sense(cmd->sense_buffer,
				1802	sizeof(cmd->sense_buffer), sshdr);
				1803	}
				1804	EXPORT_SYMBOL(scsi_command_normalize_sense);
				1805
				1806	/**
				1807	* scsi_sense_desc_find - search for a given descriptor type in
				1808	* descriptor sense data format.
				1809	*
				1810	* @sense_buffer: byte array of descriptor format sense data
				1811	* @sb_len: number of valid bytes in sense_buffer
				1812	* @desc_type: value of descriptor type to find
				1813	* (e.g. 0 -> information)
				1814	*
				1815	* Notes:
				1816	* only valid when sense data is in descriptor format
				1817	*
				1818	* Return value:
				1819	* pointer to start of (first) descriptor if found else NULL
				1820	**/
				1821	const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
				1822	int desc_type)
				1823	{
				1824	int add_sen_len, add_len, desc_len, k;
				1825	const u8 * descp;
				1826
				1827	if ((sb_len < 8) \|\| (0 == (add_sen_len = sense_buffer[7])))
				1828	return NULL;
				1829	if ((sense_buffer[0] < 0x72) \|\| (sense_buffer[0] > 0x73))
				1830	return NULL;
				1831	add_sen_len = (add_sen_len < (sb_len - 8)) ?
				1832	add_sen_len : (sb_len - 8);
				1833	descp = &sense_buffer[8];
				1834	for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
				1835	descp += desc_len;
				1836	add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
				1837	desc_len = add_len + 2;
				1838	if (descp[0] == desc_type)
				1839	return descp;
				1840	if (add_len < 0) // short descriptor ??
				1841	break;
				1842	}
				1843	return NULL;
				1844	}
				1845	EXPORT_SYMBOL(scsi_sense_desc_find);
				1846
				1847	/**
				1848	* scsi_get_sense_info_fld - attempts to get information field from
				1849	* sense data (either fixed or descriptor format)
				1850	*
				1851	* @sense_buffer: byte array of sense data
				1852	* @sb_len: number of valid bytes in sense_buffer
				1853	* @info_out: pointer to 64 integer where 8 or 4 byte information
				1854	* field will be placed if found.
				1855	*
				1856	* Return value:
				1857	* 1 if information field found, 0 if not found.
				1858	**/
				1859	int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
				1860	u64 * info_out)
				1861	{
				1862	int j;
				1863	const u8 * ucp;
				1864	u64 ull;
				1865
				1866	if (sb_len < 7)
				1867	return 0;
				1868	switch (sense_buffer[0] & 0x7f) {
				1869	case 0x70:
				1870	case 0x71:
				1871	if (sense_buffer[0] & 0x80) {
				1872	*info_out = (sense_buffer[3] << 24) +
				1873	(sense_buffer[4] << 16) +
				1874	(sense_buffer[5] << 8) + sense_buffer[6];
				1875	return 1;
				1876	} else
				1877	return 0;
				1878	case 0x72:
				1879	case 0x73:
				1880	ucp = scsi_sense_desc_find(sense_buffer, sb_len,
				1881	0 /* info desc */);
				1882	if (ucp && (0xa == ucp[1])) {
				1883	ull = 0;
				1884	for (j = 0; j < 8; ++j) {
				1885	if (j > 0)
				1886	ull <<= 8;
				1887	ull \|= ucp[4 + j];
				1888	}
				1889	*info_out = ull;
				1890	return 1;
				1891	} else
				1892	return 0;
				1893	default:
				1894	return 0;
				1895	}
				1896	}
				1897	EXPORT_SYMBOL(scsi_get_sense_info_fld);